In [38]:
%pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-h80ymyro
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-h80ymyro
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import clip
import torch
import numpy as np
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.linear_model import LogisticRegression
import os
from tqdm import tqdm
import pandas as pd

In [None]:
# Show the checkpoint names this clip install reports; the name passed to
# clip.load later in the notebook is one of them.
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# torch.cuda.get_device_name() raises on a runtime without CUDA, which would
# defeat the CPU fallback above, so only query it when a GPU was selected.
device_name = torch.cuda.get_device_name() if device == "cuda" else "cpu"
device_name

'Tesla T4'

In [None]:
# Load the ViT-B/32 CLIP checkpoint onto `device`. clip.load returns a pair:
# the network, and the image preprocessing transform to apply before encoding.
model, preprocess = clip.load(
    'ViT-B/32',
    device=device,
)

100%|███████████████████████████████████████| 338M/338M [00:04<00:00, 72.2MiB/s]


In [None]:
# Total number of scalar parameters across every tensor of the loaded model.
sum(tensor.numel() for tensor in model.parameters())

151277313

In [None]:
# Display the preprocessing transform returned by clip.load so the resize /
# crop / normalisation steps applied to every image are visible.
preprocess

Compose(
    Resize(size=224, interpolation=bicubic, max_size=None, antialias=warn)
    CenterCrop(size=(224, 224))
    <function _convert_image_to_rgb at 0x78532395e0e0>
    ToTensor()
    Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
)

In [None]:
class ImageDataset(Dataset):
    """Image-classification dataset described by a CSV annotation file.

    Rows are read positionally: column 0 holds the image file name (joined
    onto ``root_dir``) and column 1 holds the class label, which must be
    convertible with ``int()``.

    Args:
        csv_file: Path of the annotation CSV, parsed with ``pd.read_csv``.
        root_dir: Directory containing the image files named in the CSV.
        transform: Optional callable applied to the opened PIL image.

    Attributes:
        annotations: DataFrame loaded from ``csv_file``.
        image_paths: Entries of ``root_dir`` as returned by ``os.listdir``.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Stored under a plain attribute name: ``__annotations__`` is reserved
        # by Python for type-annotation metadata and should not carry data.
        self.annotations = pd.read_csv(csv_file)

        # Directory listing of root_dir. Sample lookup goes through the CSV,
        # not through this list.
        self.image_paths = os.listdir(root_dir)

    def __len__(self):
        """Return the number of rows in the annotation CSV."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the CSV row at position ``index``.

        ``image`` is a float32 tensor: the transform output when a transform
        is set, otherwise the raw pixel array of the image. ``label`` is a
        0-dim integer tensor built from column 1 of the row.
        """
        image_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        # Image.open defers reading and keeps the file open, so all pixel
        # access happens inside the context manager and the handle is
        # released once the sample has been materialised.
        with Image.open(image_path) as pil_image:
            image = pil_image if self.transform is None else self.transform(pil_image)
            # Only go through NumPy when the transform did not already return
            # a tensor (e.g. no transform, or one that yields a PIL image).
            if not torch.is_tensor(image):
                image = torch.from_numpy(np.array(image))

        return image.float(), y_label


In [None]:
def get_features(dataset, batch_size=100):
    """Encode every image in ``dataset`` with the notebook-level CLIP ``model``.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs that a default
            ``DataLoader`` can batch.
        batch_size: Samples per forward pass. Defaults to 100, the value that
            was previously hard-coded.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one row / entry
        per sample, in the order the ``DataLoader`` iterates the dataset.
    """
    all_features = []
    all_labels = []

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        for images, labels in tqdm(DataLoader(dataset, batch_size=batch_size)):
            features = model.encode_image(images.to(device))

            # Move each batch to host memory immediately so accelerator memory
            # holds one batch of features at a time instead of all of them.
            all_features.append(features.cpu())
            all_labels.append(labels)

    return torch.cat(all_features).numpy(), torch.cat(all_labels).cpu().numpy()

In [None]:
# Alternative 8-class label list (commented out, not used below):
#['sadness', 'fear', 'disgust', 'contentment', 'anger', 'awe', 'excitment', 'amusement']

# Class names indexed by predicted class id: position i is the name reported
# for class i. Presumably this order matches the integer labels in the CSV
# files -- verify against the annotation files.
emotions = [
    'anger',
    'joy',
    'sad',
]

In [None]:
# Training split: annotations from train_final.csv, image files from the
# train/ folder, each image run through CLIP's preprocessing transform.
train = ImageDataset(
    csv_file='/content/drive/MyDrive/Prism/train_final.csv',
    root_dir="/content/drive/MyDrive/Prism/train",
    transform=preprocess,
)

In [None]:
# Test split: annotations from test_final.csv, image files from the test/
# folder, each image run through CLIP's preprocessing transform.
test = ImageDataset(
    csv_file='/content/drive/MyDrive/Prism/test_final.csv',
    root_dir="/content/drive/MyDrive/Prism/test",
    transform=preprocess,
)

In [None]:
# Run both splits through the CLIP image encoder once; everything downstream
# works on these NumPy feature / label arrays rather than on the images.
train_features, train_labels = get_features(dataset=train)
test_features, test_labels = get_features(dataset=test)

100%|██████████| 1/1 [00:36<00:00, 36.26s/it]
100%|██████████| 1/1 [00:20<00:00, 20.65s/it]


In [None]:
# Fit a logistic-regression classifier on the precomputed CLIP image features.
# random_state is fixed so repeated runs use the same seed.
classifier = LogisticRegression(
    random_state=0,
    C=0.316,
    max_iter=1000,
    verbose=1,
)
classifier.fit(train_features, train_labels)

In [None]:
# Score the logistic-regression classifier on the test features: accuracy is
# the percentage of test samples whose predicted label equals the true label.
predictions = classifier.predict(test_features)
is_correct = predictions == test_labels
accuracy = 100. * np.mean(is_correct.astype(float))
print(f"Accuracy = {accuracy:.3f}")

Accuracy = 88.889


In [None]:
# Ground-truth labels of the test split, as returned by get_features.
test_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2])

In [None]:
# Classifier output for the same test samples, shown for side-by-side
# comparison with test_labels.
predictions

array([0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2])

In [None]:
# Preprocess one standalone image and add a leading batch dimension so it can
# be fed to the encoder on `device`. Opening the file in a context manager
# closes the handle that Image.open would otherwise leave open.
with Image.open("/content/drive/MyDrive/Prism/testimage.jpg") as single_image:
    test_image = preprocess(single_image).unsqueeze(0).to(device)
test_image.shape

torch.Size([1, 3, 224, 224])

In [None]:
# Encode the single image under no_grad, the same way get_features does:
# this is inference only, so no autograd graph needs to be built and then
# detached. The result is moved to host memory as a NumPy array for sklearn.
with torch.no_grad():
    test_image_features = model.encode_image(test_image).cpu().numpy()

In [None]:
# Predict the class id for the single encoded image; the next cell reads the
# first (and only expected) entry of the returned array.
pred = classifier.predict(test_image_features)

In [None]:
# Translate the predicted class id into its name via the emotions list.
emotions[pred[0]]

'joy'