In [None]:
import torch
import clip
from PIL import Image

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Preprocess the image file and add a leading batch axis, then tokenize
# the three candidate captions; both inputs are moved to `device`.
image = preprocess(Image.open("CLIP.png")).unsqueeze(0)
image = image.to(device)
text = clip.tokenize(["a diagram", "a dog", "a cat"])
text = text.to(device)

# Inference only, so gradient tracking is switched off for everything below.
with torch.no_grad():
    # Outputs of the two encoders; nothing later in this cell reads them.
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

    # Calling the model on both inputs returns two values, unpacked here
    # as logits_per_image and logits_per_text.
    logits_per_image, logits_per_text = model(image, text)
    print(logits_per_image, logits_per_text, sep="\n")

    # Softmax over the last axis, then copy to host memory as a NumPy array.
    probs = logits_per_image.softmax(dim=-1)
    probs = probs.cpu().numpy()

print("Label probs:", probs)  # prints: [[0.9927937  0.00421068 0.00299572]]

In [1]:
import os
import clip
import torch
from torchvision.datasets import CIFAR100
import cv2
import numpy as np

# Choose the device and load the model together with its preprocessing callable.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load('ViT-B/32', device)

# Test split of CIFAR100, downloaded (if needed) under the user's ~/.cache.
cifar100 = CIFAR100(root=os.path.expanduser("~/.cache"), download=True, train=False)

# Take the sample at index 3637 and turn it into a batch of one on `device`.
image, class_id = cifar100[3637]
image_input = preprocess(image).unsqueeze(0).to(device)
print(image_input.shape)

# One tokenized prompt per dataset class name, concatenated into one tensor.
text_inputs = torch.cat(
    [clip.tokenize(f"a photo of a {class_name}") for class_name in cifar100.classes]
).to(device)

# Encode both inputs without tracking gradients and report the output shapes.
with torch.no_grad():
    image_features = model.encode_image(image_input)
    print(image_features.shape)
    text_features = model.encode_text(text_inputs)
    print(text_features.shape)

# Scale each feature vector to unit length in place (norm taken over the last axis).
image_features.div_(image_features.norm(dim=-1, keepdim=True))
text_features.div_(text_features.norm(dim=-1, keepdim=True))

# Scaled image-text dot products, turned into a distribution over the prompts;
# keep the five largest entries for the single image in the batch.
similarity = torch.softmax((100.0 * image_features) @ text_features.T, dim=-1)
values, indices = torch.topk(similarity[0], 5)

# Report each selected class name, right-aligned, with its score as a percentage.
print("\nTop predictions:\n")
for value, index in zip(values, indices):
    print(f"{cifar100.classes[index]:>16s}: {100 * value.item():.2f}%")

Files already downloaded and verified
torch.Size([1, 3, 224, 224])
torch.Size([1, 512])
torch.Size([100, 512])

Top predictions:

           snake: 65.28%
          turtle: 12.26%
    sweet_pepper: 3.86%
          lizard: 1.88%
       crocodile: 1.77%


In [2]:
import os
import clip
import torch

import numpy as np
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR100
from tqdm import tqdm

# Choose the device and load the model together with its preprocessing callable.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load('ViT-B/32', device)

# Both CIFAR100 splits live under ~/.cache and apply `preprocess` to every image.
root = os.path.expanduser("~/.cache")
train = CIFAR100(root, train=True, transform=preprocess, download=True)
test = CIFAR100(root, train=False, transform=preprocess, download=True)


def get_features(dataset):
    """Encode every image of ``dataset`` with the notebook's global ``model``.

    The dataset is read through a ``DataLoader`` in batches of 100. Each image
    batch is moved to the global ``device`` and passed to
    ``model.encode_image`` with gradient tracking disabled.

    Args:
        dataset: A dataset that ``DataLoader`` can batch into
            ``(images, labels)`` pairs.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays holding the
        concatenated encoder outputs and the concatenated labels, in the
        order the loader produced them.
    """
    feature_batches, label_batches = [], []
    loader = DataLoader(dataset, batch_size=100)

    with torch.no_grad():
        for images, labels in tqdm(loader):
            feature_batches.append(model.encode_image(images.to(device)))
            label_batches.append(labels)

    # Join the per-batch tensors, then copy to host memory as NumPy arrays.
    feature_array = torch.cat(feature_batches).cpu().numpy()
    label_array = torch.cat(label_batches).cpu().numpy()
    return feature_array, label_array

# Calculate the image features for both splits (one full pass over each dataset).
train_features, train_labels = get_features(train)
test_features, test_labels = get_features(test)

# Perform logistic regression on the extracted features.
# NOTE(review): the captured run of this cell stopped at the max_iter=1000
# limit and emitted a convergence warning; raise max_iter or scale the
# features if a fully converged fit is required.
classifier = LogisticRegression(random_state=0, C=0.316, max_iter=1000, verbose=1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier.
predictions = classifier.predict(test_features)
# Use the builtin `float` instead of the `np.float` alias: the alias was
# deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
# Both name the same float64 dtype, so the accuracy value is unchanged.
accuracy = np.mean((test_labels == predictions).astype(float)) * 100.
print(f"Accuracy = {accuracy:.3f}")

Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 500/500 [01:35<00:00,  5.25it/s]
100%|██████████| 100/100 [00:19<00:00,  5.21it/s]
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =        51300     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  2.30259D+05    |proj g|=  7.65441D+02


 This problem is unconstrained.



At iterate   50    f=  3.39192D+04    |proj g|=  4.96241D+02

At iterate  100    f=  2.93372D+04    |proj g|=  1.13858D+03

At iterate  150    f=  2.83932D+04    |proj g|=  4.18957D+02

At iterate  200    f=  2.81805D+04    |proj g|=  8.47547D+01

At iterate  250    f=  2.81458D+04    |proj g|=  3.42762D+01

At iterate  300    f=  2.81389D+04    |proj g|=  9.36279D+00

At iterate  350    f=  2.81364D+04    |proj g|=  5.97040D+00

At iterate  400    f=  2.81340D+04    |proj g|=  8.25179D+00

At iterate  450    f=  2.81285D+04    |proj g|=  1.90362D+01

At iterate  500    f=  2.81166D+04    |proj g|=  3.14495D+01

At iterate  550    f=  2.81056D+04    |proj g|=  3.57362D+00

At iterate  600    f=  2.81027D+04    |proj g|=  2.75133D+00

At iterate  650    f=  2.81021D+04    |proj g|=  3.02651D+00

At iterate  700    f=  2.81019D+04    |proj g|=  1.34445D+00

At iterate  750    f=  2.81018D+04    |proj g|=  9.81402D-01

At iterate  800    f=  2.81016D+04    |proj g|=  3.48979D+00

At iter

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.0min finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.
