In [14]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import skimage.io as io
import pandas as pd
from Dataset_classes.Flowers.Flowers_Dataset import Flowers102Dataset as Flowers_Dataset
import torchvision

import os
import json
import numpy as np

import matplotlib.pyplot as plt

from collections import OrderedDict
from pathlib import Path

import skimage
import IPython.display
import matplotlib.pyplot as plt
from PIL import Image

from clip import clip

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [15]:

################################################# Files import  #################################################

# Get current directory
current_dir = os.path.dirname(os.path.abspath(os.getcwd()))

# Get the outer directory where Config.json file is located
MLCOMP_dir = os.path.abspath(os.path.join(current_dir)) # os.pardir allows to go one directory back

# Path to the config file
# Config.json file contains the paths to the input dataset and output dataset directories
config_path = os.path.join(MLCOMP_dir, 'Config.json')

with open(config_path) as f:
    config = json.load(f)

# Get the statistics.json file path where the std and mean are stored
statistics_path = config['statistics_flowers102']

# Open the stats.json file and set the dictionary dataset_stats for normalization
with open(statistics_path) as f:
    dataset_stats = json.load(f)

# Directory containing the images
img_dir = config['output_dir_flowers102']

# Paths to the labels file
labels_file = config['output_labels_flowers102']

# Paths to the setid file
setid_file = config['output_setid_flowers102']

flowers_names_file = config['flowers_name']

with open(flowers_names_file) as f:
    flowers_names = json.load(f)


In [16]:
################################################# Dataset preparation  #################################################
dataset = Flowers_Dataset(csv_file = labels_file, root_dir= img_dir)
train_set, test_set = torch.utils.data.random_split(dataset, [7000,1189])

train_loader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=32, shuffle=True)


In [17]:
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [18]:
model, preprocess = clip.load("ViT-B/16")
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).eval()

In [19]:
print("Model parameters:", f"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}")
print("Input resolution:", model.visual.input_resolution)
print("Context length:", model.context_length)
print("Vocab size:", model.vocab_size)

Model parameters: 149,620,737
Input resolution: 224
Context length: 77
Vocab size: 49408


In [20]:
images = []
labels = set()

for i in range(len(train_set)):
    image, label = dataset.__getitem__(i)
    image = torch.tensor(image)
    images.append(image)
    labels.add(flowers_names[str(int(label))])  # Assuming class names are stored as strings in the json

In [30]:
labels

{'alpine sea holly',
 'anthurium',
 'artichoke',
 'azalea',
 'ball moss',
 'balloon flower',
 'barbeton daisy',
 'bearded iris',
 'bee balm',
 'bird of paradise',
 'bishop of llandaff',
 'black-eyed susan',
 'buttercup',
 'californian poppy',
 'canna lily',
 'canterbury bells',
 'cape flower',
 'carnation',
 'cautleya spicata',
 'clematis',
 "colt's foot",
 'columbine',
 'common dandelion',
 'corn poppy',
 'cyclamen',
 'daffodil',
 'desert-rose',
 'english marigold',
 'fire lily',
 'frangipani',
 'fritillary',
 'garden phlox',
 'gazania',
 'geranium',
 'giant white arum lily',
 'globe-flower',
 'grape hyacinth',
 'great masterwort',
 'hard-leaved pocket orchid',
 'hibiscus',
 'hippeastrum',
 'king protea',
 'lenten rose',
 'lotus lotus',
 'love in the mist',
 'magnolia',
 'marigold',
 'mexican aster',
 'monkshood',
 'morning glory',
 'orange dahlia',
 'osteospermum',
 'oxeye daisy',
 'passion flower',
 'pelargonium',
 'peruvian lily',
 'petunia',
 'pincushion flower',
 'pink primrose',

In [23]:
images_tensor = torch.stack(images)
images_tensor = images_tensor.permute(0, 3, 1, 2)
images_tensor.shape


torch.Size([7000, 3, 500, 500])

In [31]:
def encode_data(images_tensor, texts, device: str):

  # preprocess the texts to transform from text to tensors
  images_tensor.to(device)
  text_tokens = clip.tokenize(["A photo of " + desc for desc in texts]).to(device)

  # encode the inputs
  with torch.no_grad():
    images_z = model.encode_image(images_tensor).float()
    texts_z = model.encode_text(text_tokens).float()

  return images_z, texts_z

In [32]:
def cosine_similarity(images_z: torch.Tensor, texts_z: torch.Tensor):
  # normalise the image and the text
  images_z /= images_z.norm(dim=-1, keepdim=True)
  texts_z /= texts_z.norm(dim=-1, keepdim=True)

  # compute the dot product between the image and the text
  similarity = (texts_z @ images_z.T)

  return similarity.cpu()

images_z, texts_z = encode_data(images_tensor, labels, device)
similarity = cosine_similarity(images_z, texts_z)
print(similarity)

: 

In [None]:
def encode_data(images_fp: list[str], texts: list[str], device: str):
  # preprocess the images to transform from filenames to images to tensors
  images = [preprocess(Image.open(image)) for image in images_fp]

  # preprocess the texts to transform from text to tensors
  images = torch.tensor(np.stack(images)).to(device)
  text_tokens = clip.tokenize(["A photo of " + desc for desc in texts]).to(device)

  # encode the inputs
  with torch.no_grad():
    images_z = model.encode_image(images).float()
    texts_z = model.encode_text(text_tokens).float()

  return images_z, texts_z

In [11]:
def encode_data(images_fp: list[str], texts: list[str], device: str):
  # preprocess the images to transform from filenames to images to tensors
  images = [preprocess(Image.open(image)) for image in images_fp]

  # preprocess the texts to transform from text to tensors
  images = torch.tensor(np.stack(images)).to(device)
  text_tokens = clip.tokenize(["A photo of " + desc for desc in texts]).to(device)

  # encode the inputs
  with torch.no_grad():
    images_z = model.encode_image(images).float()
    texts_z = model.encode_text(text_tokens).float()

  return images_z, texts_z

In [12]:
images = []
labels = set()

for i in range(len(dataset)):
    image, label = dataset.__getitem__(i)
    image = torch.tensor(image)
    images.append(image)
    labels.add(flowers_names[str(int(label))])  # Assuming class names are stored as strings in the json

In [13]:
images_tensor = torch.stack(images)

In [14]:
images_tensor.shape

torch.Size([8189, 500, 500, 3])

torch.Size([8189, 3, 500, 500])

In [16]:
def encode_data(images: list[torch.Tensor], texts: list[str], device: str, model):
    text_tokens = clip.tokenize(["A photo of " + desc for desc in texts]).to(device)

    with torch.no_grad():
        images_z = model.encode_image(images).float()
        texts_z = model.encode_text(text_tokens).float()

    return images_z, texts_z

In [17]:
def cosine_similarity(images_z: torch.Tensor, texts_z: torch.Tensor):
    images_z /= images_z.norm(dim=-1, keepdim=True)
    texts_z /= texts_z.norm(dim=-1, keepdim=True)

    similarity = (texts_z @ images_z.T)

    return similarity.cpu()

images_z, texts_z = encode_data(images_tensor, labels, device, model)
similarity = cosine_similarity(images_z, texts_z)
print(similarity)


: 