In [None]:
!python3 -m pip install faiss-cpu

In [None]:
# !pip uninstall clip
!pip install git+https://github.com/openai/CLIP.git

In [None]:
import clip

In [None]:
import roboflow

# Log in to Roboflow first; the download call below follows it.
roboflow.login()

# Download the dataset at the URL below in "coco" format into data/coco-128.
roboflow.download_dataset(
    dataset_url="https://universe.roboflow.com/team-roboflow/coco-128/dataset/2",
    model_format="coco",
    location="data/coco-128",
)

In [None]:
!pip3 install roboflow

In [None]:
def create_faiss_index(embeddings):
    """Build an exact L2 FAISS index holding every row of ``embeddings``.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dim). It is converted to a
            C-contiguous float32 NumPy array before being added to the index.

    Returns:
        A ``faiss.IndexFlatL2`` whose dimensionality equals ``embeddings.shape[1]``
        and to which all rows have been added.

    Raises:
        ValueError: If ``embeddings`` is not 2-D.
    """
    # Local import: this cell is defined before the notebook's import cell.
    import numpy as np

    # FAISS works on contiguous float32 matrices; coerce once up front.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            f"embeddings must be 2-D (n_vectors, dim), got shape {vectors.shape}"
        )

    # Take the dimensionality from the data instead of assuming 512, so models
    # with a different embedding width work too.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    return index

def load_faiss_index(index_path, image_paths_path):
    """Read a persisted FAISS index together with its companion JSON file.

    Args:
        index_path: Location of the serialized FAISS index.
        image_paths_path: Location of the JSON file holding the image paths.

    Returns:
        A ``(index, image_paths)`` tuple: the object returned by
        ``faiss.read_index`` and the decoded JSON content.
    """
    faiss_index = faiss.read_index(index_path)

    # The JSON document is handed back exactly as decoded.
    with open(image_paths_path, "r") as paths_file:
        stored_paths = json.loads(paths_file.read())

    return faiss_index, stored_paths

In [None]:
import glob
import json
from PIL import Image
from pathlib import Path

import clip
import faiss
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset


class ImageDataset(Dataset):
    """Map-style dataset that opens image files and applies a preprocessing callable.

    Args:
        image_paths: Sequence of image file paths; item ``idx`` is read from
            ``image_paths[idx]``.
        preprocess: Callable applied to the opened PIL image. Its return value is
            what ``__getitem__`` yields, so it must finish reading the pixels
            before returning (the file is closed right afterwards).
    """

    def __init__(self, image_paths, preprocess):
        self.image_paths = image_paths
        self.preprocess = preprocess

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Open the image at position ``idx`` and return its preprocessed form."""
        # Close the file as soon as preprocessing is done; otherwise each item
        # leaves an open handle behind until it is garbage-collected.
        with Image.open(self.image_paths[idx]) as image:
            return self.preprocess(image)


def get_data_paths(dir: str | list[str], data_formats: list, prefix: str = '') -> list[str]:
    """
    Get list of files in a folder that have a file extension in the data_formats.

    Args:
      dir (str | list[str]): Dir or list of dirs containing data.
      data_formats (list): List of file extensions. Ex: ['jpg', 'png']
      prefix (str): Prefix for logging messages.

    Returns:
      A list of strings.
    """
    try:
        f = []  # data files
        for d in dir if isinstance(dir, list) else [dir]:
            p = Path(d)
            if p.is_dir():
                f += glob.glob(str(p / '**' / '*.*'), recursive=True)
            else:
                raise FileNotFoundError(f'{prefix}{p} does not exist')
        data_files = sorted(x for x in f if x.split('.')[-1].lower() in data_formats)
        return data_files
    except Exception as e:
        raise Exception(f'{prefix}Error loading data from {dir}: {e}') from e


def get_image_embeddings(data_dir, model_name="ViT-B/32", batch_size=32, device="cpu"):
    """Encode every image under ``data_dir`` into an L2-normalised CLIP embedding.

    Args:
        data_dir: Directory (or list of directories) scanned for jpg/jpeg/png files.
        model_name: CLIP checkpoint name passed to ``clip.load``.
        batch_size: Number of images encoded per forward pass.
        device: Torch device on which the model and the batches are placed.

    Returns:
        ``(image_embeddings, image_paths)``: a float32 NumPy array with one
        unit-norm row per image, and the list of image files that were encoded.

    Raises:
        ValueError: If no image is found under ``data_dir``.
    """
    # Discover the files first so an empty folder fails before the model is loaded.
    image_paths = get_data_paths(data_dir, data_formats=["jpg", "jpeg", "png"])
    print(len(image_paths))
    if not image_paths:
        raise ValueError(f"No jpg/jpeg/png images found under {data_dir!r}")

    # Load the CLIP model and wrap the files in a batched loader
    model, preprocess = clip.load(model_name, device=device)
    dataloader = DataLoader(ImageDataset(image_paths, preprocess), batch_size=batch_size)

    batch_embeddings = []
    with torch.no_grad():
        for images in dataloader:
            # Cast to float32 before normalising: the model may run in half
            # precision on GPU, and downstream consumers (FAISS) need float32.
            embeddings = model.encode_image(images.to(device)).float()
            embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
            batch_embeddings.append(embeddings.cpu().numpy())

    # Stack the per-batch arrays into a single (n_images, dim) matrix
    return np.vstack(batch_embeddings), image_paths


def create_faiss_index(embeddings):
    """Build an exact L2 FAISS index holding every row of ``embeddings``.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dim). It is converted to a
            C-contiguous float32 NumPy array before being added to the index.

    Returns:
        A ``faiss.IndexFlatL2`` whose dimensionality equals ``embeddings.shape[1]``
        and to which all rows have been added.

    Raises:
        ValueError: If ``embeddings`` is not 2-D.
    """
    # FAISS works on contiguous float32 matrices; coerce once up front.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            f"embeddings must be 2-D (n_vectors, dim), got shape {vectors.shape}"
        )

    # Take the dimensionality from the data instead of assuming 512, so models
    # with a different embedding width work too.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    return index


# Embed the training images once; both artifacts written below derive from this result.
embeddings, image_paths = get_image_embeddings("data/coco-128/train", device="cpu")

# Persist the index and its path list. The retrieval cells further down load
# "data/index.faiss" and "data/image_paths.json", so leaving this step disabled
# makes them fail on a fresh kernel.
index = create_faiss_index(embeddings)
faiss.write_index(index, "data/index.faiss")
with open("data/image_paths.json", "w") as f:
    json.dump(image_paths, f, indent=4)

# Also store an explicit {position: path} lookup (JSON turns the integer keys into strings).
filepath_mapping = {idx: path for idx, path in enumerate(image_paths)}
with open("data/filepath.json", "w") as f:
    json.dump(filepath_mapping, f, indent=4)

In [None]:
def get_similar_images(images, index, image_paths, top_k=5, model_name="ViT-B/32", device="cpu"):
    """Return, for each query image, the indexed images closest to it in CLIP space.

    Args:
        images: Iterable of PIL images to use as queries.
        index: Search index exposing ``search(vectors, k)`` (e.g. a FAISS index).
        image_paths: Sequence mapping an index row number to its image file path.
        top_k: Maximum number of neighbours to retrieve per query.
        model_name: CLIP checkpoint name passed to ``clip.load``.
        device: Torch device on which the model and the queries are placed.

    Returns:
        A list with one entry per query image; each entry is a list of opened
        PIL images for that query's neighbours (fewer than ``top_k`` when the
        index holds fewer vectors). An empty ``images`` yields ``[]``.
    """
    images = list(images)
    if not images:
        # torch.stack rejects an empty list; there is nothing to search for.
        return []

    # Load the CLIP model
    clip_model, clip_preprocess = clip.load(model_name, device=device)

    # Preprocess and get embeddings for input images
    processed_images = torch.stack([clip_preprocess(img) for img in images]).to(device)

    with torch.no_grad():
        # Cast to float32: the model may run in half precision on GPU, while the
        # index expects float32 query vectors.
        embeddings = clip_model.encode_image(processed_images).float()
        embeddings /= embeddings.norm(dim=-1, keepdim=True)
        embeddings = embeddings.cpu().numpy()

    # Search for similar images
    _, neighbour_ids = index.search(embeddings, top_k)

    # FAISS pads missing neighbours with -1; skip them so that image_paths[-1]
    # (the last image) is not returned by accident.
    similar_paths = [
        [image_paths[int(i)] for i in row if i >= 0]
        for row in neighbour_ids
    ]
    print(similar_paths)

    return [[Image.open(img_path) for img_path in paths] for paths in similar_paths]

# Example usage
device = "cuda" if torch.cuda.is_available() else "cpu"
index, image_paths = load_faiss_index("data/index.faiss", "data/image_paths.json")

# Open the query files straight away (replace with your image paths)
query_images = [
    Image.open(img_path)
    for img_path in [
        "data/coco-128/test/000000000034_jpg.rf.b518abdaed199dcb88854cf20fce8078.jpg",
        "data/coco-128/test/000000000283_jpg.rf.27927692baf616a7456bb3e24c21bfd7.jpg",
    ]
]

# Look up the nearest indexed images for each query
similar_images = get_similar_images(query_images, index, image_paths)

In [None]:
import faiss
import json
import numpy as np
from PIL import Image

# Files read by load_faiss_index below: the serialized FAISS index and the JSON
# file of image paths that accompanies it.
INDEX_PATH = "data/index.faiss"
IMAGE_PATHS_PATH = "data/image_paths.json"
NUM_SAMPLES = 2  # Number of samples to retrieve

def load_faiss_index(index_path, image_paths_path):
    """Load a FAISS index from disk along with the JSON file of image paths.

    Args:
        index_path: Path of the serialized FAISS index.
        image_paths_path: Path of the JSON file holding the image paths.

    Returns:
        A ``(index, image_paths)`` tuple: the result of ``faiss.read_index`` and
        the decoded JSON content.
    """
    restored_index = faiss.read_index(index_path)

    # Decode the whole JSON document in one go.
    with open(image_paths_path, "r") as json_file:
        restored_paths = json.loads(json_file.read())

    return restored_index, restored_paths

def get_samples_from_index(index, image_paths, num_samples=NUM_SAMPLES, seed=None):
    """Draw random entries from the index and return their vectors and image paths.

    Args:
        index: Index exposing ``ntotal`` and ``reconstruct(i)``.
        image_paths: Sequence mapping an index row number to its image path.
        num_samples: How many distinct entries to draw; capped at ``index.ntotal``.
        seed: Optional seed for a reproducible draw. When ``None`` the global
            ``np.random`` state is used.

    Returns:
        ``(sample_embeddings, sample_image_paths)``: two lists of equal length,
        aligned position by position.
    """
    # Get the total number of entries in the index
    num_entries = index.ntotal

    # Sampling without replacement cannot return more items than exist, so cap
    # the request instead of letting the draw raise.
    num_samples = min(num_samples, num_entries)

    # Select random indices to retrieve
    if seed is None:
        drawn = np.random.choice(num_entries, num_samples, replace=False)
    else:
        drawn = np.random.default_rng(seed).choice(num_entries, num_samples, replace=False)

    # Plain ints: reconstruct() and list indexing both receive ordinary integers.
    sample_indices = [int(idx) for idx in drawn]

    # Retrieve the embeddings and corresponding image paths
    sample_embeddings = [index.reconstruct(idx) for idx in sample_indices]
    sample_image_paths = [image_paths[idx] for idx in sample_indices]

    return sample_embeddings, sample_image_paths

def display_sample_images(sample_image_paths):
    """Open each image path in turn and show it with the image's ``show()`` method.

    Args:
        sample_image_paths: Iterable of image file paths.
    """
    for image_path in sample_image_paths:
        # Close the file once the image has been handed to the viewer, so that
        # displaying many samples does not accumulate open handles.
        with Image.open(image_path) as img:
            img.show()

if __name__ == "__main__":
    # Restore the persisted index together with its list of image paths
    index, image_paths = load_faiss_index(INDEX_PATH, IMAGE_PATHS_PATH)

    # Draw a few random entries (vector + source file) from the index
    sample_embeddings, sample_image_paths = get_samples_from_index(index, image_paths)

    # Report each sampled pair as three lines followed by a blank separator line
    for i, (embedding, path) in enumerate(zip(sample_embeddings, sample_image_paths)):
        print(
            f"Sample {i+1}:",
            f"Image Path: {path}",
            f"Embedding: {embedding}",
            "",
            sep="\n",
        )

    # Finally show the sampled images themselves
    display_sample_images(sample_image_paths)


In [None]:
import faiss
import clip
import json
import numpy as np
from PIL import Image

def get_image_embeddings(data_dir, model_name="ViT-B/32", batch_size=32, device="cpu"):
    """Encode every image under ``data_dir`` into an L2-normalised CLIP embedding.

    Args:
        data_dir: Directory (or list of directories) scanned for jpg/jpeg/png files.
        model_name: CLIP checkpoint name passed to ``clip.load``.
        batch_size: Number of images encoded per forward pass.
        device: Torch device on which the model and the batches are placed.

    Returns:
        ``(image_embeddings, image_paths)``: a float32 NumPy array with one
        unit-norm row per image, and the list of image files that were encoded.

    Raises:
        ValueError: If no image is found under ``data_dir``.
    """
    # Discover the files first so an empty folder fails before the model is loaded.
    image_paths = get_data_paths(data_dir, data_formats=["jpg", "jpeg", "png"])
    print(len(image_paths))
    if not image_paths:
        raise ValueError(f"No jpg/jpeg/png images found under {data_dir!r}")

    # Load the CLIP model and wrap the files in a batched loader
    model, preprocess = clip.load(model_name, device=device)
    dataloader = DataLoader(ImageDataset(image_paths, preprocess), batch_size=batch_size)

    batch_embeddings = []
    with torch.no_grad():
        for images in dataloader:
            # Cast to float32 before normalising: the model may run in half
            # precision on GPU, and downstream consumers (FAISS) need float32.
            embeddings = model.encode_image(images.to(device)).float()
            embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
            batch_embeddings.append(embeddings.cpu().numpy())

    # Stack the per-batch arrays into a single (n_images, dim) matrix
    return np.vstack(batch_embeddings), image_paths

embeddings, image_paths = get_image_embeddings("data/coco-128/train", device="cpu")

# Write a {position: path} lookup for the embedded images to data/filepath.json
filepath_mapping = dict(enumerate(image_paths))
with open("data/filepath.json", "w") as f:
    f.write(json.dumps(filepath_mapping, indent=4))

In [None]:
!pip install sentence_transformers

In [None]:
from sentence_transformers import SentenceTransformer, util
from PIL import Image

# Load the CLIP checkpoint through sentence-transformers
model = SentenceTransformer('clip-ViT-B-32')

# NOTE(review): machine-specific absolute path; point this at a local image before running.
QUERY_IMAGE_PATH = '/Users/haphuongthao/Downloads/1a50811a14ee1a67b1fd7d7648fd24dd.png'

# Encode an image; the context manager closes the file once it has been embedded
with Image.open(QUERY_IMAGE_PATH) as query_image:
    img_emb = model.encode(query_image)

# Encode text descriptions
text_emb = model.encode(['Two dogs in the snow', 'A cat on a table', 'A picture of London at night'])

# Compute cosine similarities between the image and each description
cos_scores = util.cos_sim(img_emb, text_emb)
print(cos_scores)


In [None]:
# Display the image embedding returned by model.encode in the cell above
img_emb

In [None]:
# Presumably a NumPy array, in which case .size is the number of elements — TODO confirm
img_emb.size

In [None]:
# Display the image embedding once more (same expression as two cells up)
img_emb