In [15]:
import pandas as pd
import requests
from io import BytesIO
from PIL import Image
import torch
import open_clip
import os
from tqdm import tqdm
import numpy as np

In [16]:
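# Possible workaround if torch raises "RuntimeError: Numpy is not available"
# (the installed torch build may not be compatible with NumPy 2.x — confirm for your versions):
# pip install "numpy<2" --force-reinstall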


In [17]:
import chromadb
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader

# The Chroma client is the main entry point for creating collections (the
# vector-store analogue of tables), adding records and running queries.
# PersistentClient keeps its data on disk under `path`, so nothing is lost
# when the kernel or program terminates.
chroma_client = chromadb.PersistentClient(path="data/pinteresty.db")

# Data loader handed to the collection so it can load images referenced by URI.
image_loader = ImageLoader()

# Multimodal (image + text) embedding function attached to the collection.
# The model and checkpoint are pinned to the same ViT-B-32 / "openai" weights
# this notebook uses when it computes embeddings by hand; leaving Chroma's
# default checkpoint would embed queries in a different vector space than the
# stored vectors.
embedding_function = OpenCLIPEmbeddingFunction(
    model_name="ViT-B-32",
    checkpoint="openai",
)

In [18]:
# !pip install chromadb pillow torch torchvision open_clip_torch tqdm


In [19]:
# Open the vector collection that backs the search engine, creating it the
# first time the notebook runs against this database. The embedding function
# and image loader are attached so the collection can embed and load inputs itself.
pin_collection = chroma_client.get_or_create_collection(
    name="pinteresty_collection",
    data_loader=image_loader,
    embedding_function=embedding_function,
)

In [20]:
# Location of the scraped Pinterest CSV. Set the PINTERESTY_DATA_CSV
# environment variable to point at another copy of the file; when it is unset
# the original local path is used, so existing behaviour is unchanged.
data_csv_path = os.environ.get(
    "PINTERESTY_DATA_CSV",
    "/Users/riddhishah/Documents/GitHub/Multimodal-Item-Search-Engine/data.csv",
)
df = pd.read_csv(data_csv_path)
df.head()

Unnamed: 0,url,post_id,title,content,date_posted,user_name,user_url,user_id,followers,likes,categories,source,attached_files,image_video_url,video_length,hashtags,comments_num,comments,discovery_input,post_type
0,https://www.pinterest.com/pin/17381148553849109,17381148553849109,50 DIY Dog Toys You Can Make In No Time,If your home dog buddy is not so gentle with t...,"""2022-06-11T00:02:08.000Z""",allyboo24,https://www.pinterest.com/allyboo24,17381285970317537,151,3,"[""Tiere"",""Tiere Und Heimtierbedarf""]",,"[""https://i.pinimg.com/originals/6d/38/d7/6d38...",https://i.pinimg.com/originals/6d/38/d7/6d38d7...,0,"[""Hausgemachtes Hundespielzeug"",""Hunde Spielze...",2,"[""Nice thought, but oh-no-no-no. My dog would ...","{""keyword"":""dog toys""}",image
1,https://www.pinterest.com/pin/1084804628983062016,1084804628983062016,Dog Toys - Snuggly Cup,Golden Pooch Tennis Shoe Plush Dog Toy - Get t...,"""2024-05-01T12:30:48.000Z""",lilyjaneboutique1,https://www.pinterest.com/lilyjaneboutique1,1050746294219424452,73,5,"[""Animals""]",,"[""https://i.pinimg.com/originals/98/2d/85/982d...",https://i.pinimg.com/originals/98/2d/85/982d85...,0,"[""Makeup Dog Toys"",""Dog Stuff Aesthetic"",""Cute...",0,,"{""keyword"":""dog toys""}",image
2,https://www.pinterest.com/pin/355362226865072169,355362226865072169,Aesthetic Dog Finds,Shop Premium Dog Beds for Large Dogs … and oth...,"""2024-06-28T16:57:59.000Z""",hayleylarue,https://www.pinterest.com/hayleylarue,267049590312665613,68385,10,"[""Animals"",""Animals And Pet Supplies"",""Dog Sup...",,"[""https://i.pinimg.com/originals/6b/17/83/6b17...",https://i.pinimg.com/originals/6b/17/83/6b1783...,0,"[""Aesthetic Dog Room Ideas"",""Aesthetic Dog Dec...",0,,"{""keyword"":""dog toys""}",image
3,https://www.pinterest.com/pin/694328467581318715,694328467581318715,Pup Cup Tumbler Plush Squeaker Dog Toy - Blue,Introducing the Pup Cup Tumbler Dog Toy - wher...,"""2024-04-06T12:32:54.000Z""",miamorepets,https://www.pinterest.com/miamorepets,694328604959557638,447,14,"[""Animals""]",,"[""https://i.pinimg.com/originals/d3/b9/25/d3b9...",https://i.pinimg.com/originals/d3/b9/25/d3b925...,0,"[""Things To Get Your Dog"",""Cute Stuff For Dogs...",2,"[""i dont think thats blue.. (check the title)""...","{""keyword"":""dog toys""}",image
4,https://www.pinterest.com/pin/986710599584098709,986710599584098709,The 50 Best Interactive Dog Toys for Brain Sti...,For pets who need brain stimulation and fight ...,"""2022-07-22T04:23:33.000Z""",topdogtips,https://www.pinterest.com/topdogtips,542050642559011518,76098,4,"[""Tiere""]",,"[""https://i.pinimg.com/originals/28/88/11/2888...",https://i.pinimg.com/originals/28/88/11/288811...,0,"[""Hunde Spielzeug Diy"",""Hündchen Übung"",""Hunde...",4,"[""Looks like fun for the Dogs"",""Works like a k...","{""keyword"":""dog toys""}",image


In [21]:
def load_image_from_url(url, timeout=10):
    """Download an image and return it as an RGB PIL image.

    This is a best-effort helper: any failure yields ``None`` so the caller
    can skip the record instead of aborting a long ingestion run.

    Args:
        url: Address of the image. Anything that is not a non-empty string
            (for example the NaN pandas uses for a missing cell) is rejected
            without touching the network.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The decoded image converted to RGB, or ``None`` when the URL is
        missing, the request fails or returns an HTTP error status, or the
        payload cannot be decoded as an image.
    """
    if not isinstance(url, str) or not url.strip():
        return None

    try:
        resp = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures rather than handing an error
        # page to the image decoder.
        resp.raise_for_status()
        return Image.open(BytesIO(resp.content)).convert("RGB")
    except Exception:
        # Deliberately broad so one bad record never stops the run, but unlike
        # a bare `except:` this lets KeyboardInterrupt/SystemExit propagate,
        # so the loop can still be interrupted.
        return None


In [22]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ViT-B-32 CLIP with the "openai" pretrained weights. The second return value
# is discarded; `preprocess` is the image transform applied before encoding.
model, _, preprocess = open_clip.create_model_and_transforms(
    model_name="ViT-B-32",
    pretrained="openai"
)
tokenizer = open_clip.get_tokenizer("ViT-B-32")

# The notebook only runs inference, so put the model in eval mode explicitly
# (as open_clip's usage example does) after moving it to the target device.
# `.eval()` returns the module itself.
model = model.to(device).eval()



In [23]:
def encode_image(path):
    """Embed the image at `path` with the notebook's CLIP model.

    Args:
        path: Anything ``PIL.Image.open`` accepts (a file path or file object).

    Returns:
        The image embedding as a flat list of Python floats.
    """
    # Open inside a context manager so the underlying file is closed promptly;
    # `convert` yields an independent in-memory RGB copy.
    with Image.open(path) as raw:
        rgb = raw.convert("RGB")

    # Add a leading batch dimension of 1 and move the input to the model's device.
    batch = preprocess(rgb).unsqueeze(0).to(device)
    with torch.no_grad():
        emb = model.encode_image(batch)

    # Convert straight from the tensor: `Tensor.tolist()` gives the same Python
    # floats without going through `Tensor.numpy()`, which fails with
    # "Numpy is not available" when torch cannot use the installed NumPy.
    return emb[0].cpu().tolist()

In [24]:
def encode_text(text):
    """Embed a single text string with the notebook's CLIP model.

    Args:
        text: The query or caption to embed.

    Returns:
        The text embedding as a flat list of Python floats.
    """
    # The tokenizer is called with a list, so wrap the string as a batch of one.
    tokens = tokenizer([text]).to(device)
    with torch.no_grad():
        emb = model.encode_text(tokens)

    # Convert straight from the tensor: `Tensor.tolist()` gives the same Python
    # floats without going through `Tensor.numpy()`, which fails with
    # "Numpy is not available" when torch cannot use the installed NumPy.
    return emb[0].cpu().tolist()


In [25]:
# pip install --upgrade --force-reinstall numpy
# 

In [None]:
import numpy as np
# Embed every pin image in `df` and store the vectors in `pin_collection`.
# Rows whose image cannot be downloaded or decoded are skipped.

# Records are written in slices of this size so a single `add` call stays
# small regardless of how many rows the CSV has.
ADD_BATCH_SIZE = 500

ids = []
embeddings = []
metadatas = []
seen_ids = set()  # post_ids already queued; repeated ids are skipped

for idx, row in tqdm(df.iterrows(), total=len(df), desc="embedding pins"):
    url = row["image_video_url"]
    post_id = str(row["post_id"])

    # Skip repeated posts before paying for the download.
    if post_id in seen_ids:
        continue

    img = load_image_from_url(url)
    if img is None:
        continue

    # OpenCLIP preprocessing, plus a leading batch dimension of 1.
    inp = preprocess(img).unsqueeze(0).to(device)
    with torch.no_grad():
        # `Tensor.tolist()` avoids the `Tensor.numpy()` round trip, which is one
        # place torch raises "Numpy is not available".
        emb = model.encode_image(inp)[0].cpu().tolist()

    # pandas represents a missing title as NaN; store an empty string instead
    # so the metadata field is always text.
    title = row["title"]

    seen_ids.add(post_id)
    ids.append(post_id)
    embeddings.append(emb)
    metadatas.append({
        "url": url,
        "title": "" if pd.isna(title) else str(title),
        "post_id": post_id,
    })

# Write in slices; when nothing was embedded the range is empty and `add` is
# never called with empty lists.
for start in range(0, len(ids), ADD_BATCH_SIZE):
    stop = start + ADD_BATCH_SIZE
    pin_collection.add(
        ids=ids[start:stop],
        embeddings=embeddings[start:stop],
        metadatas=metadatas[start:stop],
    )


RuntimeError: Numpy is not available

In [None]:
# !pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
