In [None]:
# Cell 1: imports
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import json, os
from PIL import Image
import matplotlib.pyplot as plt


# Input: CSV of image embeddings; the notebook reads a 'filename' column and
# treats every other column as an embedding dimension.
EMB = "outputs/image_embeddings_clean.csv"
# Output: JSON file the recommendations are written to.
OUT = "outputs/recommendation.json"


# Cell 2: load the embedding table and compute pairwise cosine similarity
df = pd.read_csv(EMB)
filenames = df['filename'].tolist()

# Every column other than 'filename' is used as an embedding dimension.
X = df.drop(columns=['filename']).values

# Scale each row to unit L2 norm; the small epsilon keeps an all-zero row
# from causing a division by zero.
row_norms = np.linalg.norm(X, axis=1, keepdims=True)
X = X / (row_norms + 1e-12)

# sim[i, j] is the cosine similarity between rows i and j of X.
sim = cosine_similarity(X)


# Cell 3: build recommendations
# For every image, keep its TOP_K most similar *other* images as
# (filename, cosine similarity) pairs, most similar first.
TOP_K = 10
recs = {}
for i, fname in enumerate(filenames):
    # Row indices ordered by descending similarity to image i.
    ranked = sim[i].argsort()[::-1]
    # Remove the query by index instead of assuming it sorts to position 0:
    # with duplicate or all-zero embeddings the query is not guaranteed to
    # come first, and slicing [1:] would then keep the query itself and drop
    # a real neighbour.
    neighbours = ranked[ranked != i][:TOP_K]
    recs[fname] = [(filenames[j], float(sim[i, j])) for j in neighbours]

# json.dump serialises each (filename, score) tuple as a two-element array.
with open(OUT, 'w', encoding='utf-8') as f:
    json.dump(recs, f, indent=2)
print('Saved', OUT)


# Cell 4: visualize for a few examples
# For each of the first five images, draw one figure: the query image
# followed by its top-5 recommendations, each panel titled with its score.
sample = filenames[:5]
for s in sample:
    print('Query:', s)
    top = recs[s][:5]
    plt.figure(figsize=(12, 3))
    # The query is prepended with a nominal score of 1.0 so it occupies the
    # first of the six panels.
    for idx, (fn, score) in enumerate([(s, 1.0)] + top):
        plt.subplot(1, 6, idx + 1)
        # Open inside a context manager so the file handle is released after
        # drawing; imshow is called while the file is still open.
        with Image.open('datasets/images/' + fn) as img:
            plt.imshow(img)
        plt.title(f'{score:.2f}')
        plt.axis('off')
    plt.show()