https://colab.research.google.com/github/sparsh-ai/rec-tutorials/blob/master/_notebooks/2021-04-27-image-similarity-recommendations.ipynb#scrollTo=xS6CNOmrUfmi

In [4]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

# load_data() returns separate train and test splits; both are merged here
# into a single pool of images (x_data) with matching labels (y_data).
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_data = np.concatenate((x_train, x_test))
y_data = np.concatenate((y_train, y_test))

# Sanity check: both arrays must have the same leading dimension.
print("Shape das imagens:", x_data.shape)
print("Shape dos labels:", y_data.shape)


Shape das imagens: (70000, 28, 28)
Shape dos labels: (70000,)


In [5]:
def preprocess_images(images, max_images=None):
    """Turn 28x28 grayscale images into a ResNet50-ready batch.

    Each image is reshaped to (28, 28, 1), converted to a 3-channel RGB
    image, resized to 224x224 and converted back to an array. The stacked
    batch is then passed through ResNet50's ``preprocess_input``.

    Args:
        images: Sliceable collection of arrays, each holding 28*28 values.
        max_images: If given, only ``images[:max_images]`` is processed;
            ``None`` processes every image.

    Returns:
        The array returned by ``preprocess_input`` for the stacked
        (n, 224, 224, 3) batch.
    """
    limit = len(images) if max_images is None else max_images
    converted = []
    for raw in tqdm(images[:limit]):
        # array_to_img needs an explicit channel axis for grayscale input.
        gray = array_to_img(raw.reshape(28, 28, 1))
        resized = gray.convert("RGB").resize((224, 224))
        converted.append(img_to_array(resized))
    batch = np.array(converted)
    return preprocess_input(batch)

# Number of images to embed. Kept in one place so the image subset and the
# label subset cannot drift apart when this value is tuned.
N_IMAGES = 5000

x_data_rgb = preprocess_images(x_data, max_images=N_IMAGES)
# Slice the labels by the number of images actually processed, so that
# y_data_small[i] always describes x_data_rgb[i].
y_data_small = y_data[:len(x_data_rgb)]
print("Shape após preprocessamento:", x_data_rgb.shape)


100%|██████████| 5000/5000 [00:06<00:00, 740.68it/s]


Shape após preprocessamento: (5000, 224, 224, 3)


In [None]:

# ImageNet-pretrained ResNet50 without its classification head
# (include_top=False); pooling='avg' requests global average pooling so the
# network outputs one flat feature vector per image.
base_model = ResNet50(include_top=False, pooling='avg', weights='imagenet')
model = Model(outputs=base_model.output, inputs=base_model.input)

# Run the preprocessed batch through the network to obtain the embeddings.
embeddings = model.predict(x_data_rgb, verbose=1, batch_size=32)
print("Shape dos embeddings:", embeddings.shape)


[1m112/157[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m4:19[0m 6s/step

In [None]:
def recommend_similar(query_idx, embeddings, top_k=5):
    """Return the ``top_k`` items most similar to the query item.

    Similarity is the cosine similarity between the query's embedding and
    every row of ``embeddings``. The query itself is excluded by index, not
    by assuming it ranks first: with duplicate items or floating-point ties
    another row can score as high as the query, and a positional cut would
    then return the query as its own recommendation.

    Args:
        query_idx: Row index of the query item in ``embeddings`` (negative
            indices are accepted, as in normal indexing).
        embeddings: 2-D array with one embedding per row.
        top_k: Maximum number of neighbours to return. Fewer are returned
            when ``embeddings`` has fewer than ``top_k + 1`` rows.

    Returns:
        Tuple ``(top_idx, scores)``: the neighbour row indices ordered from
        most to least similar, and their cosine similarities.

    Raises:
        IndexError: If ``query_idx`` is out of range for ``embeddings``.
    """
    query_embedding = embeddings[query_idx].reshape(1, -1)
    similarities = cosine_similarity(query_embedding, embeddings)[0]

    # Map a negative index to its position so it can be matched below.
    query_pos = query_idx % len(similarities)

    # Stable descending sort: ties keep their original (lowest-index-first)
    # order, which makes the ranking deterministic.
    ranked = (-similarities).argsort(kind="stable")
    candidates = ranked[ranked != query_pos]
    top_idx = candidates[:max(top_k, 0)]
    return top_idx, similarities[top_idx]


In [None]:
import matplotlib.pyplot as plt

query_idx = 0
top_idx, sim_scores = recommend_similar(query_idx, embeddings, top_k=5)

# Show the query image with its label.
plt.imshow(x_data[query_idx], cmap='gray')
plt.title(f"Imagem de referência: Label={y_data_small[query_idx]}")
plt.axis('off')
plt.show()

# One panel per recommendation actually returned, so the grid stays in sync
# with top_k instead of assuming exactly five results.
n_cols = len(top_idx)
# squeeze=False keeps `axes` a 2-D array even when there is a single panel.
fig, axes = plt.subplots(1, n_cols, figsize=(3 * n_cols, 3), squeeze=False)
for ax, idx, score in zip(axes[0], top_idx, sim_scores):
    ax.imshow(x_data[idx], cmap='gray')
    ax.set_title(f"Label={y_data_small[idx]}\nSim={score:.2f}")
    ax.axis('off')
plt.show()
