# Sistema de Recomendação por Imagens

Este notebook implementa um sistema de recomendação de produtos baseado em similaridade visual, adaptado para execução no Google Colab.

## 1. Configuração do Ambiente

Primeiro, instalamos as dependências necessárias e garantimos que o TensorFlow não utilizará a GPU, forçando a execução em CPU para consistência.

In [1]:
!pip install numpy tensorflow Pillow scikit-learn keras



In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Desabilita GPUs

import tensorflow as tf
print('TensorFlow rodando em:', 'CPU' if not tf.config.list_physical_devices('GPU') else 'GPU') # Confirma o dispositivo

TensorFlow rodando em: CPU


## 2. Download e Preparação dos Dados

Baixamos o dataset Caltech 101 e o extraímos.

In [12]:
!wget https://data.caltech.edu/records/mzrjq-6wc02/files/101_ObjectCategories.tar.gz
!tar -xzf 101_ObjectCategories.tar.gz

--2025-09-17 06:29:36--  https://data.caltech.edu/records/mzrjq-6wc02/files/101_ObjectCategories.tar.gz
Resolving data.caltech.edu (data.caltech.edu)... 35.155.11.48
Connecting to data.caltech.edu (data.caltech.edu)|35.155.11.48|:443... connected.
HTTP request sent, awaiting response... 500 INTERNAL SERVER ERROR
2025-09-17 06:29:36 ERROR 500: INTERNAL SERVER ERROR.

tar (child): 101_ObjectCategories.tar.gz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now


## 3. Definição das Classes e Funções

Aqui definimos todas as classes e funções necessárias para o sistema, como `FeatureExtractor`, `Recommender` e a função de treinamento.

In [5]:
import pickle
import keras
from keras.preprocessing import image
from keras.models import Model, load_model
from keras.applications.vgg16 import preprocess_input
import numpy as np
import os

class FeatureExtractor:
    def __init__(self, model_path):
        base_model = load_model(model_path)
        self.model = Model(inputs=base_model.input, outputs=base_model.get_layer('feature_extraction_layer').output)

    def _preprocess_image(self, img_path):
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        return x

    def extract_feature(self, img_path):
        x = self._preprocess_image(img_path)
        feature = self.model.predict(x)[0]
        return feature / np.linalg.norm(feature)

    def extract_features_from_paths(self, path_list):
        features = {}
        for img_path in path_list:
            try:
                feature = self.extract_feature(img_path)
                features[img_path] = feature
            except Exception as e:
                print(f"Erro ao processar {img_path}: {e}")
        return features

    def save_features(self, features, filepath="features.pkl"):
        with open(filepath, 'wb') as f:
            pickle.dump(features, f)

    def load_features(self, filepath="features.pkl"):
        with open(filepath, 'rb') as f:
            return pickle.load(f)

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

class Recommender:
    def __init__(self, features_dict):
        self.features_dict = features_dict
        self.image_paths = list(features_dict.keys())
        self.feature_list = list(features_dict.values())

    def get_recommendations(self, query_image_path, top_k=5):
        if query_image_path not in self.features_dict:
            raise ValueError("Imagem de consulta não encontrada no dicionário de features.")

        query_feature = self.features_dict[query_image_path]
        similarities = cosine_similarity([query_feature], self.feature_list)[0]
        scores = list(zip(self.image_paths, similarities))
        sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)

        final_recommendations = []
        for path, score in sorted_scores:
            if path != query_image_path:
                final_recommendations.append((path, score))
            if len(final_recommendations) == top_k:
                break

        return final_recommendations

In [8]:
import shutil

def train_model(data_dir, selected_classes, model_save_path, epochs=10, batch_size=32, img_height=224, img_width=224):
    temp_data_dir = "temp_data_for_training"
    if os.path.exists(temp_data_dir):
        shutil.rmtree(temp_data_dir)

    for class_name in selected_classes:
        shutil.copytree(os.path.join(data_dir, class_name), os.path.join(temp_data_dir, class_name))

    print(f"Dados das classes {selected_classes} copiados para {temp_data_dir}")

    print("Carregando e dividindo o dataset...")
    train_ds = keras.utils.image_dataset_from_directory(
        temp_data_dir,
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size
    )

    val_ds = keras.utils.image_dataset_from_directory(
        temp_data_dir,
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size
    )

    class_names = train_ds.class_names
    print(f"Classes encontradas para o treinamento: {class_names}")

    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

    print("Construindo o modelo com base no VGG16...")
    base_model = keras.applications.VGG16(
        input_shape=(img_height, img_width, 3),
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = False

    inputs = keras.Input(shape=(img_height, img_width, 3))
    x = keras.applications.vgg16.preprocess_input(inputs)
    x = base_model(x, training=False)
    x = keras.layers.GlobalAveragePooling2D(name="feature_extraction_layer")(x)
    x = keras.layers.Dense(128, activation='relu')(x)
    outputs = keras.layers.Dense(len(class_names), activation='softmax')(x)

    model = keras.Model(inputs, outputs)

    print("Compilando e iniciando o treinamento...")
    model.compile(
        optimizer='adam',
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )

    model.summary()

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs
    )

    print(f"Salvando o modelo treinado em: {model_save_path}")
    model.save(model_save_path)

    shutil.rmtree(temp_data_dir)
    print(f"Diretório temporário {temp_data_dir} removido.")

## 4. Execução Principal

Agora, executamos a lógica principal: treinar o modelo (se necessário), extrair features e obter recomendações para uma imagem aleatória.

In [13]:
import random
import glob

SELECTED_CLASSES = ['laptop', 'watch', 'cellphone', 'cup']
MODEL_PATH = "fine_tuned_model.keras"
FEATURES_PATH = "features_fine_tuned.pkl"
DATA_DIR = "101_ObjectCategories"

def get_random_image(root_dir, class_list):
    random_class = random.choice(class_list)
    class_dir = os.path.join(root_dir, random_class)
    all_images = glob.glob(os.path.join(class_dir, 'image_*.jpg')) # Ajustado para o dataset que usa .jpg
    return random.choice(all_images)

# 1. Treinar o modelo se necessário
if not os.path.exists(MODEL_PATH):
    print(f"Modelo {MODEL_PATH} não encontrado. Iniciando o treinamento...")
    train_model(DATA_DIR, SELECTED_CLASSES, MODEL_PATH)
else:
    print(f"Modelo {MODEL_PATH} encontrado.")

# 2. Extrair features
extractor = FeatureExtractor(model_path=MODEL_PATH)
if not os.path.exists(FEATURES_PATH):
    print(f"Arquivo de features {FEATURES_PATH} não encontrado. Extraindo features...")
    image_paths_to_process = []
    for class_name in SELECTED_CLASSES:
        class_dir = os.path.join(DATA_DIR, class_name)
        # Ajustado para o dataset que usa .jpg
        image_paths_to_process.extend(glob.glob(os.path.join(class_dir, 'image_*.jpg')))

    features = extractor.extract_features_from_paths(image_paths_to_process)
    extractor.save_features(features, FEATURES_PATH)
    print(f"Features salvas em: {FEATURES_PATH}")
else:
    print(f"Carregando features de: {FEATURES_PATH}")
    features = extractor.load_features(FEATURES_PATH)

# 3. Obter recomendações
query_image_path = get_random_image(DATA_DIR, SELECTED_CLASSES)
query_feature = extractor.extract_feature(query_image_path) # Extrai a feature da imagem de consulta

# Adiciona a feature da imagem de consulta ao dicionário se não estiver lá
if query_image_path not in features:
    features[query_image_path] = query_feature

recommender = Recommender(features)
recommendations = recommender.get_recommendations(query_image_path, top_k=5)

# 4. Exibir resultados
print(f"Buscando recomendações para: {query_image_path}")
print("Imagens recomendadas:")
if recommendations:
    for img_path, score in recommendations:
        print(f"- {img_path} (Similaridade: {score:.4f})")
else:
    print("Nenhuma recomendação encontrada.")

Modelo fine_tuned_model.keras não encontrado. Iniciando o treinamento...


FileNotFoundError: [Errno 2] No such file or directory: '101_ObjectCategories/laptop'