Sistema de Recomendación - CF basado en la vecinos

In [3]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m5.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357284 sha256=acc28267d6959a7e0300ff9e77fea50ff53f31d3390dda9873d4bbd43dc2247e
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a

In [10]:
from surprise import Dataset, Reader
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd

# Cargamos el csv
csv_path = '/content/users-score-anime.csv'
csv_data = pd.read_csv(csv_path)

# Pasamos los datos del csv al formato de Surprise
reader = Reader(rating_scale=(csv_data.scoreByUser.min(), csv_data.scoreByUser.max()))
data = Dataset.load_from_df(csv_data[['user_id', 'anime_id', 'scoreByUser']], reader)

# Conjunto de entrenamiento y prueba
trainset, testset = train_test_split(data, test_size=0.2)

# Configuraciones de parámetros
models = {
    "KNNBasic": KNNBasic,
    "KNNWithMeans": KNNWithMeans,
    "KNNWithZScore": KNNWithZScore,
    "KNNBaseline": KNNBaseline,
}
similarity_measures = ["cosine", "pearson", "pearson_baseline"]
user_item_based = [True, False]

# Diccionario para almacenar los resultados
results = []

# Probamos todas las combinaciones
for model_name, model_class in models.items():
    for sim_measure in similarity_measures:
        for user_based in user_item_based:
            # Opciones de similitud
            sim_options = {
                "name": sim_measure,
                "user_based": user_based,
            }

            # Creación y entrenamiento del modelo
            algo = model_class(sim_options=sim_options)
            algo.fit(trainset)

            # Evaluación del modelo con los datos de prueba
            predictions = algo.test(testset)

            # Calculo de métricas
            rmse = accuracy.rmse(predictions, verbose=False)
            mae = accuracy.mae(predictions, verbose=False)

            # Guardamos los resultados
            results.append({
                "Model": model_name,
                "Similarity": sim_measure,
                "User-Based": user_based,
                "RMSE": rmse,
                "MAE": mae,
            })

# DataFrame para mostrar los resultados
results_df = pd.DataFrame(results)
sorted_results = results_df.sort_values(by=["RMSE", "MAE"], ascending=True)

# Los mejores resultados
print("Resultados ordenados por RMSE y MAE:")
print(sorted_results.head())

# La Mejor configuración
best_model = sorted_results.iloc[0]
print("\nMejor configuración:")
print(best_model)


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Comput

In [14]:
from surprise import KNNWithMeans

# Declaramos el modelo ganador
best_model_sim_options = {
    "name": "cosine",  # Similaridad basada en coseno
    "user_based": False,  # Similaridad basada en ítems
}
best_model = KNNWithMeans(sim_options=best_model_sim_options)

# Entrenamos el modelo ganador
best_model.fit(trainset)

# Obtenemos predicciones en el conjunto de prueba
predictions = best_model.test(testset)

# Evaluación de métricas
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, precision_recall_fscore_support
import numpy as np

# Definir umbral para recomendaciones relevantes
threshold = 7.0

# Preparar listas para métricas
y_true = []
y_pred = []
true_ratings = []
predicted_ratings = []

for pred in predictions:
    true_ratings.append(pred.r_ui)
    predicted_ratings.append(pred.est)

    # Etiquetas binarias para precisión, recall y F1
    if pred.r_ui >= threshold:
        y_true.append(1)
    else:
        y_true.append(0)

    if pred.est >= threshold:
        y_pred.append(1)
    else:
        y_pred.append(0)

# Calculamos RMSE, MAE, MSE, R2
rmse = np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
mae = mean_absolute_error(true_ratings, predicted_ratings)
mse = mean_squared_error(true_ratings, predicted_ratings)
r2 = r2_score(true_ratings, predicted_ratings)

# Calculamos precisión, recall y F1
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")

# Resultados obtenidos del modelo
print("Evaluación del Modelo Ganador:")
print(f"Modelo: KNNWithMeans")
print(f"Similitud: Cosine")
print(f"Basado en: Ítems")
print(f"RMSE: {rmse:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")
print(f"Precisión: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluación del Modelo Ganador:
Modelo: KNNWithMeans
Similitud: Cosine
Basado en: Ítems
RMSE: 1.3710
MSE: 1.8797
MAE: 1.0264
R²: 0.2945
Precisión: 0.8734
Recall: 0.8022
F1-Score: 0.8363


In [17]:
# Algunas predicciones de ejemplo
print("Predicciones de ejemplo para KNNWithMeans con pearson:")
for pred in predictions[:5]:
    print(pred)


Predicciones de ejemplo para KNNWithMeans con pearson:
user: 1952       item: 517.0      r_ui = 7.00   est = 8.20   {'actual_k': 39, 'was_impossible': False}
user: 6919       item: 1735.0     r_ui = 6.00   est = 6.67   {'actual_k': 40, 'was_impossible': False}
user: 6132       item: 10500.0    r_ui = 7.00   est = 7.02   {'actual_k': 40, 'was_impossible': False}
user: 6520       item: 339.0      r_ui = 5.00   est = 8.80   {'actual_k': 12, 'was_impossible': False}
user: 6111       item: 25649.0    r_ui = 7.00   est = 6.82   {'actual_k': 40, 'was_impossible': False}
