In [32]:
# Exploración inicial del dataset MovieLens 1M
print('Cantidad de usuarios únicos:', ratings['userId'].nunique())
print('Cantidad de películas únicas:', ratings['movieId'].nunique())
print('Formato de ratings (valores únicos):', sorted(ratings['rating'].unique()))
print('Ejemplo de ratings:', ratings['rating'].head(10).tolist())

Cantidad de usuarios únicos: 6040
Cantidad de películas únicas: 3706
Formato de ratings (valores únicos): [1, 2, 3, 4, 5]
Ejemplo de ratings: [5, 3, 3, 4, 5, 3, 5, 5, 4, 4]


In [33]:
# Formato de la tabla ratings
print('Columnas de ratings:', ratings.columns.tolist())
print('Tipos de datos:')
print(ratings.dtypes)
print('Primeras filas:')
print(ratings.head())

Columnas de ratings: ['userId', 'movieId', 'rating', 'timestamp']
Tipos de datos:
userId       int64
movieId      int64
rating       int64
timestamp    int64
dtype: object
Primeras filas:
   userId  movieId  rating  timestamp
0       1     1193       5  978300760
1       1      661       3  978302109
2       1      914       3  978301968
3       1     3408       4  978300275
4       1     2355       5  978824291


In [34]:
# Cantidad de ratings por película y por usuario
print('Ratings por película (media, min, max):',
      ratings['movieId'].value_counts().mean(),
      ratings['movieId'].value_counts().min(),
      ratings['movieId'].value_counts().max())
print('Ratings por usuario (media, min, max):',
      ratings['userId'].value_counts().mean(),
      ratings['userId'].value_counts().min(),
      ratings['userId'].value_counts().max())

Ratings por película (media, min, max): 269.88909875876953 1 3428
Ratings por usuario (media, min, max): 165.5975165562914 20 2314


# Sistema de recomendación con MovieLens 1M
Este notebook  se basa en el entrenamiento y validacion de un modelo de filtrado colaborativo usando SVD sobre el dataset MovieLens 1M.

In [35]:
# Descargar y descomprimir MovieLens 1M
!wget -nc https://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip -n ml-1m.zip -d ./

File ‘ml-1m.zip’ already there; not retrieving.

Archive:  ml-1m.zip


In [36]:
import pandas as pd
import numpy as np
# Cargar ratings
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', engine='python', names=['userId', 'movieId', 'rating', 'timestamp'])
# Cargar películas
movies = pd.read_csv('ml-1m/movies.dat', sep='::', engine='python', names=['movieId', 'title', 'genres'], encoding='latin-1')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [37]:
# Crear matriz usuario-película
user_movie_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)
user_movie_matrix.shape

(6040, 3706)

In [60]:
# validación Split Data
from sklearn.model_selection import KFold
from scipy.sparse.linalg import svds
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

cv_folds = 5
kf = KFold(n_splits=cv_folds, shuffle=True, random_state=42)
rmse_scores = []
mae_scores = []

for train_idx, test_idx in kf.split(ratings):
    train_ratings = ratings.iloc[train_idx]
    test_ratings = ratings.iloc[test_idx]
    
    # Crear matriz usuario-película
    train_matrix = train_ratings.pivot(index='userId', columns='movieId', values='rating')
    
    # CLAVE: Calcular medias SOLO sobre valores reales (ignorando NaN)
    user_means = train_matrix.mean(axis=1).fillna(0)
    
    # Normalizar ANTES de rellenar
    matrix_norm = train_matrix.sub(user_means, axis=0)
    
    # AHORA sí rellenar con 0 (representa "desviación desconocida")
    matrix_norm_filled = matrix_norm.fillna(0).values
    
    # SVD
    U, sigma, Vt = svds(matrix_norm_filled, k=20)
    sigma = np.diag(sigma)
    
    # Reconstruir y desnormalizar
    all_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_means.values.reshape(-1, 1)
    
    # Limitar predicciones al rango válido
    all_predicted_ratings = np.clip(all_predicted_ratings, 1, 5)
    
    preds_train_df = pd.DataFrame(
        all_predicted_ratings, 
        columns=train_matrix.columns, 
        index=train_matrix.index
    )
    
    # Evaluar en test
    test_filtered = test_ratings[
        test_ratings['movieId'].isin(preds_train_df.columns) & 
        test_ratings['userId'].isin(preds_train_df.index)
    ]
    
    preds = []
    for _, row in test_filtered.iterrows():
        user = row['userId']
        movie = row['movieId']
        pred = preds_train_df.loc[user, movie]
        preds.append(pred)
    
    if len(preds) > 0:
        rmse = np.sqrt(mean_squared_error(test_filtered['rating'], preds))
        mae = mean_absolute_error(test_filtered['rating'], preds)
        rmse_scores.append(rmse)
        mae_scores.append(mae)

print(f'RMSE promedio: {np.mean(rmse_scores):.4f}')
print(f'MAE promedio: {np.mean(mae_scores):.4f}')

RMSE promedio: 0.9592
MAE promedio: 0.7592


In [62]:
# ========== MÉTRICAS ADICIONALES PARA SVD MANUAL ==========
from collections import defaultdict
import numpy as np
import random

# Configuración
k_top = 10
threshold = 3.5

# Volver a ejecutar UN FOLD para obtener predicciones
# (usamos el último fold del cross-validation anterior)
train_ratings = ratings.iloc[train_idx]
test_ratings = ratings.iloc[test_idx]

# Crear matriz usuario-película
train_matrix = train_ratings.pivot(index='userId', columns='movieId', values='rating')
user_means = train_matrix.mean(axis=1).fillna(0)
matrix_norm = train_matrix.sub(user_means, axis=0)
matrix_norm_filled = matrix_norm.fillna(0).values

# SVD
U, sigma, Vt = svds(matrix_norm_filled, k=20)
sigma = np.diag(sigma)

# Reconstruir
all_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_means.values.reshape(-1, 1)
all_predicted_ratings = np.clip(all_predicted_ratings, 1, 5)

preds_train_df = pd.DataFrame(
    all_predicted_ratings, 
    columns=train_matrix.columns, 
    index=train_matrix.index
)

# Filtrar test
test_filtered = test_ratings[
    test_ratings['movieId'].isin(preds_train_df.columns) & 
    test_ratings['userId'].isin(preds_train_df.index)
]


# ========== FUNCIONES DE EVALUACIÓN ==========
def precision_recall_at_k_manual(test_filtered, preds_df, k=10, threshold=3.5):
    """Calcula Precision@K y Recall@K para SVD manual"""
    
    # Agrupar por usuario
    user_predictions = defaultdict(list)
    
    for _, row in test_filtered.iterrows():
        user = row['userId']
        movie = row['movieId']
        true_rating = row['rating']
        pred_rating = preds_df.loc[user, movie]
        
        user_predictions[user].append((pred_rating, true_rating))
    
    precisions = []
    recalls = []
    
    for user, predictions in user_predictions.items():
        # Ordenar por predicción descendente
        predictions_sorted = sorted(predictions, key=lambda x: x[0], reverse=True)
        
        # Top-k
        top_k = predictions_sorted[:k]
        
        # Relevantes en top-k
        n_rel_and_rec_k = sum(1 for (_, true_r) in top_k if true_r >= threshold)
        
        # Total relevantes
        n_rel = sum(1 for (_, true_r) in predictions if true_r >= threshold)
        
        # Precision y Recall
        if k > 0:
            precisions.append(n_rel_and_rec_k / k)
        if n_rel > 0:
            recalls.append(n_rel_and_rec_k / n_rel)
    
    return np.mean(precisions) if precisions else 0, np.mean(recalls) if recalls else 0


def coverage_manual(test_filtered, preds_df, train_matrix, threshold=3.5):
    """Calcula coverage para SVD manual"""
    
    # Todas las películas en el dataset de entrenamiento
    all_movies = set(train_matrix.columns)
    
    # Películas recomendadas (con predicción >= threshold)
    recommended_movies = set()
    
    for _, row in test_filtered.iterrows():
        user = row['userId']
        movie = row['movieId']
        pred_rating = preds_df.loc[user, movie]
        
        if pred_rating >= threshold:
            recommended_movies.add(movie)
    
    coverage_score = len(recommended_movies) / len(all_movies) if len(all_movies) > 0 else 0
    return coverage_score


# ========== CALCULAR MÉTRICAS ==========
print("=" * 60)
print("SVD MANUAL - MÉTRICAS ADICIONALES")
print("=" * 60)

precision, recall = precision_recall_at_k_manual(test_filtered, preds_train_df, k=k_top, threshold=threshold)
cov = coverage_manual(test_filtered, preds_train_df, train_matrix, threshold=threshold)

print(f"Precision@{k_top}: {precision:.4f}")
print(f"Recall@{k_top}:    {recall:.4f}")
print(f"Coverage:     {cov:.4f} ({cov*100:.2f}% del catálogo)")

if precision + recall > 0:
    f1 = 2 * (precision * recall) / (precision + recall)
    print(f"F1-Score:     {f1:.4f}")


# ========== EJEMPLOS: PREDICCIONES VS REALES ==========
print("\n" + "=" * 60)
print("EJEMPLOS: PREDICCIONES VS RATINGS REALES")
print("=" * 60)
print(f"{'Usuario':<10} {'Película':<10} {'Real':<8} {'Predicho':<10} {'Error':<8}")
print("-" * 60)

# Seleccionar muestra aleatoria
sample_size = min(20, len(test_filtered))
sample_indices = random.sample(range(len(test_filtered)), sample_size)

for idx in sample_indices:
    row = test_filtered.iloc[idx]
    user = row['userId']
    movie = row['movieId']
    true_rating = row['rating']
    pred_rating = preds_train_df.loc[user, movie]
    error = abs(true_rating - pred_rating)
    
    print(f"{user:<10} {movie:<10} {true_rating:<8.1f} {pred_rating:<10.2f} {error:<8.2f}")


# ========== DISTRIBUCIÓN DE ERRORES ==========
print("\n" + "=" * 60)
print("DISTRIBUCIÓN DE ERRORES")
print("=" * 60)

# Calcular todos los errores
errors = []
for _, row in test_filtered.iterrows():
    user = row['userId']
    movie = row['movieId']
    true_rating = row['rating']
    pred_rating = preds_train_df.loc[user, movie]
    error = abs(true_rating - pred_rating)
    errors.append(error)

print(f"Error promedio:      {np.mean(errors):.4f}")
print(f"Error mediano:       {np.median(errors):.4f}")
print(f"Error máximo:        {np.max(errors):.4f}")
print(f"Desviación estándar: {np.std(errors):.4f}")

print("\nDistribución de errores absolutos:")
bins = [0, 0.5, 1.0, 1.5, 2.0, 5.0]
for i in range(len(bins)-1):
    count = sum(1 for e in errors if bins[i] <= e < bins[i+1])
    percentage = (count / len(errors)) * 100
    print(f"  {bins[i]:.1f} - {bins[i+1]:.1f}: {count:6d} ({percentage:5.2f}%)")


# ========== ANÁLISIS POR USUARIO ==========
print("\n" + "=" * 60)
print("TOP 5 USUARIOS CON MEJOR/PEOR PREDICCIÓN")
print("=" * 60)

# Calcular error promedio por usuario
user_errors = defaultdict(list)
for _, row in test_filtered.iterrows():
    user = row['userId']
    movie = row['movieId']
    true_rating = row['rating']
    pred_rating = preds_train_df.loc[user, movie]
    error = abs(true_rating - pred_rating)
    user_errors[user].append(error)

# Promediar errores por usuario
user_avg_errors = {user: np.mean(errors) for user, errors in user_errors.items() if len(errors) >= 5}

if user_avg_errors:
    # Mejores usuarios (menor error)
    best_users = sorted(user_avg_errors.items(), key=lambda x: x[1])[:5]
    print("\nMejor predicción (menor error):")
    for user, avg_error in best_users:
        n_ratings = len(user_errors[user])
        print(f"  Usuario {user}: Error promedio = {avg_error:.4f} ({n_ratings} ratings)")
    
    # Peores usuarios (mayor error)
    worst_users = sorted(user_avg_errors.items(), key=lambda x: x[1], reverse=True)[:5]
    print("\nPeor predicción (mayor error):")
    for user, avg_error in worst_users:
        n_ratings = len(user_errors[user])
        print(f"  Usuario {user}: Error promedio = {avg_error:.4f} ({n_ratings} ratings)")

SVD MANUAL - MÉTRICAS ADICIONALES
Precision@10: 0.6796
Recall@10:    0.6371
Coverage:     0.8980 (89.80% del catálogo)
F1-Score:     0.6576

EJEMPLOS: PREDICCIONES VS RATINGS REALES
Usuario    Película   Real     Predicho   Error   
------------------------------------------------------------
3922       63         4.0      2.62       1.38    
3659       246        5.0      4.27       0.73    
5763       1060       4.0      3.60       0.40    
1701       1327       1.0      3.28       2.28    
3055       512        1.0      3.54       2.54    
268        2000       4.0      3.86       0.14    
4824       1090       5.0      3.89       1.11    
1794       1479       2.0      2.21       0.21    
551        1834       4.0      3.75       0.25    
3732       829        4.0      3.49       0.51    
4262       2174       4.0      3.60       0.40    
2586       1077       4.0      4.05       0.05    
5795       1918       1.0      1.08       0.08    
1607       2716       4.0      3.23       0

In [61]:
# --- Validación Cruzada K-Fold ---
from sklearn.model_selection import KFold
from scipy.sparse.linalg import svds
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Configurar K-Fold
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Almacenar métricas de cada fold
rmse_scores = []
mae_scores = []

print(f"Ejecutando validación cruzada con {n_splits} folds...\n")

# Iterar sobre los folds
for fold, (train_idx, test_idx) in enumerate(kf.split(ratings), 1):
    print(f"Fold {fold}/{n_splits}")
    
    # Dividir datos
    train_ratings = ratings.iloc[train_idx]
    test_ratings = ratings.iloc[test_idx]
    
    # Crear matriz usuario-película
    train_matrix = train_ratings.pivot(index='userId', columns='movieId', values='rating')
    
    # Calcular medias SOLO sobre valores reales (ignorando NaN)
    user_means = train_matrix.mean(axis=1).fillna(0)
    
    # Normalizar ANTES de rellenar
    matrix_norm = train_matrix.sub(user_means, axis=0)
    
    # Rellenar con 0 (desviación desconocida)
    matrix_norm_filled = matrix_norm.fillna(0).values
    
    # SVD
    U, sigma, Vt = svds(matrix_norm_filled, k=20)
    sigma = np.diag(sigma)
    
    # Reconstruir y desnormalizar
    all_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_means.values.reshape(-1, 1)
    
    # Limitar predicciones al rango válido
    all_predicted_ratings = np.clip(all_predicted_ratings, 1, 5)
    
    preds_train_df = pd.DataFrame(
        all_predicted_ratings, 
        columns=train_matrix.columns, 
        index=train_matrix.index
    )
    
    # Evaluar en test
    test_filtered = test_ratings[
        test_ratings['movieId'].isin(preds_train_df.columns) & 
        test_ratings['userId'].isin(preds_train_df.index)
    ]
    
    preds = []
    for _, row in test_filtered.iterrows():
        user = row['userId']
        movie = row['movieId']
        pred = preds_train_df.loc[user, movie]
        preds.append(pred)
    
    if len(preds) > 0:
        rmse = np.sqrt(mean_squared_error(test_filtered['rating'], preds))
        mae = mean_absolute_error(test_filtered['rating'], preds)
        
        rmse_scores.append(rmse)
        mae_scores.append(mae)
        
        print(f"  RMSE: {rmse:.4f}")
        print(f"  MAE: {mae:.4f}\n")

# Resultados finales
print("="*50)
print("RESULTADOS DE VALIDACIÓN CRUZADA")
print("="*50)
print(f"RMSE promedio: {np.mean(rmse_scores):.4f} (± {np.std(rmse_scores):.4f})")
print(f"MAE promedio:  {np.mean(mae_scores):.4f} (± {np.std(mae_scores):.4f})")
print("\nRMSE por fold:", [f"{score:.4f}" for score in rmse_scores])
print("MAE por fold: ", [f"{score:.4f}" for score in mae_scores])

Ejecutando validación cruzada con 5 folds...

Fold 1/5
  RMSE: 0.9633
  MAE: 0.7631

Fold 2/5
  RMSE: 0.9570
  MAE: 0.7575

Fold 3/5
  RMSE: 0.9580
  MAE: 0.7580

Fold 4/5
  RMSE: 0.9596
  MAE: 0.7592

Fold 5/5
  RMSE: 0.9584
  MAE: 0.7583

RESULTADOS DE VALIDACIÓN CRUZADA
RMSE promedio: 0.9592 (± 0.0022)
MAE promedio:  0.7592 (± 0.0020)

RMSE por fold: ['0.9633', '0.9570', '0.9580', '0.9596', '0.9584']
MAE por fold:  ['0.7631', '0.7575', '0.7580', '0.7592', '0.7583']


## Validación con Surprise (SVD y matrices dispersas)

In [56]:
# ========== VALIDACIÓN CON SURPRISE SVD ==========
from surprise import SVD, Dataset, Reader
from surprise.model_selection import cross_validate
import numpy as np

# Hiperparámetros configurables
n_factors = 50         # Número de factores latentes
n_epochs = 20          # Número de épocas de entrenamiento
lr_all = 0.005         # Tasa de aprendizaje (learning rate)
reg_all = 0.02         # Parámetro de regularización
cv_folds = 5           # Cantidad de folds para validación cruzada

# Crear dataset para Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Configurar modelo SVD con gradient descent y regularización
algo = SVD(n_factors=n_factors, n_epochs=n_epochs, lr_all=lr_all, reg_all=reg_all)

# Validación cruzada
print("=" * 60)
print("SURPRISE SVD - VALIDACIÓN CRUZADA (5-Folds)")
print("=" * 60)
results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=cv_folds, verbose=True)

# Resultados finales
print("\n" + "=" * 60)
print("RESULTADOS FINALES")
print("=" * 60)
print(f'RMSE promedio: {np.mean(results["test_rmse"]):.4f} (± {np.std(results["test_rmse"]):.4f})')
print(f'MAE promedio:  {np.mean(results["test_mae"]):.4f} (± {np.std(results["test_mae"]):.4f})')

SURPRISE SVD - VALIDACIÓN CRUZADA (5-Folds)
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8708  0.8728  0.8697  0.8700  0.8708  0.8708  0.0011  
MAE (testset)     0.6844  0.6849  0.6828  0.6838  0.6840  0.6840  0.0007  
Fit time          3.33    3.30    3.50    3.73    4.19    3.61    0.33    
Test time         0.64    0.64    0.65    0.74    0.71    0.68    0.04    

RESULTADOS FINALES
RMSE promedio: 0.8708 (± 0.0011)
MAE promedio:  0.6840 (± 0.0007)


### Evaluación de Surprise SVD con split 80/20

In [57]:
# ========== EVALUACIÓN SURPRISE SVD - SPLIT 80/20 ==========
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

# Hiperparámetros (usar los mismos definidos anteriormente)
# n_factors, n_epochs, lr_all, reg_all

# Crear dataset para Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split 80/20
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Entrenar modelo SVD
algo = SVD(n_factors=n_factors, n_epochs=n_epochs, lr_all=lr_all, reg_all=reg_all)
algo.fit(trainset)

# Predecir en test set
predictions = algo.test(testset)

# Calcular métricas
print("=" * 60)
print("SURPRISE SVD - SPLIT TEST (80/20)")
print("=" * 60)
rmse = accuracy.rmse(predictions, verbose=True)
mae = accuracy.mae(predictions, verbose=True)

SURPRISE SVD - SPLIT TEST (80/20)
RMSE: 0.8709
MAE:  0.6838


In [59]:
# ========== MÉTRICAS ADICIONALES: PRECISION, RECALL, COVERAGE ==========
from collections import defaultdict
import numpy as np

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """
    Calcula Precision@K y Recall@K
    
    Args:
        predictions: predicciones de Surprise
        k: número de recomendaciones top-k
        threshold: rating mínimo para considerar relevante
    """
    # Diccionarios por usuario
    user_est_true = defaultdict(list)
    
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))
    
    precisions = []
    recalls = []
    
    for uid, user_ratings in user_est_true.items():
        # Ordenar por predicción descendente
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        
        # Top-k predicciones
        top_k = user_ratings[:k]
        
        # Relevantes en top-k
        n_rel_and_rec_k = sum((true_r >= threshold) for (_, true_r) in top_k)
        
        # Total de relevantes para este usuario
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        
        # Precision@K
        precisions.append(n_rel_and_rec_k / k if k != 0 else 0)
        
        # Recall@K
        recalls.append(n_rel_and_rec_k / n_rel if n_rel != 0 else 0)
    
    precision = np.mean(precisions)
    recall = np.mean(recalls)
    
    return precision, recall


def coverage(predictions, trainset, threshold=3.5):
    """
    Calcula el coverage: porcentaje de ítems que el modelo recomienda
    """
    # Todos los ítems en el dataset
    all_items = set(trainset.all_items())
    
    # Ítems recomendados (con predicción >= threshold)
    recommended_items = set()
    for uid, iid, true_r, est, _ in predictions:
        if est >= threshold:
            recommended_items.add(iid)
    
    coverage_score = len(recommended_items) / len(all_items)
    return coverage_score


# ========== CALCULAR MÉTRICAS ==========
print("\n" + "=" * 60)
print("MÉTRICAS ADICIONALES")
print("=" * 60)

# Precision y Recall
precision, recall = precision_recall_at_k(predictions, k=10, threshold=3.5)
print(f"Precision@10: {precision:.4f}")
print(f"Recall@10:    {recall:.4f}")

# Coverage
cov = coverage(predictions, trainset, threshold=3.5)
print(f"Coverage:     {cov:.4f} ({cov*100:.2f}% de películas cubiertas)")

# F1-Score (combinación de Precision y Recall)
if precision + recall > 0:
    f1 = 2 * (precision * recall) / (precision + recall)
    print(f"F1-Score:     {f1:.4f}")


# ========== COMPARACIÓN: PREDICCIONES VS REALES ==========
print("\n" + "=" * 60)
print("EJEMPLOS: PREDICCIONES VS RATINGS REALES")
print("=" * 60)
print(f"{'Usuario':<10} {'Película':<10} {'Real':<8} {'Predicho':<10} {'Error':<8}")
print("-" * 60)

# Mostrar 20 ejemplos aleatorios
import random
sample_predictions = random.sample(predictions, min(20, len(predictions)))

for uid, iid, true_r, est, _ in sample_predictions:
    error = abs(true_r - est)
    print(f"{uid:<10} {iid:<10} {true_r:<8.1f} {est:<10.2f} {error:<8.2f}")

# ========== ESTADÍSTICAS DE ERROR ==========
print("\n" + "=" * 60)
print("DISTRIBUCIÓN DE ERRORES")
print("=" * 60)

errors = [abs(true_r - est) for (_, _, true_r, est, _) in predictions]
print(f"Error promedio:     {np.mean(errors):.4f}")
print(f"Error mediano:      {np.median(errors):.4f}")
print(f"Error máximo:       {np.max(errors):.4f}")
print(f"Desviación estándar: {np.std(errors):.4f}")

# Distribución de errores por rangos
print("\nDistribución de errores absolutos:")
bins = [0, 0.5, 1.0, 1.5, 2.0, 5.0]
for i in range(len(bins)-1):
    count = sum(bins[i] <= e < bins[i+1] for e in errors)
    percentage = (count / len(errors)) * 100
    print(f"  {bins[i]:.1f} - {bins[i+1]:.1f}: {count:6d} ({percentage:5.2f}%)")


MÉTRICAS ADICIONALES
Precision@10: 0.6821
Recall@10:    0.6348
Coverage:     0.7671 (76.71% de películas cubiertas)
F1-Score:     0.6576

EJEMPLOS: PREDICCIONES VS RATINGS REALES
Usuario    Película   Real     Predicho   Error   
------------------------------------------------------------
4085       3044       4.0      4.06       0.06    
2453       147        4.0      3.50       0.50    
2175       2404       3.0      3.26       0.26    
2590       1603       3.0      2.90       0.10    
204        3300       1.0      3.31       2.31    
117        1037       3.0      3.41       0.41    
3151       2184       5.0      4.02       0.98    
3170       2028       5.0      4.09       0.91    
2918       733        2.0      2.75       0.75    
5736       2010       4.0      4.14       0.14    
3365       3600       4.0      3.47       0.53    
3726       552        3.0      3.42       0.42    
1839       3785       4.0      4.00       0.00    
2393       1251       4.0      3.85       0.1