In [6]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [2]:
# 1. Load the datasets
# Two dataset locations are declared; only the small one is read in this cell.
pathSmall = '../Datasets/ml-latest-small/'
pathLarge = '../Datasets/ml-32m/'
# ratings.csv is expected to provide the columns: userId, movieId, rating, timestamp.
# The timestamp column is discarded right after loading.
ratings = pd.read_csv(pathSmall + 'ratings.csv').drop(columns=['timestamp'])


In [3]:
# 2. Build the user-movie matrix
# Wide table: one row per userId, one column per movieId, cell value = rating.
# (user, movie) pairs without a rating are filled with 0.
ratings_matrix = pd.pivot(
    ratings,
    index='userId',
    columns='movieId',
    values='rating',
).fillna(0)

In [4]:
# 3. Cluster the users (rows of ratings_matrix) into k groups with K-Means.
# random_state is fixed so the same seed is used on every run.
# NOTE(review): n_init is left at the scikit-learn default, which may differ
# between library releases — consider pinning it explicitly.
k = 10
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(ratings_matrix)
# One cluster label per row of ratings_matrix, in row order.
clusters = kmeans.labels_

In [None]:
def predict_rating(user_index, item_index, data, clusters):
    """Predict a user's rating for an item from the votes of same-cluster users.

    Every other user in the target user's cluster who has a non-zero rating
    for the item casts a vote for that rating value. A vote is weighted by
    the inverse squared Euclidean distance between the two users' full rating
    vectors. The rating value with the largest accumulated weight wins; if
    several values share exactly that weight, their mean is returned.

    Parameters
    ----------
    user_index : int
        Positional row index of the target user in ``data`` / ``clusters``.
    item_index : int
        Positional column index of the item in ``data``.
    data : array-like, 2-D
        Rating matrix indexed as ``[user, item]``; a value of 0 is treated as
        "not rated". Anything ``np.asarray`` accepts works (ndarray, pandas
        DataFrame, nested lists); indexing is always positional, never by
        label.
    clusters : array-like, 1-D
        Cluster label of each row of ``data``, in the same order.

    Returns
    -------
    float
        The predicted rating, or ``0.0`` when no other user in the cluster
        has rated the item.
    """
    # Coerce to arrays so positional indexing behaves the same for ndarrays,
    # DataFrames and plain lists (label-based DataFrame indexing would either
    # pick a column or raise KeyError here).
    matrix = np.asarray(data)
    labels = np.asarray(clusters)

    user_vector = matrix[user_index]

    # Voters: same cluster as the target user, item rated (non-zero),
    # and not the target user itself.
    is_voter = (labels == labels[user_index]) & (matrix[:, item_index] != 0)
    is_voter[user_index] = False

    # Accumulate the weight of each distinct rating value, visiting voters in
    # ascending row order.
    rating_weights = {}
    for neighbor in np.flatnonzero(is_voter):
        distance = np.linalg.norm(user_vector - matrix[neighbor])
        # The small constant keeps the weight finite for identical vectors.
        weight = 1 / (distance**2 + 1e-5)
        rating = matrix[neighbor, item_index]
        rating_weights[rating] = rating_weights.get(rating, 0) + weight

    if not rating_weights:
        return 0.0

    # Rating value(s) with the largest accumulated weight; exact ties are averaged.
    max_weight = max(rating_weights.values())
    top_ratings = [r for r, w in rating_weights.items() if w == max_weight]

    return np.mean(top_ratings)
