In [1]:
# Sezione 1: Import librerie
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [2]:
# Sezione 2: Caricamento dati (MovieLens 100k come esempio)
url = "../datasets/ml-100k/u.data"
columns = ['user_id', 'movie_id', 'rating', 'timestamp']
df = pd.read_csv(url, sep='\t', names=columns)

# Rimuoviamo timestamp
df.drop('timestamp', axis=1, inplace=True)

In [3]:
# Sezione 3: Creazione matrice utente-film
ratings_matrix = df.pivot_table(index='user_id', columns='movie_id', values='rating')
ratings_matrix.fillna(0, inplace=True)

# Sezione 4: Split del dataset
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
train_matrix = train_data.pivot_table(
    index='user_id',
    columns='movie_id',
    values='rating'
).fillna(0)


In [4]:
# Sezione 5: Calcolo similarità tra utenti
user_similarity = cosine_similarity(train_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=train_matrix.index, columns=train_matrix.index)

# Funzione per raccomandare film a un utente basandosi sugli utenti simili
def recommend_movies(user_id, num_recommendations=5):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)[1:]  # escludi se stesso
    weighted_ratings = np.zeros(train_matrix.shape[1])
    
    for other_user, similarity in similar_users.items():
        weighted_ratings += similarity * train_matrix.loc[other_user].values
    
    user_rated = train_matrix.loc[user_id].values > 0
    weighted_ratings[user_rated] = 0  # Escludi quelli già visti
    recommended_indices = np.argsort(weighted_ratings)[::-1][:num_recommendations]
    
    return train_matrix.columns[recommended_indices]

  ret = a @ b
  ret = a @ b
  ret = a @ b


In [5]:
# Sezione 6: Costruzione predizioni binarie per valutazione
threshold = 3  # consideriamo raccomandazioni positive se rating >= 3

def predict_rating(user_id, movie_id):
    if movie_id not in ratings_matrix.columns:
        return 0
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)[1:]
    
    num = 0
    den = 0
    for other_user, similarity in similar_users.items():
        rating = ratings_matrix.loc[other_user, movie_id]
        if rating > 0:
            num += similarity * rating
            den += similarity
    if den == 0:
        return 0
    return num / den

In [None]:
# Sezione 7: Valutazione – Estesa con MAE e RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# (riutilizzi y_true_continuous e y_pred_continuous per errori sui rating continui)
y_true_binary = []
y_pred_binary = []
y_true_continuous = []
y_pred_continuous = []

test_sample = test_data.sample(1000, random_state=1)

for _, row in test_sample.iterrows():
    # vero rating e predizione continua
    true_rating = row['rating']
    pred_rating = predict_rating(row['user_id'], row['movie_id'])
    
    # raccolgo per MAE/RMSE
    y_true_continuous.append(true_rating)
    y_pred_continuous.append(pred_rating)
    
    # raccolgo per Accuracy/Precision/Recall
    actual = 1 if true_rating >= threshold else 0
    predicted = 1 if pred_rating >= threshold else 0
    
    y_true_binary.append(actual)
    y_pred_binary.append(predicted)

# calcolo Accuracy, Precision, Recall
accuracy = accuracy_score(y_true_binary, y_pred_binary)
precision = precision_score(y_true_binary, y_pred_binary, zero_division=0)
recall = recall_score(y_true_binary, y_pred_binary, zero_division=0)

# calcolo MAE e RMSE
mae = mean_absolute_error(y_true_continuous, y_pred_continuous)
rmse = np.sqrt(mean_squared_error(y_true_continuous, y_pred_continuous))

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"MAE:       {mae:.4f}")
print(f"RMSE:      {rmse:.4f}")

Accuracy: 0.8230
Precision: 0.8792
Recall: 0.9139
