In [61]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_score, recall_score, accuracy_score, mean_absolute_error, mean_squared_error
import numpy as np
from sklearn.model_selection import train_test_split


In [62]:
# Load the data.
# u.data is read as a tab-separated file with no header row; the four columns
# are named here. u.item is read as a pipe-separated, latin-1 encoded file of
# which only columns 0, 1 and 5..23 are kept (presumably the 19 genre flags of
# MovieLens-100k — confirm against the dataset README).
ratings = pd.read_csv('../datasets/ml-100k/u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])
items = pd.read_csv('../datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1] + list(range(5, 24)))
items.columns = ['item_id', 'title'] + [f'genre_{i}' for i in range(19)]

# Item mapping: item_ids lists the ids in file order, and item_index maps each
# item_id to its row position, which is also its row in feature_matrix below.
item_ids = items['item_id'].tolist()
item_index = {item_id: idx for idx, item_id in enumerate(item_ids)}

# Build the feature matrix: one float32 row per item holding the 19 genre_*
# columns (everything except item_id and title).
feature_matrix = items.drop(columns=['item_id', 'title']).values.astype(np.float32)

In [63]:
# Normalise each item's feature vector to unit L2 length (row-wise).
norms = np.linalg.norm(feature_matrix, axis=1, keepdims=True)
norms[norms == 0] = 1  # avoid division by zero: all-zero rows are left as zeros
feature_matrix = feature_matrix / norms

# Train/test split: 20% of the rating rows are held out; the fixed
# random_state makes the split reproducible across runs.
train, test = train_test_split(ratings, test_size=0.2, random_state=42)

In [64]:
def build_user_profiles(ratings_df):
    """Build one taste vector per user from the items they rated.

    A user's profile is the rating-weighted average of the feature vectors of
    the items they rated, rescaled to unit L2 norm whenever that norm is
    non-zero.

    Reads the module-level ``item_index`` (item_id -> row number) and
    ``feature_matrix`` (one feature row per item).

    Args:
        ratings_df: DataFrame with 'user_id', 'item_id' and 'rating' columns.

    Returns:
        dict mapping user_id -> 1-D numpy profile vector. Users for whom none
        of the rated items appear in ``item_index`` are omitted.

    Raises:
        ZeroDivisionError: propagated from ``np.average`` if a user's ratings
            sum to zero.
    """
    profiles = {}
    for uid, grp in ratings_df.groupby('user_id'):
        # Walk only the two columns that are needed. iterrows() would build a
        # full Series for every rating and can upcast the ids when the frame
        # carries a float column.
        known = [
            (item_index[iid], rating)
            for iid, rating in zip(grp['item_id'], grp['rating'])
            if iid in item_index
        ]
        if not known:
            continue
        rows, weights = map(list, zip(*known))
        # One fancy-index pulls all of this user's item vectors at once.
        profile = np.average(feature_matrix[rows], axis=0, weights=weights)
        norm = np.linalg.norm(profile)
        if norm > 0:
            profile = profile / norm
        profiles[uid] = profile
    return profiles

# Profiles are built from the training split only; predict_rating and
# recommend_for_user below read this dict as a module-level global.
user_profiles = build_user_profiles(train)

In [65]:
def predict_rating(user_id, item_id):
    """Predict a rating from the cosine similarity of user and item vectors.

    The similarity is mapped linearly with ``sim * 4 + 1``, so a similarity of
    0 yields 1 and a similarity of 1 yields 5.

    Reads the module-level ``user_profiles``, ``item_index`` and
    ``feature_matrix``.

    Args:
        user_id: key into ``user_profiles``.
        item_id: key into ``item_index``.

    Returns:
        The predicted rating as a float, or ``np.nan`` when the user or the
        item is unknown or either vector contains NaN.
    """
    if user_id not in user_profiles or item_id not in item_index:
        return np.nan
    user_vec = np.asarray(user_profiles[user_id], dtype=np.float64)
    item_vec = np.asarray(feature_matrix[item_index[item_id]], dtype=np.float64)
    if np.isnan(user_vec).any() or np.isnan(item_vec).any():
        return np.nan
    # Cosine similarity computed directly. This function is called once per
    # test row, so sending two 1-D vectors through a pairwise-matrix routine
    # (validation, re-normalisation, matmul) each time is needless overhead.
    denom = np.linalg.norm(user_vec) * np.linalg.norm(item_vec)
    # A zero-length vector has no direction; treat its similarity as 0.
    sim = float(np.dot(user_vec, item_vec) / denom) if denom > 0 else 0.0
    return sim * 4 + 1

In [66]:

def recommend_for_user(user_id, top_n=10):
    """Return the ``top_n`` unseen items most similar to the user's profile.

    Items are ranked by cosine similarity between the user's profile and every
    row of ``feature_matrix``; items the user already rated in ``train`` are
    skipped.

    Reads the module-level ``user_profiles``, ``feature_matrix``, ``item_ids``
    and ``train``.

    Args:
        user_id: key into ``user_profiles``.
        top_n: maximum number of recommendations to return.

    Returns:
        List of ``(item_id, similarity)`` tuples in descending similarity
        order. Empty when the user has no profile or ``top_n`` is not
        positive.
    """
    # Without the top_n guard, a non-positive top_n made the old
    # `len(...) == top_n` check either never fire (returning every unseen
    # item) or fire on an empty list, depending on whether the best-ranked
    # item had been seen.
    if user_id not in user_profiles or top_n <= 0:
        return []
    user_vec = user_profiles[user_id].reshape(1, -1)
    sims = cosine_similarity(user_vec, feature_matrix).flatten()
    seen_items = set(train[train['user_id'] == user_id]['item_id'])
    recommendations = []
    # argsort is ascending, so reverse it to walk from most to least similar.
    for idx in np.argsort(sims)[::-1]:
        item_id = item_ids[idx]
        if item_id in seen_items:
            continue
        recommendations.append((item_id, sims[idx]))
        if len(recommendations) >= top_n:
            break
    return recommendations

In [67]:
def evaluate_top_n(test_df, top_n=10, threshold=4.0):
    """Score top-N recommendations against held-out ratings.

    For every user in ``test_df`` each held-out item yields one binary pair:
    the truth label is 1 when its rating is at least ``threshold``, and the
    prediction label is 1 when the item is among that user's ``top_n``
    recommendations. The pairs of all users are pooled before scoring.

    Args:
        test_df: DataFrame with 'user_id', 'item_id' and 'rating' columns.
        top_n: number of recommendations requested per user.
        threshold: minimum rating for a held-out item to count as relevant.

    Returns:
        Tuple ``(precision, recall, accuracy)`` over the pooled pairs.
    """
    actual, predicted = [], []
    for user, user_rows in test_df.groupby('user_id'):
        liked = set(user_rows.loc[user_rows['rating'] >= threshold, 'item_id'])
        recommended = {item for item, _score in recommend_for_user(user, top_n)}
        held_out = list(user_rows['item_id'])
        actual.extend(1 if item in liked else 0 for item in held_out)
        predicted.extend(1 if item in recommended else 0 for item in held_out)

    # With no positive prediction at all, precision is reported as 0.0 directly.
    nothing_recommended = sum(predicted) == 0
    prec = 0.0 if nothing_recommended else precision_score(actual, predicted, zero_division=0)
    rec = recall_score(actual, predicted, zero_division=0)
    acc = accuracy_score(actual, predicted)
    return prec, rec, acc

In [None]:

# Ranking quality of the top-10 recommendation lists on the held-out split.
precision, recall, accuracy = evaluate_top_n(test, top_n=10)

# Rating-prediction error: score every held-out (user, item) pair, then drop
# the pairs for which no prediction could be made (NaN) before measuring.
test['pred_rating'] = [
    predict_rating(uid, iid)
    for uid, iid in zip(test['user_id'], test['item_id'])
]
test_clean = test.dropna(subset=['pred_rating'])
mae = mean_absolute_error(test_clean['rating'], test_clean['pred_rating'])
rmse = mean_squared_error(test_clean['rating'], test_clean['pred_rating']) ** 0.5

print("\n".join([
    f"Precision@10: {precision:.4f}",
    f"Recall@10:    {recall:.4f}",
    f"Accuracy@10:  {accuracy:.4f}",
    f"MAE:  {mae:.4f}",
    f"RMSE: {rmse:.4f}",
]))

  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret 

Precision@10: 0.6000
Recall@10:    0.0066
Accuracy@10:  0.4517
MAE:  1.1803
RMSE: 1.4535
