In [4]:
import os
import sys
from pathlib import Path

# Resolve the project root once, as an absolute path, so this cell can be
# re-run safely. The notebook may start either in a sub-directory of the
# project (root is the parent) or, after a previous run of this cell, in the
# project root itself (root is the current directory). The presence of a
# `src` directory is used to tell the two apart.
_start_dir = Path.cwd().resolve()
PROJECT_ROOT = _start_dir if (_start_dir / "src").is_dir() else _start_dir.parent

# Register <root>/src as an absolute path: a relative '../src' entry would
# point somewhere else as soon as the working directory changes below.
_src_dir = str(PROJECT_ROOT / "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from data_loader import load_data, preprocess_data
from recommender import build_trainset, user_based_cf, get_top_n
from surprise.model_selection import train_test_split

# Work from the project root so relative paths used later (e.g. 'results/...')
# resolve from there. Changing to an absolute path is idempotent, unlike
# os.chdir(".."), which climbs one level further on every re-run.
os.chdir(PROJECT_ROOT)

In [5]:
# load_data() yields three objects, unpacked here as ratings, items and users.
loaded_tables = load_data()
ratings, items, users = loaded_tables

# Only the ratings go through preprocessing; the raw `ratings` object is kept
# under its own name so the cleaned result has explicit lineage.
ratings_clean = preprocess_data(ratings)

In [6]:
from surprise import Dataset, Reader

# Hand Surprise the user, item and rating columns (in that order) together
# with a reader declaring the 1-5 rating scale.
rating_columns = ['user_id', 'item_id', 'rating']
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_clean[rating_columns], reader)

# Hold out a quarter of the ratings for evaluation; the fixed random_state
# keeps the split the same from run to run.
trainset, testset = train_test_split(
    data,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Build the user-based collaborative-filtering model from the training split.
# user_based_cf comes from the project's `recommender` module; judging by the
# log output of this cell it computes a cosine similarity matrix.
# NOTE(review): presumably returns a fitted Surprise algorithm (its .test()
# method is called in the next cell) — confirm in recommender.py.
algo_user = user_based_cf(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [8]:
# Run the model over the held-out test split. The result is sliced and
# iterated in the next cell, where each entry prints its user, item, true
# rating (r_ui) and estimated rating (est).
predictions = algo_user.test(testset)

In [9]:
# Sanity check: print at most the first five predictions, one per line.
sample_predictions = predictions[:5]
for prediction in sample_predictions:
    print(prediction)

user: 391        item: 591        r_ui = 4.00   est = 3.85   {'actual_k': 20, 'was_impossible': False}
user: 181        item: 1291       r_ui = 1.00   est = 2.99   {'actual_k': 7, 'was_impossible': False}
user: 637        item: 268        r_ui = 2.00   est = 3.90   {'actual_k': 20, 'was_impossible': False}
user: 332        item: 451        r_ui = 5.00   est = 3.40   {'actual_k': 20, 'was_impossible': False}
user: 271        item: 204        r_ui = 4.00   est = 4.00   {'actual_k': 20, 'was_impossible': False}


In [10]:
# Reduce the predictions to per-user recommendations with n=10.
# The result is consumed below as a mapping of user id -> iterable of
# (item id, rating) pairs.
# NOTE(review): presumably keeps each user's 10 highest estimated ratings —
# get_top_n lives in the project's `recommender` module; confirm there.
top_n = get_top_n(predictions, n=10)

In [14]:
import os

import pandas as pd

# Flatten the {user id: [(item id, rating), ...]} mapping into one record per
# recommended (user, item) pair.
top_n_list = [
    {'user_id': uid, 'item_id': iid, 'predicted_rating': rating}
    for uid, user_ratings in top_n.items()
    for iid, rating in user_ratings
]

# Pin the column order explicitly so the CSV header is written even when
# there are no recommendations at all (an empty record list would otherwise
# produce a frame with no columns).
top_n_df = pd.DataFrame(
    top_n_list,
    columns=['user_id', 'item_id', 'predicted_rating'],
)

# to_csv does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail here with an OSError.
output_path = 'results/top_n_recommendations.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
top_n_df.to_csv(output_path, index=False)
print("\nUser-based top-N recommendations saved!")



User-based top-N recommendations saved!
