# Recommendation Systems Project

By: ADJAL Mehdi Zakaria, BENBETKA Rachid, YAMANI Mohammed Kamel, BOUKAROURA Rami

You can find the report at the following link:

## How to run this notebook

In [1]:
import os
from datetime import datetime
import numpy as np
import cornac
from cornac.models import GlobalLocalKernel
from cornac.eval_methods import RatioSplit
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP
from cornac.models import MF, PMF, BPR

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Helper: pick a log file name that does not clash with earlier runs
def get_next_log_file(base_name="experiment_log", ext=".txt"):
    """Return the first unused path of the form ``{base_name}_{n}{ext}``.

    Numbering starts at 1 and increases until ``os.path.exists`` reports that
    the candidate path is free. The file itself is not created here.

    Args:
        base_name: Path prefix placed before the ``_{n}`` suffix.
        ext: File extension, including the leading dot.

    Returns:
        The first candidate path that does not exist yet.
    """
    index = 1
    candidate = f"{base_name}_{index}{ext}"
    while os.path.exists(candidate):
        index += 1
        candidate = f"{base_name}_{index}{ext}"
    return candidate

In [3]:
# Helper: append one experiment record (timestamp, model attributes, results) to a log file
def log_results(log_file, test_results, model_instance):
    """Append a delimited experiment record to ``log_file``.

    The record consists of a separator line, the current timestamp, one
    ``name: value`` line per entry of ``vars(model_instance)``, the test
    results, and a closing separator.

    Note that *every* instance attribute of the model is written under the
    "Hyperparameters" heading, not only constructor arguments.

    Args:
        log_file: Path of the file to append to (created if missing).
        test_results: Results text; non-string values are converted with ``str``.
        model_instance: Object whose instance attributes are logged.
    """
    separator = "\n" + "=" * 40 + "\n"

    # Build the whole record before touching the file so that a failure while
    # formatting cannot leave a half-written entry in the log.
    parts = [
        separator,
        f"Experiment conducted on: {datetime.now()}\n",
        "\nHyperparameters:\n",
    ]
    parts.extend(f"{attr}: {value}\n" for attr, value in vars(model_instance).items())
    parts.append("\nTest Results:\n")
    parts.append(str(test_results))
    parts.append(separator)

    # Explicit UTF-8: the platform default encoding may be unable to represent
    # non-ASCII characters in attribute values or results.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write("".join(parts))

In [4]:
# Load the MovieLens ratings; the 100K variant is requested explicitly
# rather than relying on the loader's default
ml_100k = cornac.datasets.movielens.load_feedback(variant="100K")

In [5]:
# Hold out 20% of the ratings for testing. `rating_threshold` is the cut-off
# Cornac uses to treat a rating as positive feedback for ranking metrics, and
# the fixed seed keeps the split reproducible across runs.
rs = RatioSplit(
    data=ml_100k,
    test_size=0.2,
    rating_threshold=4.0,
    seed=123,
)

In [6]:
# Summarise the split: user/item totals and the number of ratings per partition
n_u, n_m = rs.total_users, rs.total_items

print('Data matrix loaded')
print(f'Number of users: {n_u}')
print(f'Number of movies: {n_m}')
# uir_tuple[2] is the third element of the (user, item, rating) tuple;
# its length is reported as the number of ratings in that partition
print(f'Number of training ratings: {len(rs.train_set.uir_tuple[2])}')
print(f'Number of test ratings: {len(rs.test_set.uir_tuple[2])}')

Data matrix loaded
Number of users: 943
Number of movies: 1656
Number of training ratings: 80000
Number of test ratings: 19971


In [7]:
# Configure the GlobalLocalKernel model under evaluation.
# The hyperparameters are gathered in one mapping so they are easy to tweak.
glocal_k_params = dict(
    n_hid=10,
    n_dim=2,
    max_epoch_p=500,   # presumably the epoch cap for the pre-training phase; confirm against the model docs
    max_epoch_f=1000,  # presumably the epoch cap for the fine-tuning phase; confirm against the model docs
    lr_p=0.1,
    lr_f=0.01,
    verbose=False,
)
my_model = GlobalLocalKernel(**glocal_k_params)

In [13]:
# Baseline models evaluated alongside GlobalLocalKernel.
# All three use 10 latent factors and the same seed.
mf = MF(
    k=10,
    max_iter=25,
    learning_rate=0.01,
    lambda_reg=0.02,
    use_bias=True,
    seed=123,
)
pmf = PMF(
    k=10,
    max_iter=100,
    learning_rate=0.001,
    lambda_reg=0.001,
    seed=123,
)
bpr = BPR(
    k=10,
    max_iter=200,
    learning_rate=0.001,
    lambda_reg=0.01,
    seed=123,
)

In [9]:
# Evaluation metrics: rating-prediction errors first, then ranking quality
rating_metrics = [MAE(), RMSE()]
ranking_metrics = [Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()]
metrics = rating_metrics + ranking_metrics

In [10]:
# Redirect stdout so the text Cornac prints during the experiment can be captured
from io import StringIO
import sys

# Choose the log file up front: the first unused "experiment_log_<n>.txt" name
log_file = get_next_log_file()
temp = sys.stdout  # Keep the original stdout so a later cell can restore it
sys.stdout = StringIO()  # From here on, printed text accumulates in memory and is not shown until stdout is restored

Early stopping fine-tuning at epoch: 237
Early stopping fine-tuning at epoch: 241
Early stopping fine-tuning at epoch: 251
Early stopping fine-tuning at epoch: 261
Early stopping fine-tuning at epoch: 271
Early stopping fine-tuning at epoch: 281
Early stopping fine-tuning at epoch: 420
Early stopping fine-tuning at epoch: 421
Early stopping fine-tuning at epoch: 900

TEST:
...
                  |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
----------------- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
GlobalLocalKernel | 0.8029 | 0.9731 | 0.4371 | 0.0158 |  0.0168 |       0.0137 |    0.0110 |   55.5029 | 232.3715
MF                | 0.7430 | 0.8998 | 0.7445 | 0.0548 |  0.0761 |       0.0675 |    0.0463 |    0.0302 |   0.8123
PMF               | 0.7534 | 0.9138 | 0.7744 | 0.0671 |  0.0969 |       0.0813 |    0.0639 |    1.4669 |   1.2117
BPR               | 2.0143 | 2.2267 | 0.8695 | 0.1

In [11]:
# Run the experiment: train and evaluate every model on the train/test split
try:
    cornac.Experiment(
        eval_method=rs,
        models=[my_model, mf, pmf, bpr],
        metrics=metrics,
        user_based=True,
    ).run()

    # Retrieve everything that was printed into the in-memory buffer
    experiment_results = sys.stdout.getvalue()
finally:
    # Always hand stdout back, even if training or evaluation raised;
    # otherwise every later cell would print into the buffer and look silent.
    sys.stdout = temp

Epochs 1-10 (Pre-Training): 100%|██████████| 10/10 [00:01<00:00,  7.78it/s, Train RMSE=1.0664]
Epochs 11-20 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 32.31it/s, Train RMSE=1.1418]
Epochs 21-30 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 31.31it/s, Train RMSE=1.0929]
Epochs 31-40 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 34.31it/s, Train RMSE=1.0234]
Epochs 41-50 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 33.04it/s, Train RMSE=1.0055]
Epochs 51-60 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 33.87it/s, Train RMSE=0.9866]
Epochs 61-70 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 30.84it/s, Train RMSE=0.9950]
Epochs 71-80 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 27.34it/s, Train RMSE=0.9840]
Epochs 81-90 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 30.68it/s, Train RMSE=0.9846]
Epochs 91-100 (Pre-Training): 100%|██████████| 10/10 [00:00<00:00, 32.19it/s, Train RMSE=0.9751]
Epochs 101-110 (Pre-Training): 100%|████

In [12]:
# Show the captured experiment output now that stdout is back to normal
print(experiment_results)

# Persist the same output, together with the model's attributes, to the log file
log_results(
    log_file=log_file,
    test_results=experiment_results,
    model_instance=my_model,
)

confirmation = f"Experiment results and hyperparameters saved to {log_file}"
print(confirmation)