# **Recommendation Systems**

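> This notebook trains and evaluates several recommender models from the Cornac library (MostPop, MF, BPR, GlobalAvg and NeuMF), plus a custom Keras rating model, on the Amazon "Gift Cards" reviews dataset.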



# **1.- DATASET LOADING**

In [1]:
!wget -O Gift_Cards.jsonl.gz https://web.archive.org/web/20240314164222/https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_2023/raw/review_categories/Gift_Cards.jsonl.gz --no-check-certificate
!rm -f Gift_Cards.jsonl
!gzip -d Gift_Cards.jsonl.gz


--2025-07-03 08:55:07--  https://web.archive.org/web/20240314164222/https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_2023/raw/review_categories/Gift_Cards.jsonl.gz
Resolving web.archive.org (web.archive.org)... 207.241.237.3
Connecting to web.archive.org (web.archive.org)|207.241.237.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12556849 (12M) [application/x-gzip]
Saving to: ‘Gift_Cards.jsonl.gz’


2025-07-03 08:55:09 (47.9 MB/s) - ‘Gift_Cards.jsonl.gz’ saved [12556849/12556849]



# **1.2.- PACKAGE LOADING**

In [3]:
!pip install cornac
import cornac
from cornac.eval_methods import RatioSplit
from cornac.models import MF, PMF, BPR, VAECF, COE, HPF, IBPR, OnlineIBPR
from cornac.metrics import  FMeasure, Precision, Recall, NDCG, AUC, MAP
import random




Collecting cornac
  Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.4/51.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting powerlaw (from cornac)
  Downloading powerlaw-1.5-py3-none-any.whl.metadata (9.3 kB)
Downloading cornac-2.3.3-cp311-cp311-manylinux1_x86_64.whl (31.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.5/31.5 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw, cornac
Successfully installed cornac-2.3.3 powerlaw-1.5


# **2.- CORNAC LIBRARY DATA LOADING**

In [6]:
import json
import pandas as pd


file = "Gift_Cards.jsonl"

# Each line of the file is one JSON document; only the three fields needed to
# build (user, item, rating) triples are kept.
records = []

# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform default.
with open(file, 'r', encoding='utf-8') as fp:
  for line in fp:
    line = line.strip()
    if not line:
      # Tolerate blank lines (e.g. a trailing newline) instead of failing in json.loads.
      continue
    line_data = json.loads(line)
    records.append((line_data["user_id"], line_data["asin"], line_data["rating"]))

# `all_data` is only ever bound to the DataFrame, never to the intermediate list.
all_data = pd.DataFrame(records, columns=["user_id", "item", "rating"])
all_data.head()

Unnamed: 0,user_id,item,rating
0,AHZ6XMOLEWA67S3TX7IWEXXGWSOA,B00IX1I3G6,5.0
1,AFZUK3MTBIBEDQOPAK3OATUOUKLA,B005ESMMWW,5.0
2,AFZUK3MTBIBEDQOPAK3OATUOUKLA,B01K8RIM5Y,5.0
3,AFZUK3MTBIBEDQOPAK3OATUOUKLA,B0091JKVU0,5.0
4,AH5L7ILVA6HYLZOUZIQAWNHVVK3A,B00FTGTM5E,1.0


## **2.1.- DATASET SPLIT FOR MODELING**

In [7]:
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit

# Seed shared by the split and by the models defined further down.
random_seed = 2533

# 20% of the ratings go to test, 20% to validation and the remainder to training.
# NOTE(review): rating_threshold=3.0 is presumably the cut-off Cornac uses to
# decide which ratings count as positive feedback for ranking metrics - confirm
# against the RatioSplit documentation.
split_options = {
    "test_size": 0.2,
    "val_size": 0.2,
    "rating_threshold": 3.0,
    "seed": random_seed,
}
rs = RatioSplit(data=all_data.values, **split_options)

# Report the size of every partition.
print(f"There are {rs.train_size} training examples")
print(f"There are {rs.val_size} validation examples")
print(f"There are {rs.test_size} test examples")

There are 91446 training examples
There are 30482 validation examples
There are 30482 test examples




In [8]:
# Metric objects shared by every experiment in this notebook.
# NOTE: Precision, Recall, NDCG, AUC and MAP were imported as classes from
# cornac.metrics in the package-loading cell; the assignments below rebind those
# names to metric instances, so from here on they no longer refer to the classes.
F1_at_10 = cornac.metrics.FMeasure(k=10)  # F1 Measure at 10
F1_at_100 = cornac.metrics.FMeasure(k=100)  # F1 Measure at 100
Precision = cornac.metrics.Precision(k=50)  # Precision in the top 50
Recall = cornac.metrics.Recall(k=50)  # Recall in the top 50
NDCG = cornac.metrics.NDCG()  # Normalized Discounted Cumulative Gain
AUC = cornac.metrics.AUC()  # Area Under the ROC Curve
MAP = cornac.metrics.MAP()  # Mean Average Precision
MAE = cornac.metrics.MAE()  # Mean Absolute Error
RMSE = cornac.metrics.RMSE()  # Root Mean Squared Error
MSE = cornac.metrics.MSE()  # Mean Squared Error

# **3.- POP (Most Popular)**

In [9]:
# Popularity baseline: no hyperparameters to set.
most_pop = cornac.models.MostPop()

# Evaluate the baseline on the shared split with the ranking metrics.
pop_experiment = cornac.Experiment(
    eval_method=rs,
    models=[most_pop],
    metrics=[Recall, NDCG, AUC],
    user_based=True,
    verbose=True,
)
pop_experiment.run()



[MostPop] Training started!

[MostPop] Evaluation started!


Ranking:   0%|          | 0/4120 [00:00<?, ?it/s]

Ranking:   0%|          | 0/4030 [00:00<?, ?it/s]


VALIDATION:
...
        |    AUC | NDCG@-1 | Recall@50 | Time (s)
------- + ------ + ------- + --------- + --------
MostPop | 0.9113 |  0.2881 |    0.4903 |   2.2096

TEST:
...
        |    AUC | NDCG@-1 | Recall@50 | Train (s) | Test (s)
------- + ------ + ------- + --------- + --------- + --------
MostPop | 0.9136 |  0.2843 |    0.5030 |    0.0157 |   3.0487



# **4.- MF (Matrix Factorization)**

In [10]:
import cornac
from cornac.hyperopt import Discrete, Continuous, GridSearch


# Base matrix-factorization model. The `k` and `learning_rate` given here are
# only starting values: both are swept by the grid search defined below.
MF = cornac.models.MF(
    k=10,
    max_iter=100,
    learning_rate=0.01,
    lambda_reg=0.02,
    use_bias=True,
    seed=random_seed,
)

# Search grid: 4 latent dimensions x 4 learning rates = 16 combinations.
space = [
    Discrete(name="k", values=[5, 15, 45, 100]),
    Discrete(name="learning_rate", values=[0.001, 0.01, 0.1, 0.5]),
]

# MAE is the metric handed to the grid search for picking the winning setting.
gs_MF = GridSearch(model=MF, space=space, metric=MAE, eval_method=rs)

# Run the search and report the rating-prediction errors of the selected model.
mf_experiment = cornac.Experiment(
    eval_method=rs,
    models=[gs_MF],
    metrics=[MAE, RMSE, MSE],
    user_based=True,
    verbose=True,
)
mf_experiment.run()

print("best hyperparameters:", gs_MF.best_params)


[GridSearch_MF] Training started!
Evaluating: {'k': 5, 'learning_rate': 0.001}
Evaluating: {'k': 5, 'learning_rate': 0.01}
Evaluating: {'k': 5, 'learning_rate': 0.1}
Evaluating: {'k': 5, 'learning_rate': 0.5}
Evaluating: {'k': 15, 'learning_rate': 0.001}
Evaluating: {'k': 15, 'learning_rate': 0.01}
Evaluating: {'k': 15, 'learning_rate': 0.1}
Evaluating: {'k': 15, 'learning_rate': 0.5}
Evaluating: {'k': 45, 'learning_rate': 0.001}
Evaluating: {'k': 45, 'learning_rate': 0.01}
Evaluating: {'k': 45, 'learning_rate': 0.1}
Evaluating: {'k': 45, 'learning_rate': 0.5}
Evaluating: {'k': 100, 'learning_rate': 0.001}
Evaluating: {'k': 100, 'learning_rate': 0.01}
Evaluating: {'k': 100, 'learning_rate': 0.1}
Evaluating: {'k': 100, 'learning_rate': 0.5}
Best parameter settings: {'k': 5, 'learning_rate': 0.1}
MAE = 0.2468

[GridSearch_MF] Evaluation started!


Rating:   0%|          | 0/4753 [00:00<?, ?it/s]

Rating:   0%|          | 0/4673 [00:00<?, ?it/s]


VALIDATION:
...
              |    MAE |    MSE |   RMSE | Time (s)
------------- + ------ + ------ + ------ + --------
GridSearch_MF | 0.2515 | 0.3301 | 0.2565 |   1.6458

TEST:
...
              |    MAE |    MSE |   RMSE | Train (s) | Test (s)
------------- + ------ + ------ + ------ + --------- + --------
GridSearch_MF | 0.2636 | 0.3762 | 0.2685 |   16.0920 |   1.7336

Mejores hiperparámetros: {'k': 5, 'learning_rate': 0.1}


# **5.- BPR**

In [11]:
# Base BPR model. `lambda_reg` given here is only a starting value: it is swept
# by the grid search defined below.
BPR = cornac.models.BPR(
    k=100,
    max_iter=100,
    learning_rate=0.5,
    lambda_reg=0.02,
    seed=random_seed,
)

# Search grid: five regularization strengths.
space = [
    Discrete(name="lambda_reg", values=[0.0001, 0.001, 0.01, 0.1, 0.5]),
]

# NDCG is the metric handed to the grid search for picking the winning setting.
gs_BPR = GridSearch(model=BPR, space=space, metric=NDCG, eval_method=rs)

# Run the search and report the ranking metrics of the selected model.
bpr_experiment = cornac.Experiment(
    eval_method=rs,
    models=[gs_BPR],
    metrics=[Recall, NDCG, AUC],
    user_based=True,
    verbose=True,
)
bpr_experiment.run()

print("best hyperparameters:", gs_BPR.best_params)


[GridSearch_BPR] Training started!
Evaluating: {'lambda_reg': 0.0001}
Evaluating: {'lambda_reg': 0.001}
Evaluating: {'lambda_reg': 0.01}
Evaluating: {'lambda_reg': 0.1}
Evaluating: {'lambda_reg': 0.5}
Best parameter settings: {'lambda_reg': 0.01}
NDCG@-1 = 0.3399

[GridSearch_BPR] Evaluation started!


Ranking:   0%|          | 0/4120 [00:00<?, ?it/s]

Ranking:   0%|          | 0/4030 [00:00<?, ?it/s]


VALIDATION:
...
               |    AUC | NDCG@-1 | Recall@50 | Time (s)
-------------- + ------ + ------- + --------- + --------
GridSearch_BPR | 0.8986 |  0.3389 |    0.5577 |   3.0760

TEST:
...
               |    AUC | NDCG@-1 | Recall@50 | Train (s) | Test (s)
-------------- + ------ + ------- + --------- + --------- + --------
GridSearch_BPR | 0.9004 |  0.3387 |    0.5688 |   32.1681 |   3.4603

best hyperparameters: {'lambda_reg': 0.01}


# **6.- GA (Global Average)**

In [12]:
# Global-average baseline: no hyperparameters to set.
global_avg = cornac.models.GlobalAvg()

# Evaluate the baseline on the shared split with the rating-error metrics.
ga_experiment = cornac.Experiment(
    eval_method=rs,
    models=[global_avg],
    metrics=[MAE, RMSE, MSE],
    user_based=True,
    verbose=True,
)
ga_experiment.run()



[GlobalAvg] Training started!

[GlobalAvg] Evaluation started!


Rating:   0%|          | 0/4753 [00:00<?, ?it/s]

Rating:   0%|          | 0/4673 [00:00<?, ?it/s]


VALIDATION:
...
          |    MAE |    MSE |   RMSE | Time (s)
--------- + ------ + ------ + ------ + --------
GlobalAvg | 0.5684 | 0.6223 | 0.5699 |   1.9304

TEST:
...
          |    MAE |    MSE |   RMSE | Train (s) | Test (s)
--------- + ------ + ------ + ------ + --------- + --------
GlobalAvg | 0.5725 | 0.6493 | 0.5735 |    0.0006 |   1.7038



# **7.- NeuMF**

In [13]:
# NeuMF hyperparameters, kept as named values so they are easy to tune.
num_factors = 16
layers = [64, 32, 16, 8]
act_fn = "relu"
learner = "adam"
backend = "pytorch"  # or "tensorflow"
num_epochs = 20
batch_size = 256
lr = 0.01
num_neg = 10
seed = random_seed

# Build the model from the values above.
neumf = cornac.models.NeuMF(
    num_factors=num_factors, layers=layers, act_fn=act_fn,
    learner=learner, backend=backend,
    num_epochs=num_epochs, batch_size=batch_size,
    lr=lr, num_neg=num_neg, seed=seed,
)

# Evaluate NeuMF on the shared split with the ranking metrics.
neumf_experiment = cornac.Experiment(
    eval_method=rs,
    models=[neumf],
    metrics=[Recall, NDCG, AUC],
    verbose=True,
)
neumf_experiment.run()



[NeuMF] Training started!


  0%|          | 0/20 [00:00<?, ?it/s]


[NeuMF] Evaluation started!


Ranking:   0%|          | 0/4120 [00:00<?, ?it/s]

Ranking:   0%|          | 0/4030 [00:00<?, ?it/s]


VALIDATION:
...
      |    AUC | NDCG@-1 | Recall@50 | Time (s)
----- + ------ + ------- + --------- + --------
NeuMF | 0.7874 |  0.2457 |    0.3153 |   7.0772

TEST:
...
      |    AUC | NDCG@-1 | Recall@50 | Train (s) | Test (s)
----- + ------ + ------- + --------- + --------- + --------
NeuMF | 0.7928 |  0.2462 |    0.3193 |  442.1549 |   5.7595



# **8.- NEURAL RATING**

In [18]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from cornac.models import Recommender

class Neural_Rating(Recommender):
    """Rating predictor built from user/item embeddings and a small dense network.

    The user and item indices are embedded, the two embeddings are concatenated
    and passed through two (Dense -> BatchNormalization -> Dropout) stages and a
    final linear unit that outputs the predicted rating. The network is trained
    with mean squared error and the Adam optimizer.

    Parameters
    ----------
    embedding_dim : int
        Size of the user and item embedding vectors.
    dropout : float
        Dropout rate applied after each hidden stage.
    dense_units1, dense_units2 : int
        Number of units of the first and second hidden layers.
    l2_reg : float
        L2 penalty applied to the embeddings and to the hidden-layer kernels.
    epochs : int
        Maximum number of training epochs.
    batch_size : int
        Mini-batch size used for training.
    learning_rate : float
        Learning rate of the Adam optimizer.
    patience : int
        Number of epochs without improvement tolerated by early stopping.
    verbose : int
        Verbosity level forwarded to Keras ``fit``.
    **kwargs
        Forwarded to ``Recommender.__init__``.
    """

    def __init__(self, embedding_dim=10, dropout=0.2, dense_units1=10,
                 dense_units2=8, l2_reg=0.001,
                 epochs=50, batch_size=64,
                 learning_rate=0.0001, patience=5,
                 verbose=1, **kwargs):
        super().__init__(name="NR", **kwargs)
        self.embedding_dim = embedding_dim
        self.dropout = dropout
        self.dense_units1 = dense_units1
        self.dense_units2 = dense_units2
        self.l2_reg = l2_reg
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.patience = patience
        self.verbose = verbose
        self.model = None
        # user index -> 1-D array with the predicted rating of every training item.
        self._user_scores = {}

    def _build_model(self, num_users, num_items):
        """Create and compile the Keras network for the given vocabulary sizes."""
        user_input = Input(shape=(1,), name='user_input')
        item_input = Input(shape=(1,), name='item_input')

        # `input_length` is not passed: the sequence length is already fixed by
        # the Input shape and the argument is deprecated in recent Keras releases.
        user_embedding = Embedding(input_dim=num_users, output_dim=self.embedding_dim,
                                   name='user_embedding',
                                   embeddings_regularizer=l2(self.l2_reg))(user_input)
        item_embedding = Embedding(input_dim=num_items, output_dim=self.embedding_dim,
                                   name='item_embedding',
                                   embeddings_regularizer=l2(self.l2_reg))(item_input)

        hidden = Concatenate()([Flatten()(user_embedding), Flatten()(item_embedding)])

        # Two identical hidden stages that differ only in width.
        for units in (self.dense_units1, self.dense_units2):
            hidden = Dense(units, activation='relu',
                           kernel_regularizer=l2(self.l2_reg))(hidden)
            hidden = BatchNormalization()(hidden)
            hidden = Dropout(self.dropout)(hidden)

        output = Dense(1, activation='linear')(hidden)

        model = Model(inputs=[user_input, item_input], outputs=output)
        model.compile(optimizer=Adam(learning_rate=self.learning_rate),
                      loss='mean_squared_error')
        return model

    @staticmethod
    def _uir_arrays(dataset):
        """Return the (user, item, rating) columns of ``dataset`` as typed arrays."""
        user_ids, item_ids, ratings = dataset.uir_tuple
        return (np.array(user_ids, dtype=np.int64),
                np.array(item_ids, dtype=np.int64),
                np.array(ratings, dtype=np.float32))

    def fit(self, train_set, val_set=None):
        """Build the network and train it on ``train_set``.

        Parameters
        ----------
        train_set
            Training data; must expose ``num_users``, ``num_items`` and ``uir_tuple``.
        val_set
            Optional validation data exposing ``uir_tuple``. When given it is
            used as Keras validation data and drives early stopping.

        Returns
        -------
        self
        """
        Recommender.fit(self, train_set, val_set)

        # Predictions cached by a previous fit belong to the previous weights.
        self._user_scores = {}
        self.model = self._build_model(train_set.num_users, train_set.num_items)

        user_ids, item_ids, ratings = self._uir_arrays(train_set)

        has_validation = val_set is not None
        fit_options = dict(epochs=self.epochs,
                           batch_size=self.batch_size,
                           verbose=self.verbose)
        if has_validation:
            val_user_ids, val_item_ids, val_ratings = self._uir_arrays(val_set)
            fit_options['validation_data'] = ([val_user_ids, val_item_ids], val_ratings)

        # 'val_loss' only exists when validation data is supplied; without it the
        # training loss is monitored so that early stopping can still act.
        fit_options['callbacks'] = [tf.keras.callbacks.EarlyStopping(
            monitor='val_loss' if has_validation else 'loss',
            patience=self.patience,
            restore_best_weights=True)]

        self.model.fit([user_ids, item_ids], ratings, **fit_options)
        return self

    def score(self, user_idx, item_idx=None):
        """Predict ratings for one user.

        Predictions are computed lazily, one user at a time, and cached until
        the next ``fit``. The full (num_users x num_items) matrix is never
        materialised up front, because it can be far larger than the set of
        users that are actually evaluated.

        Parameters
        ----------
        user_idx : int
            Index of the user.
        item_idx : int, optional
            Index of the item. When omitted, the scores of all training items
            are returned.

        Returns
        -------
        A NumPy scalar when ``item_idx`` is given, otherwise a 1-D array with
        one score per training item.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.model is None:
            raise RuntimeError("Neural_Rating must be fitted before calling score().")

        user_scores = self._user_scores.get(user_idx)
        if user_scores is None:
            num_items = self.train_set.num_items
            # Column vectors of shape (num_items, 1), matching the Input layers.
            item_column = np.arange(num_items, dtype=np.int64).reshape(-1, 1)
            user_column = np.full_like(item_column, user_idx)
            # predict_on_batch avoids the per-call overhead of predict(), which
            # matters because this runs once per evaluated user.
            user_scores = np.asarray(
                self.model.predict_on_batch([user_column, item_column])
            ).reshape(num_items)
            self._user_scores[user_idx] = user_scores

        if item_idx is None:
            # Hand out a copy so callers cannot corrupt the cached row.
            return user_scores.copy()
        return user_scores[item_idx]

In [19]:
# Custom neural rating model with its default hyperparameters.
NR = Neural_Rating()

# Evaluate it on the shared split with the rating-error metrics.
experiment = cornac.Experiment(
    eval_method=rs,
    models=[NR],
    metrics=[MAE, RMSE, MSE],
    user_based=True,
    verbose=True,
)

# Train and evaluate.
experiment.run()


[NR] Training started!
Epoch 1/3
[1m1421/1421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - loss: 21.9285 - val_loss: 19.2718
Epoch 2/3
[1m1421/1421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - loss: 16.1862 - val_loss: 12.2881
Epoch 3/3
[1m1421/1421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - loss: 9.6023 - val_loss: 5.8529

[NR] Evaluation started!


Rating:   0%|          | 0/4753 [00:00<?, ?it/s]

KeyboardInterrupt: 