In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from sentence_transformers import SentenceTransformer
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning.callbacks import Callback
import pandas as pd
import numpy as np
from pytorch_lightning.callbacks import ModelCheckpoint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3060 Laptop GPU'

In [3]:
users = pd.read_csv('processed_dataset/MovieLens-1M/users/users_movielens.csv')
movies = pd.read_csv('processed_dataset/MovieLens-1M/movies/movies_movielens.csv')
full_ratings = pd.read_csv('processed_dataset/MovieLens-1M/ratings/ratings_fulldata_movielens.csv')
train_ratings = pd.read_csv('processed_dataset/MovieLens-1M/ratings/ratings_traindata_movielens.csv')
val_ratings = pd.read_csv('processed_dataset/MovieLens-1M/ratings/ratings_valdata_movielens.csv')
test_ratings = pd.read_csv('processed_dataset/MovieLens-1M/ratings/ratings_testdata_movielens.csv')

In [4]:
# Combine user features into a single string for each user
users['user_features'] = 'occupation: ' + users['occupation'] + ' [SEP] age: ' + users['age'].astype(str) + ' [SEP] gender: ' + users['gender'].astype(str)

# Combine movie features into a single string for each movie
movies['movie_features'] = 'title: ' + movies['title'] + ' [SEP] genres: ' + movies['genres']

In [5]:
# Create a dictionary for fast lookup
user_features_dict = users.set_index('user_id')['user_features'].to_dict()
movie_features_dict = movies.set_index('item_id')['movie_features'].to_dict()

# Create lists of user and item texts
user_texts = [user_features_dict[userId] for userId in full_ratings['user_id'].unique()]
item_texts = [movie_features_dict[movieId] for movieId in full_ratings['item_id'].unique()]

# Create a mapping from userId and movieId to indices
user_id_to_idx = {userId: idx for idx, userId in enumerate(full_ratings['user_id'].unique())}
movie_id_to_idx = {movieId: idx for idx, movieId in enumerate(full_ratings['item_id'].unique())}

# Map userId and movieId in ratings_df to indices
train_ratings['user_idx'] = train_ratings['user_id'].map(user_id_to_idx)
train_ratings['movie_idx'] = train_ratings['item_id'].map(movie_id_to_idx)

# Map userId and movieId in ratings_val to indices
val_ratings['user_idx'] = val_ratings['user_id'].map(user_id_to_idx)
val_ratings['movie_idx'] = val_ratings['item_id'].map(movie_id_to_idx)

# Map userId and movieId in ratings_val to indices
test_ratings['user_idx'] = test_ratings['user_id'].map(user_id_to_idx)
test_ratings['movie_idx'] = test_ratings['item_id'].map(movie_id_to_idx)

# Extract user indices, item indices, and ratings
train_user_indices = torch.LongTensor(train_ratings['user_idx'].values).to(device)
train_item_indices = torch.LongTensor(train_ratings['movie_idx'].values).to(device)
train_labels = torch.FloatTensor(train_ratings['rating'].values).to(device)

# Extract user indices, item indices, and ratings for validation
val_user_indices = torch.LongTensor(val_ratings['user_idx'].values).to(device)
val_item_indices = torch.LongTensor(val_ratings['movie_idx'].values).to(device)
val_labels = torch.FloatTensor(val_ratings['rating'].values).to(device)

test_user_indices = torch.LongTensor(test_ratings['user_idx'].values).to(device)
test_item_indices = torch.LongTensor(test_ratings['movie_idx'].values).to(device)
test_labels = torch.FloatTensor(test_ratings['rating'].values).to(device)

In [6]:
item_texts[train_item_indices[0].item()]

'title: Girl, Interrupted (1999) [SEP] genres: Drama'

In [7]:
user_texts[train_user_indices[0].item()]

'occupation: K-12 student [SEP] age: Under 18 [SEP] gender: Female'

In [8]:
train_labels[0].item()

1.0

In [9]:
# Create DataLoader for training data
train_dataset = TensorDataset(train_user_indices, train_item_indices, train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)

# Create DataLoader for training data
val_dataset = TensorDataset(val_user_indices, val_item_indices, val_labels)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True, drop_last=True)

# Create DataLoader for training data
test_dataset = TensorDataset(test_user_indices, test_item_indices, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True, drop_last=True)

In [10]:
class TwoTowerModel(pl.LightningModule):
    def __init__(self, user_model_name, item_model_name, embedding_size=384):
        super(TwoTowerModel, self).__init__()
        self.user_model = SentenceTransformer(user_model_name)
        self.item_model = SentenceTransformer(item_model_name)

        self.user_fc = nn.Linear(embedding_size, embedding_size)
        self.item_fc = nn.Linear(embedding_size, embedding_size)

        self.criterion = nn.BCELoss()
        self.epoch_losses = {'train_loss': [], 'val_loss': []}

    def forward(self, user_text, item_text):
        user_embedding = self.user_model.encode(user_text, convert_to_tensor=True).to(device)
        item_embedding = self.item_model.encode(item_text, convert_to_tensor=True).to(device)

        user_output = self.user_fc(user_embedding)
        item_output = self.item_fc(item_embedding)

        # dot_product = torch.sum(user_output * item_output, dim=1)
        dot_product = torch.matmul(user_output.unsqueeze(1), item_output.unsqueeze(2)).squeeze()
        # Apply sigmoid transformation and scaling here

        dot_product = torch.sigmoid(dot_product)

        return dot_product

    def training_step(self, batch, batch_idx):
        users, items, ratings = batch

        users = [user_texts[i] for i in users.tolist()]
        items = [item_texts[i] for i in items.tolist()]

        preds = self(users, items)

        loss = self.criterion(preds, ratings)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        users, items, ratings = batch

        users = [user_texts[i] for i in users.tolist()]
        items = [item_texts[i] for i in items.tolist()]

        preds = self(users, items)

        loss = self.criterion(preds, ratings)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=1e-5)

    def full_predict(self, user_ids, item_ids):
        users = [user_texts[i] for i in user_ids.tolist()]
        items = [item_texts[i] for i in item_ids.tolist()]
        user_embedding = self.user_model.encode(users, convert_to_tensor=True).to(self.device)
        item_embeddings = torch.stack([self.item_model.encode(item_text, convert_to_tensor=True) for item_text in items]).to(self.device)

        user_output = self.user_fc(user_embedding)
        item_output = self.item_fc(item_embeddings)

        dot_product = torch.matmul(user_output, item_output.T)
        # dot_product = 4 * torch.sigmoid(dot_product) + 1

        return dot_product

class PrintLossesCallback(Callback):
    def on_train_epoch_end(self, trainer, pl_module):
        train_loss = trainer.callback_metrics.get('train_loss')
        if train_loss is not None:
            pl_module.epoch_losses['train_loss'].append(train_loss.item())
            print(f"Epoch {trainer.current_epoch + 1}: Train Loss: {train_loss.item()}")

    def on_validation_epoch_end(self, trainer, pl_module):
        val_loss = trainer.callback_metrics.get('val_loss')
        if val_loss is not None:
            pl_module.epoch_losses['val_loss'].append(val_loss.item())
            print(f"Epoch {trainer.current_epoch + 1}: Val Loss: {val_loss.item()}")

In [11]:
# model = TwoTowerModel(user_model_name='all-MiniLM-L6-v2', item_model_name='all-MiniLM-L6-v2')
model = TwoTowerModel(user_model_name='paraphrase-MiniLM-L6-v2', item_model_name='paraphrase-MiniLM-L6-v2')

# Define the ModelCheckpoint callback
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',  # Metric to monitor
    dirpath='checkpoints/',  # Directory to save the checkpoints
    filename='no-history-best-checkpoint',  # Filename for the best model
    save_top_k=1,  # Save only the top 1 model
    mode='min'  # Mode to save the best model (min for validation loss)
)

trainer = pl.Trainer(max_epochs=5, log_every_n_steps=1, callbacks=[PrintLossesCallback()], enable_progress_bar=True)
trainer.fit(model, train_dataloader, val_dataloader)

# Print losses after training completes
print("Epoch losses:")
for epoch in range(trainer.max_epochs):
    train_loss = model.epoch_losses['train_loss'][epoch] if epoch < len(model.epoch_losses['train_loss']) else 'N/A'
    val_loss = model.epoch_losses['val_loss'][epoch] if epoch < len(model.epoch_losses['val_loss']) else 'N/A'
    print(f"Epoch {epoch + 1}: Train Loss: {train_loss}, Val Loss: {val_loss}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params | Mode 
-----------------------------------------------------------
0 | user_model | SentenceTransformer | 22.7 M | train
1 | item_model | SentenceTransformer | 22.7 M | train
2 | user_fc    | Linear              | 147 K  | train
3 | item_fc    | Linear              | 147 K  | train
4 | criterion  | BCELoss             | 0      | train
-----------------------------------------------------------
45.7 M    Trainable pa

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

D:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:475: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
D:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00,  3.72it/s]Epoch 1: Val Loss: 1.287331223487854
                                                                           

D:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 12464/12464 [09:56<00:00, 20.90it/s, v_num=2]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1557 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1557 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/1557 [00:00<01:22, 18.86it/s][A
Validation DataLoader 0:   0%|          | 2/1557 [00:00<01:21, 19.18it/s][A
Validation DataLoader 0:   0%|          | 3/1557 [00:00<01:18, 19.69it/s][A
Validation DataLoader 0:   0%|          | 4/1557 [00:00<01:18, 19.85it/s][A
Validation DataLoader 0:   0%|          | 5/1557 [00:00<01:20, 19.40it/s][A
Validation DataLoader 0:   0%|          | 6/1557 [00:00<01:20, 19.20it/s][A
Validation DataLoader 0:   0%|          | 7/1557 [00:00<01:21, 19.10it/s][A
Validation DataLoader 0:   1%|          | 8/1557 [00:00<01:21, 18.93it/s][A
Validation DataLoader 0:   1%|          | 9/1557 [00:00<01:21, 18.92it/s][A
Validation DataLoader 0:   1%|          | 10/1557 [00:00<01:2

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 12464/12464 [17:49<00:00, 11.65it/s, v_num=2]
Epoch losses:
Epoch 1: Train Loss: 0.3183195888996124, Val Loss: 1.287331223487854
Epoch 2: Train Loss: 0.5379856824874878, Val Loss: 0.45808055996894836
Epoch 3: Train Loss: 0.4303486943244934, Val Loss: 0.4514903426170349
Epoch 4: Train Loss: 0.4938172698020935, Val Loss: 0.45266953110694885
Epoch 5: Train Loss: 0.38519468903541565, Val Loss: 0.45056694746017456


In [112]:
class ImprovedTwoTowerModel(pl.LightningModule):
    def __init__(self, user_model_name, item_model_name, embedding_size=384, hidden_units=64, dropout_rate=0.5):
        super(ImprovedTwoTowerModel, self).__init__()
        self.user_model = SentenceTransformer(user_model_name)
        self.item_model = SentenceTransformer(item_model_name)

        self.user_fc = nn.Sequential(
            nn.Linear(embedding_size, hidden_units),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_units, embedding_size)
        )
        self.item_fc = nn.Sequential(
            nn.Linear(embedding_size, hidden_units),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_units, embedding_size)
        )

        self.criterion = nn.MSELoss()
        self.epoch_losses = {'train_loss': [], 'val_loss': []}

    def forward(self, user_text, item_text):
        user_embedding = self.user_model.encode(user_text, convert_to_tensor=True).to(device)
        item_embedding = self.item_model.encode(item_text, convert_to_tensor=True).to(device)

        user_output = self.user_fc(user_embedding)
        item_output = self.item_fc(item_embedding)

        dot_product = torch.matmul(user_output.unsqueeze(1), item_output.unsqueeze(2)).squeeze()
        dot_product = 4 * torch.sigmoid(dot_product) + 1

        return dot_product




    def training_step(self, batch, batch_idx):
        users, items, ratings = batch

        users = [user_texts[i] for i in users.tolist()]
        items = [item_texts[i] for i in items.tolist()]

        preds = self(users, items)

        loss = self.criterion(preds, ratings)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        users, items, ratings = batch

        users = [user_texts[i] for i in users.tolist()]
        items = [item_texts[i] for i in items.tolist()]

        preds = self(users, items)

        loss = self.criterion(preds, ratings)
        self.log('val_loss', loss)
        return loss

    def full_predict(self, user_ids, item_ids):
        users = [user_texts[i] for i in user_ids.tolist()]
        items = [item_texts[i] for i in item_ids.tolist()]
        user_embedding = self.user_model.encode(users, convert_to_tensor=True).to(self.device)
        item_embeddings = torch.stack([self.item_model.encode(item_text, convert_to_tensor=True) for item_text in items]).to(self.device)

        user_output = self.user_fc(user_embedding)
        item_output = self.item_fc(item_embeddings)

        dot_product = torch.matmul(user_output, item_output.T)
        dot_product = 4 * torch.sigmoid(dot_product) + 1

        return dot_product

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=1e-4)

In [113]:
model = ImprovedTwoTowerModel(user_model_name='paraphrase-MiniLM-L6-v2', item_model_name='paraphrase-MiniLM-L6-v2')

# Define the ModelCheckpoint callback
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',  # Metric to monitor
    dirpath='checkpoints/',  # Directory to save the checkpoints
    filename='no-history-best-checkpoint',  # Filename for the best model
    save_top_k=1,  # Save only the top 1 model
    mode='min'  # Mode to save the best model (min for validation loss)
)

trainer = pl.Trainer(max_epochs=1, log_every_n_steps=1, callbacks=[PrintLossesCallback(), checkpoint_callback], enable_progress_bar=True)
trainer.fit(model, train_dataloader, val_dataloader)

# Print losses after training completes
print("Epoch losses:")
for epoch in range(trainer.max_epochs):
    train_loss = model.epoch_losses['train_loss'][epoch] if epoch < len(model.epoch_losses['train_loss']) else 'N/A'
    val_loss = model.epoch_losses['val_loss'][epoch] if epoch < len(model.epoch_losses['val_loss']) else 'N/A'
    print(f"Epoch {epoch + 1}: Train Loss: {train_loss}, Val Loss: {val_loss}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
D:\Anaconda\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:652: Checkpoint directory D:\Recommendation System Project\LLM-BASED RS\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params | Mode 
-----------------------------------------------------------
0 | user_model | SentenceTransformer | 22.7 M | train
1 | item_model | SentenceTransformer | 22.7 M | train
2 | user_fc    | Sequential          | 49.6 K | train
3 | item_fc    | Sequential          | 49.6 K | train
4 | criterion  | MSELoss             | 0      | train
-----------------------------------------------------------
45.5 M    Trainable params
0         Non-trainable params
45.5 M    Total params
182.103   Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 12.77it/s]

D:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:475: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
D:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 1: Val Loss: 1.1764382123947144
                                                                           

D:\Anaconda\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 18753/18753 [10:18<00:00, 30.33it/s, v_num=3]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/6251 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/6251 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 1/6251 [00:00<03:48, 27.37it/s][A
Validation DataLoader 0:   0%|          | 2/6251 [00:00<03:43, 27.94it/s][A
Validation DataLoader 0:   0%|          | 3/6251 [00:00<03:34, 29.09it/s][A
Validation DataLoader 0:   0%|          | 4/6251 [00:00<03:35, 28.99it/s][A
Validation DataLoader 0:   0%|          | 5/6251 [00:00<03:36, 28.89it/s][A
Validation DataLoader 0:   0%|          | 6/6251 [00:00<03:40, 28.29it/s][A
Validation DataLoader 0:   0%|          | 7/6251 [00:00<03:37, 28.73it/s][A
Validation DataLoader 0:   0%|          | 8/6251 [00:00<03:35, 29.02it/s][A
Validation DataLoader 0:   0%|          | 9/6251 [00:00<03:34, 29.17it/s][A
Validation DataLoader 0:   0%|          | 10/6251 [00:00<03:3

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 18753/18753 [13:40<00:00, 22.86it/s, v_num=3]
Epoch losses:
Epoch 1: Train Loss: 1.1226508617401123, Val Loss: 1.1764382123947144


In [15]:
model.epoch_losses

{'train_loss': [1.0296480655670166,
  1.3504531383514404,
  0.9945042133331299,
  0.9941524267196655,
  1.199732780456543],
 'val_loss': [1.1964757442474365,
  1.2818604707717896,
  1.2327672243118286,
  1.209013819694519,
  1.1945436000823975,
  1.18431556224823]}

# Evaluation

In [12]:
# Assuming the training part has been done already, load the best model checkpoint
# best_model_path = './lightning_logs/paraphrase-MiniLM-L6-v2/binarized/no-history_5-epochs_lr-1e-5/checkpoints/epoch=4-step=62320.ckpt'  # Path where the best model is saved
best_model_path = './lightning_logs/version_2/checkpoints/epoch=4-step=62320.ckpt'  # Path where the best model is saved
best_model = TwoTowerModel.load_from_checkpoint(best_model_path, user_model_name='paraphrase-MiniLM-L6-v2', item_model_name='paraphrase-MiniLM-L6-v2').to(device)



In [34]:
def get_top_n_items_without_history(model, userId, n):
    # Ensure the model is in evaluation mode
    model.eval()

    # Get the user text for the given userId
    user_text = user_features_dict[userId]

    # Encode the user text
    user_embedding = model.user_model.encode(user_text, convert_to_tensor=True)

    # Compute the scores (dot product between user embedding and each item embedding)
    user_output = model.user_fc(user_embedding)
    item_output = model.item_fc(full_items_embeddings)
    dot_product = torch.matmul(user_output, item_output.t()).squeeze()
    # scores =  4 * torch.sigmoid(dot_product) + 1
    # Get the top n item indices and their scores
    top_n_scores, top_n_indices = torch.topk(dot_product, n)

    # Map indices back to item IDs and convert scores to a pandas Series
    top_n_item_ids = [list(movie_id_to_idx.keys())[list(movie_id_to_idx.values()).index(idx)] for idx in top_n_indices.tolist()]
    top_n_scores = top_n_scores.cpu().detach().numpy()

    top_n_series = pd.Series(data=top_n_scores, index=top_n_item_ids)

    return top_n_series

In [39]:
best_model.to('cpu')

TwoTowerModel(
  (user_model): SentenceTransformer(
    (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
    (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  )
  (item_model): SentenceTransformer(
    (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
    (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  )
  (user_fc): Linear(in_features=384, out_features=384, bias=True)
  (item_fc): Linear(in_features=3

In [32]:
full_items_embeddings = torch.stack([model.item_model.encode(item_text, convert_to_tensor=True) for item_text in item_texts])

In [40]:
user_ids = test_ratings['user_id'].unique()  # List of user IDs in the test dataset
n = 5  # Number of recommendations per user
recommendations = {}

for user_id in user_ids:
    recommendations[user_id] = get_top_n_items_without_history(best_model, user_id, n)

## Evaluation with groupby

In [48]:
# Prepare test data for NDCG calculation
test_ratings['predicted_rating_no_history'] = test_ratings.apply(lambda row: recommendations.get(row['user_id'], pd.Series()).get(row['item_id'], 0), axis=1)
test_ratings_grouped = test_ratings.groupby('user_id')

In [50]:
from sklearn.metrics import ndcg_score

ndcg_scores_two_tower = []

for user, group in test_ratings_grouped:
    if len(group) > 1:  # Filter users with more than one rating
        true_ratings = group['rating'].values
        pred_ratings = group['predicted_rating_no_history'].values
        ndcg_scores_two_tower.append(ndcg_score([true_ratings], [pred_ratings], k=5))


## Evaluation full from github

In [53]:
import torch
import numpy as np

class Metric(object):
    def __init__(self, k):
        self.k = k

    def recall(self, test_data, r, k):
        right_pred = r[:, :k].sum(1)
        recall_n = np.array([1 if test_data[i].item() > 0 else 0 for i in range(len(test_data))])
        recall = np.sum(right_pred / recall_n)
        return recall

    def precision(self, r, k):
        right_pred = r[:, :k].sum(1)
        precis_n = k
        precision = np.sum(right_pred) / precis_n
        return precision

    def mrr(self, r, k):
        pred_data = r[:, :k]
        scores = 1. / np.arange(1, k + 1)
        pred_data = pred_data * scores
        pred_data = pred_data.sum(1)
        return np.sum(pred_data)

    def ndcg(self, test_data, r, k):
        assert len(r) == len(test_data)
        pred_data = r[:, :k]

        test_matrix = np.zeros((len(pred_data), k))
        for i, items in enumerate(test_data):
            length = min(k, len(pred_data[i]))
            test_matrix[i, :length] = 1
        max_r = test_matrix
        idcg = np.sum(max_r * 1. / np.log2(np.arange(2, k + 2)), axis=1)
        dcg = pred_data * (1. / np.log2(np.arange(2, k + 2)))
        dcg = np.sum(dcg, axis=1)
        idcg[idcg == 0.] = 1.
        ndcg = dcg / idcg
        ndcg[np.isnan(ndcg)] = 0.
        return np.sum(ndcg)

    def get_label(self, test_data, pred_data):
        r = []
        for i in range(len(test_data)):
            ground_true = test_data[i]
            predict_topk = pred_data[i]

            pred = list(map(lambda x: x in ground_true, predict_topk))
            pred = np.array(pred).astype("float")
            r.append(pred)
        return np.array(r).astype('float')

    def eval_batch(self, data, topks):
        sorted_items = data[0].numpy()
        ground_true = data[1]

        r = self.get_label(ground_true, sorted_items)
        result = {}
        for k in topks:
            result[f'recall@{k}'] = self.recall(ground_true, r, k)
            result[f'precision@{k}'] = self.precision(r, k)
            result[f'mrr@{k}'] = self.mrr(r, k)
            result[f'ndcg@{k}'] = self.ndcg(ground_true, r, k)
        return result

    def eval(self, model, test_dataloader, topks=[5]):
        result = {f'recall@{k}': 0 for k in topks}
        result.update({f'precision@{k}': 0 for k in topks})
        result.update({f'mrr@{k}': 0 for k in topks})
        result.update({f'ndcg@{k}': 0 for k in topks})

        batch_ratings = []
        ground_truths = []
        test_user_num = len(test_dataloader.dataset)

        for tem in test_dataloader:
            user_ids = tem[0].cpu().numpy().tolist()
            item_ids = tem[1].cpu().numpy().tolist()
            batch_data = [torch.tensor(user_ids), torch.tensor(item_ids)]

            with torch.no_grad():
                batch_pred = model.full_predict(batch_data[0], batch_data[1])

            _, batch_rate = torch.topk(batch_pred, k=max(topks))
            batch_ratings.append(batch_rate.cpu())

            # Assuming test_labels is a dictionary with user_id as keys and list of item_ids as values
            ground_truth = [test_labels[user_idx] for user_idx in user_ids]
            ground_truths.append(ground_truth)

        data_pair = zip(batch_ratings, ground_truths)
        eval_results = []
        for _data in data_pair:
            eval_results.append(self.eval_batch(_data, topks))

        for batch_result in eval_results:
            for metric in batch_result:
                result[metric] += batch_result[metric] / test_user_num

        return result


## Old Evaluation high

In [54]:
def dcg_at_k(r, k):
    r = np.asfarray(r)[:k]
    if r.size:
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    return 0.

def ndcg_at_k(r, k):
    dcg_max = EvaluateMetrics.dcg_at_k(sorted(r, reverse=True), k)
    if not dcg_max:
        return 0.
    return EvaluateMetrics.dcg_at_k(r, k) / dcg_max

def mrr_at_k(relevance_scores, k):
    for i, rel in enumerate(relevance_scores[:k]):
        if rel > 0:
            return 1 / (i + 1)
    return 0.

def hr_at_k(relevance_scores, k):
    return int(np.any(np.asarray(relevance_scores)[:k] > 0))

In [55]:
def evaluate_recommendations(recommendations, k):
    ndcg_scores = []
    mrr_scores = []
    hr_scores = []

    for user_id, user_recommendations in recommendations.items():
        true_ratings = test_ratings[
            (test_ratings['user_id'] == user_id) & (
                test_ratings['item_id'].isin(user_recommendations.index))]

        if true_ratings.empty:
            continue

        true_ratings = true_ratings.set_index('item_id').reindex(user_recommendations.index)['rating'].fillna(0)

        relevance = true_ratings.values / 5  # Assuming ratings are from 1 to 5

        ndcg_scores.append(ndcg_at_k(relevance, k))
        mrr_scores.append(mrr_at_k(relevance, k))
        hr_scores.append(hr_at_k(relevance, k))

    average_ndcg = np.mean(ndcg_scores)
    average_mrr = np.mean(mrr_scores)
    average_hr = np.mean(hr_scores)

    return {
        'NDCG@k': average_ndcg,
        'MRR@k': average_mrr,
        'HR@k': average_hr
    }

## Old Evaluation low

In [56]:
def evaluate_recommendation_without_history(recommendations, k):
    ndcg_scores = []
    mrr_scores = []
    hr_scores = []

    for user_id in recommendations.keys():
        user_recommendations = recommendations[user_id]
        # Filter test ratings for the current user
        user_test_ratings = test_ratings[test_ratings['user_id'] == user_id]

        relevance_scores = []
        for item_id in user_recommendations.index:
            if item_id in user_test_ratings['item_id'].values:
                relevance_scores.append(
                    user_test_ratings[user_test_ratings['item_id'] == item_id]['rating'].values[0] / 5)
            else:
                relevance_scores.append(0)

        # Calculate metrics
        ndcg_scores.append(ndcg_at_k(relevance_scores, k))
        mrr_scores.append(mrr_at_k(relevance_scores, k))
        hr_scores.append(hr_at_k(relevance_scores, k))

    average_ndcg = np.mean(ndcg_scores)
    average_mrr = np.mean(mrr_scores)
    average_hr = np.mean(hr_scores)

    return {
        'NDCG@k': average_ndcg,
        'MRR@k': average_mrr,
        'HR@k': average_hr
    }

## Calculations

In [57]:
two_tower_ndcg = np.mean(ndcg_scores_two_tower)
print("Two-Tower Model NDCG:", two_tower_ndcg)

Two-Tower Model NDCG: 0.8583484839454232


In [58]:
model.to('cuda')  # Ensure the model is on the correct device

# Initialize the metric evaluator
metric_evaluator = Metric(k=[5])

# Evaluate the model
eval_result = metric_evaluator.eval(best_model, test_dataloader, topks=[5])

# Print the results
for metric, values in eval_result.items():
    print(f"{metric}: {values}")

  recall = np.sum(right_pred / recall_n)
  recall = np.sum(right_pred / recall_n)


recall@5: nan
precision@5: 0.016173765801535685
mrr@5: 0.03694372269095642
ndcg@5: 0.01617840335064259


In [33]:
model.to('cuda')  # Ensure the model is on the correct device

# Initialize the metric evaluator
metric_evaluator = Metric(k=[5])

# Evaluate the model
eval_result = metric_evaluator.eval(best_model, test_dataloader, topks=[5])

# Print the results
for metric, values in eval_result.items():
    print(f"{metric}: {values}")

recall@5: 0.157731876305978
precision@5: 0.031546375261194276
mrr@5: 0.07256001239739744
ndcg@5: 0.031679775942507274


In [59]:
# Evaluate the recommendations
# evaluator = EvaluateMetrics(test_ratings)
evaluation_results = evaluate_recommendations(recommendations, k=n)

print(f"NDCG@{n}: {evaluation_results['NDCG@k']:.4f}")
print(f"MRR@{n}: {evaluation_results['MRR@k']:.4f}")
print(f"HR@{n}: {evaluation_results['HR@k']:.4f}")

NDCG@5: 0.5157
MRR@5: 0.3862
HR@5: 0.9231


In [39]:
from evaluator import EvaluateMetrics

# Evaluate the recommendations
# evaluator = EvaluateMetrics(test_ratings)
evaluation_results = evaluate_recommendations(recommendations, k=n)

print(f"NDCG@{n}: {evaluation_results['NDCG@k']:.4f}")
print(f"MRR@{n}: {evaluation_results['MRR@k']:.4f}")
print(f"HR@{n}: {evaluation_results['HR@k']:.4f}")

NDCG@5: 0.7080
MRR@5: 0.6181
HR@5: 1.0000


In [60]:
evaluation_results_without_history = evaluate_recommendation_without_history(recommendations, 5)
print(f"Without History - NDCG@5: {evaluation_results_without_history['NDCG@k']:.4f}")
print(f"Without History - MRR@5: {evaluation_results_without_history['MRR@k']:.4f}")
print(f"Without History - HR@5: {evaluation_results_without_history['HR@k']:.4f}")

Without History - NDCG@5: 0.0089
Without History - MRR@5: 0.0067
Without History - HR@5: 0.0159


In [76]:
evaluation_results_without_history = evaluate_recommendation_without_history(recommendations, 5)
print(f"Without History - NDCG@5: {evaluation_results_without_history['NDCG@k']:.4f}")
print(f"Without History - MRR@5: {evaluation_results_without_history['MRR@k']:.4f}")
print(f"Without History - HR@5: {evaluation_results_without_history['HR@k']:.4f}")

Without History - NDCG@5: 0.6143
Without History - MRR@5: 0.4914
Without History - HR@5: 1.0000


In [1]:
# from evaluator import EvaluateMetrics
#
# # Evaluate the recommendations
# evaluator = EvaluateMetrics(test_ratings)
# evaluation_results = evaluator.evaluate_recommendation_without_history(recommendations, k=n)
#
# print(f"NDCG@{n}: {evaluation_results['NDCG@k']:.4f}")
# print(f"MRR@{n}: {evaluation_results['MRR@k']:.4f}")
# print(f"HR@{n}: {evaluation_results['HR@k']:.4f}")

paraphrase-MiniLM-L6-v2 5 epochs

NDCG@5: 0.7080
MRR@5: 0.6181
HR@5: 1.0000

In [13]:
def get_top_n_items_without_history_unseen_items(model, userId, n):
    # Ensure the model is in evaluation mode
    model.eval()

    # Get the user text for the given userId
    user_text = user_features_dict[userId]

    # Encode the user text
    user_embedding = model.user_model.encode(user_text, convert_to_tensor=True).to(device)

    # Compute the scores (dot product between user embedding and each item embedding)
    user_output = model.user_fc(user_embedding).to(device)
    item_output = model.item_fc(full_items_embeddings).to(device)
    dot_product = torch.matmul(user_output, item_output.t()).squeeze()

    # Get items the user has seen in the training and validation data
    seen_items_train = train_ratings[train_ratings['user_id'] == userId]['item_id'].values
    seen_items_val = val_ratings[val_ratings['user_id'] == userId]['item_id'].values
    seen_items = set(np.concatenate((seen_items_train, seen_items_val)))

    # Get the top n + len(seen_items) item indices and their scores
    top_n_scores, top_n_indices = torch.topk(dot_product, n + len(seen_items))

    # Map indices back to item IDs
    top_n_item_ids = [list(movie_id_to_idx.keys())[list(movie_id_to_idx.values()).index(idx.item())] for idx in top_n_indices]

    # Filter out seen items
    unseen_top_n_item_ids = [item for item in top_n_item_ids if item not in seen_items]

    return unseen_top_n_item_ids[:n]

In [14]:
# Assuming full_items_embeddings is already defined
full_items_embeddings = torch.stack([best_model.item_model.encode(item_text, convert_to_tensor=True) for item_text in item_texts]).to(device)

In [15]:
def dcg(scores, k):
    scores = np.asfarray(scores)[:k]
    return np.sum(scores / np.log2(np.arange(2, scores.size + 2)))

def ndcg_at_k(labels, k):
    ideal_labels = sorted(labels, reverse=True)
    return dcg(labels, k) / dcg(ideal_labels, k)

def evaluate_user_cf_model(model, test_data, train_data, val_data, all_items, k):
    ndcg_scores = []

    # Get unique users
    unique_users = test_data['user_id'].unique()

    for user in unique_users:
        # Get the top N items for the user, filtering out seen items
        recommended_items = get_top_n_items_without_history_unseen_items(model, user, k)

        user_test_data = test_data[test_data['user_id'] == user]
        test_items = user_test_data['item_id'].values
        print(user)
        y_score = [1 if item in test_items else 0 for item in recommended_items]
        ndcg = ndcg_at_k(y_score, k)
        ndcg_scores.append(ndcg)

    # avg_ndcg = np.mean(np.nan_to_num(ndcg_scores, nan=0.0))
    avg_ndcg = np.nanmean(ndcg_scores)

    return {
        'NDCG@{}'.format(k): avg_ndcg
    }

all_items = movies['item_id'].unique()
# Evaluate the model
eval_result = evaluate_user_cf_model(best_model, test_ratings, train_ratings, val_ratings, all_items, k=5)
print(eval_result)

1
2
3
4
5
6
7
8
9
10


  return dcg(labels, k) / dcg(ideal_labels, k)


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
2

In [16]:
def dcg(scores, k):
    scores = np.asfarray(scores)[:k]
    return np.sum(scores / np.log2(np.arange(2, scores.size + 2)))

def ndcg_at_k(labels, k):
    ideal_labels = sorted(labels, reverse=True)
    return dcg(labels, k) / dcg(ideal_labels, k)

def evaluate_user_cf_model(model, test_data, train_data, val_data, all_items, k):
    ndcg_scores = []

    # Get unique users
    unique_users = test_data['user_id'].unique()

    for user in unique_users:
        # Get the top N items for the user, filtering out seen items
        recommended_items = get_top_n_items_without_history_unseen_items(model, user, k)

        user_test_data = test_data[test_data['user_id'] == user]
        test_items = user_test_data['item_id'].values
        print(user)
        y_score = [
            1 if (item in test_items and user_test_data[user_test_data['item_id'] == item]['rating'].values[0] == 1) else 0
            for item in recommended_items
        ]
        ndcg = ndcg_at_k(y_score, k)
        ndcg_scores.append(ndcg)

    # avg_ndcg = np.mean(np.nan_to_num(ndcg_scores, nan=0.0))
    avg_ndcg = np.nanmean(ndcg_scores)

    return {
        'NDCG@{}'.format(k): avg_ndcg
    }

all_items = movies['item_id'].unique()
# Evaluate the model
eval_result = evaluate_user_cf_model(best_model, test_ratings, train_ratings, val_ratings, all_items, k=5)
print(eval_result)

1
2
3
4
5
6
7
8
9


  return dcg(labels, k) / dcg(ideal_labels, k)


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
28