# **Import Libraries and Data**
----

In [None]:
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', None)
import matplotlib.pyplot as plt
import seaborn as sns
import random
import string
import re
import time
import networkx as nx

from tqdm.notebook import tqdm
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
import scipy.sparse as sp

In [None]:
#!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cu111.html
!pip install torch-geometric
#!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cu111.html

Collecting torch-geometric
  Downloading torch_geometric-2.6.0-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m61.4/63.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.0


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import degree

In [None]:
!wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip ml-latest-small.zip

--2024-09-24 06:59:38--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip’


2024-09-24 06:59:39 (4.29 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]

Archive:  ml-latest-small.zip
   creating: ml-latest-small/
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [None]:
movies_df = pd.read_csv('ml-latest-small/movies.csv',index_col='movieId')
ratings_df = pd.read_csv('ml-latest-small/ratings.csv')

In [None]:
movies_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [None]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
print(f'Number of movies: {len(movies_df)}')
print(f'Number of ratings: {len(ratings_df)}')

Number of movies: 9742
Number of ratings: 100836


# **Data Preparation**

---



**Keep only highly rated interactions (rating ≥ 3)**

In [None]:
ratings_df.groupby(['rating'])['rating'].count()

Unnamed: 0_level_0,rating
rating,Unnamed: 1_level_1
0.5,1370
1.0,2811
1.5,1791
2.0,7551
2.5,5550
3.0,20047
3.5,13136
4.0,26818
4.5,8551
5.0,13211


In [None]:
ratings_df = ratings_df[ratings_df['rating'] >= 3]

In [None]:
ratings_df.groupby(['rating'])['rating'].count()
ratings_df.shape

(81763, 4)

**Data splitting**

In [None]:
train_df , test_df = train_test_split(ratings_df,test_size=0.2,random_state=101)

In [None]:
train_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
20419,135,1261,3.0,1009693305
32109,220,1430,3.5,1232170291
21333,140,2763,4.0,942908620
27328,186,593,5.0,1031073519
54266,357,1407,3.5,1348612303


In [None]:
print("Train Size  : ", len(train_df))
print("Test Size : ", len(test_df))

Train Size  :  65410
Test Size :  16353


**Encoding**

In [None]:
# Map raw MovieLens ids to contiguous 0-based indices.
# Both encoders are fitted on the training split only, so an id that never
# appears in `train_df` has no index and cannot be transformed later.
le_user = pp.LabelEncoder()
le_item = pp.LabelEncoder()
# `user_id_idx` / `item_id_idx` are the encoded counterparts of `userId` / `movieId`.
train_df['user_id_idx'] = le_user.fit_transform(train_df['userId'].values)
train_df['item_id_idx'] = le_item.fit_transform(train_df['movieId'].values)

In [None]:
train_df.head()

Unnamed: 0,userId,movieId,rating,timestamp,user_id_idx,item_id_idx
20419,135,1261,3.0,1009693305,134,898
32109,220,1430,3.5,1232170291,219,1019
21333,140,2763,4.0,942908620,139,1892
27328,186,593,5.0,1031073519,185,477
54266,357,1407,3.5,1348612303,356,1005


In [None]:
train_user_ids = train_df['userId'].unique()
train_item_ids = train_df['movieId'].unique()
print(len(train_user_ids), len(train_item_ids))

609 7809


In [None]:
# Keep only test interactions whose user AND movie both occur in the training
# split: the label encoders were fitted on training ids only, so any other id
# could not be transformed.
# `.copy()` makes the result an independent DataFrame; without it, adding the
# encoded index columns afterwards raises pandas' SettingWithCopyWarning.
test_df = test_df[
  (test_df['userId'].isin(train_user_ids)) & (test_df['movieId'].isin(train_item_ids))
].copy()
print(len(test_df))

15654


In [None]:
test_df['user_id_idx'] = le_user.transform(test_df['userId'].values)
test_df['item_id_idx'] = le_item.transform(test_df['movieId'].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['user_id_idx'] = le_user.transform(test_df['userId'].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['item_id_idx'] = le_item.transform(test_df['movieId'].values)


In [None]:
n_users = train_df['user_id_idx'].nunique()
n_items = train_df['item_id_idx'].nunique()
print("Number of Unique Users : ", n_users)
print("Number of unique Items : ", n_items)

Number of Unique Users :  609
Number of unique Items :  7809


# **Data Preprocessing**
---

In [None]:
def data_loader(data, batch_size, n_usr, n_itm):
    """Sample one BPR mini-batch of (user, positive item, negative item) triples.

    Args:
        data: DataFrame with `user_id_idx` and `item_id_idx` columns.
        batch_size: Number of users to draw for the batch.
        n_usr: Number of users; user indices are drawn from `range(n_usr)`.
        n_itm: Number of items; negative items are drawn from `range(n_itm)`.

    Returns:
        Tuple of three LongTensors on the notebook-level `device`:
        sorted user indices, positive item node ids and negative item node ids.
        Item ids are offset by `n_usr` so that they address item nodes in the
        joint user+item node numbering.

    Raises:
        ValueError: If a sampled user has interacted with every item, so no
            negative item exists (the unguarded loop would never terminate).
    """

    def sample_neg(interacted):
        # Set membership keeps each rejection-sampling draw O(1).
        seen = set(interacted)
        if len(seen) >= n_itm and seen.issuperset(range(n_itm)):
            raise ValueError("Cannot sample a negative item: user interacted with all items")
        while True:
            neg_id = random.randint(0, n_itm - 1)
            if neg_id not in seen:
                return neg_id

    # One row per user holding the list of items that user interacted with.
    items_by_user = data.groupby('user_id_idx')['item_id_idx'].apply(list).reset_index()

    # Draw users: with replacement when there are fewer users than the batch
    # size, without replacement otherwise.
    indices = list(range(n_usr))
    if n_usr < batch_size:
        users = [random.choice(indices) for _ in range(batch_size)]
    else:
        users = random.sample(indices, batch_size)
    users.sort()
    users_df = pd.DataFrame(users, columns=['users'])

    # A right join keeps exactly one row per sampled user, in sampled order.
    batch_df = pd.merge(items_by_user, users_df, how='right', left_on='user_id_idx', right_on='users')
    pos_items = [random.choice(items) for items in batch_df['item_id_idx']]
    neg_items = [sample_neg(items) for items in batch_df['item_id_idx']]

    return (
        torch.LongTensor(list(users)).to(device),
        torch.LongTensor(pos_items).to(device) + n_usr,
        torch.LongTensor(neg_items).to(device) + n_usr
    )

data_loader(train_df, 16, n_users, n_items)

(tensor([ 56, 101, 198, 239, 254, 284, 411, 439, 441, 489, 499, 512, 518, 528,
         592, 595]),
 tensor([1242,  788, 1488,  832, 1884, 1443, 2595, 1812, 4720,  907,  661, 3458,
         2093,  619, 4369, 1388]),
 tensor([6712, 5831, 7350, 2158, 4141, 4499, 8143, 7491, 8246, 5993, 6164, 4471,
         8369,  612, 4205, 2233]))

**Graph Construction**

In [None]:
# Users and items share one node numbering: users keep their encoded index,
# items are shifted by `n_users` so the two id ranges do not collide.
u_t = torch.LongTensor(train_df.user_id_idx)
i_t = torch.LongTensor(train_df.item_id_idx) + n_users

# Layout illustration: every interaction is stored in both directions
# (user -> item, then item -> user), e.g.
#[[0, 1, 3, 4],
#[3, 4, 0, 1]]

# Row 0 holds the first endpoint of each edge, row 1 the second endpoint.
train_edge_index = torch.stack((
  torch.cat([u_t, i_t]),
  torch.cat([i_t, u_t])
)).to(device)
train_edge_index

tensor([[ 134,  219,  139,  ..., 1252, 1086, 1383],
        [1507, 1628, 2501,  ...,   49,  580,  368]])

In [None]:
train_edge_index[:,-1], train_edge_index[:, 0]

(tensor([1383,  368]), tensor([ 134, 1507]))

# **Modelling**


---



In [None]:
class LightGCNConv(MessagePassing):
  """Single LightGCN propagation step.

  Each edge carries the embedding of one endpoint, scaled by
  1 / sqrt(deg(first endpoint) * deg(second endpoint)), and the incoming
  messages of a node are summed. No weight matrix or non-linearity is applied.
  """

  def __init__(self, **kwargs):
    # Extra keyword arguments are accepted but not forwarded; aggregation is a sum.
    super().__init__(aggr='add')

  def forward(self, x, edge_index):
    """Propagate node embeddings `x` once along `edge_index`.

    `edge_index` is unpacked into its two rows; degrees are counted over the
    second row.
    """
    source_nodes, target_nodes = edge_index
    node_degree = degree(target_nodes, x.size(0), dtype=x.dtype)

    # Nodes with degree 0 yield inf after pow(-0.5); zero them out instead.
    inv_sqrt_degree = node_degree.pow(-0.5)
    inv_sqrt_degree[inv_sqrt_degree == float('inf')] = 0

    # Symmetric per-edge normalisation coefficient.
    edge_weight = inv_sqrt_degree[source_nodes] * inv_sqrt_degree[target_nodes]

    # Only message + aggregate are run; there is no update step afterwards.
    return self.propagate(edge_index, x=x, norm=edge_weight)

  def message(self, x_j, norm):
    """Scale each per-edge neighbour embedding `x_j` by its edge coefficient."""
    return x_j * norm.view(-1, 1)

In [None]:
test_x = torch.Tensor(np.eye(5))
test_edge_index = torch.LongTensor(np.array([
  [0, 0, 1, 1, 2, 3, 3, 4],
  [2, 3, 3, 4, 0, 0, 1, 1]
]))

LightGCNConv()(test_x, test_edge_index)

tensor([[0.0000, 0.0000, 0.7071, 0.5000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.5000, 0.7071],
        [0.7071, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.5000, 0.5000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.7071, 0.0000, 0.0000, 0.0000]])

In [None]:
class RecSysGNN(nn.Module):
  """Graph recommender holding one embedding table for users and items.

  Rows `0 .. num_users-1` of the table are users, the remaining `num_items`
  rows are items. Only the 'LightGCN' variant is accepted.
  """

  def __init__(self, latent_dim, num_layers, num_users, num_items, model):
    """Build the embedding table and `num_layers` LightGCN layers.

    Raises:
      AssertionError: If `model` is not 'LightGCN'.
    """
    super().__init__()

    assert model == 'LightGCN', 'Model must be LightGCN'
    self.model = model
    self.embedding = nn.Embedding(num_users + num_items, latent_dim)

    if self.model == 'LightGCN':
      self.convs = nn.ModuleList([LightGCNConv() for _ in range(num_layers)])

    self.init_parameters()

  def init_parameters(self):
    """Initialise the embedding weights from a normal distribution (std 0.1)."""
    if self.model == 'LightGCN':
      # Authors of LightGCN report higher results with normal initialization
      nn.init.normal_(self.embedding.weight, std=0.1)

  def forward(self, edge_index):
    """Run all layers and combine the per-layer embeddings.

    Returns:
      `(initial, combined)`: the raw embedding table and the combination of
      the table with every layer output (mean over layers, or concatenation
      when `self.model == 'NGCF'`).
    """
    initial = self.embedding.weight
    per_layer = [initial]

    current = initial
    for layer in self.convs:
      current = layer(x=current, edge_index=edge_index)
      per_layer.append(current)

    if self.model == 'NGCF':
      combined = torch.cat(per_layer, dim=-1)
    else:
      combined = torch.stack(per_layer, dim=0).mean(dim=0)

    return initial, combined

  def encode_minibatch(self, users, pos_items, neg_items, edge_index):
    """Look up combined and initial embeddings for one batch of triples.

    Returns:
      6-tuple: combined embeddings of users, positive items and negative
      items, followed by the initial embeddings in the same order.
    """
    initial, combined = self(edge_index)
    batch_nodes = (users, pos_items, neg_items)
    return (
        tuple(combined[nodes] for nodes in batch_nodes)
        + tuple(initial[nodes] for nodes in batch_nodes)
    )

In [None]:
def compute_bpr_loss(users, users_emb, pos_emb, neg_emb, user_emb0,  pos_emb0, neg_emb0):
  """Return the BPR ranking loss and the L2 regularisation term of a batch.

  Args:
    users: Batch users; only its length is used (to scale the regulariser).
    users_emb, pos_emb, neg_emb: Embeddings used for scoring.
    user_emb0, pos_emb0, neg_emb0: Initial embeddings used for regularisation.

  Returns:
    `(bpr_loss, reg_loss)`; the regulariser is NOT yet multiplied by any
    weight-decay factor.
  """
  # Regularisation: half the summed squared norms, averaged over the batch.
  batch_size = float(len(users))
  squared_norms = (
    user_emb0.norm().pow(2) +
    pos_emb0.norm().pow(2)  +
    neg_emb0.norm().pow(2)
  )
  reg_loss = (1 / 2) * squared_norms / batch_size

  # Dot-product score of each user with its positive / negative item.
  pos_scores = (users_emb * pos_emb).sum(dim=1)
  neg_scores = (users_emb * neg_emb).sum(dim=1)

  # softplus(neg - pos) is small when the positive item outscores the negative.
  bpr_loss = F.softplus(neg_scores - pos_scores).mean()

  return bpr_loss, reg_loss

In [None]:
def get_metrics(user_Embed_wts, item_Embed_wts, n_users, n_items, train_data, test_data, K):
    """Compute top-K ranking metrics on held-out interactions.

    Args:
        user_Embed_wts: User embedding matrix, one row per encoded user index.
        item_Embed_wts: Item embedding matrix, one row per encoded item index.
        n_users: Number of users (rows of the interaction mask).
        n_items: Number of items (columns of the interaction mask).
        train_data: DataFrame with `user_id_idx` / `item_id_idx`; these pairs
            are excluded from the ranking.
        test_data: DataFrame with `user_id_idx` / `item_id_idx`; the held-out
            interactions the ranking is evaluated against.
        K: Number of top-scoring items recommended per user.

    Returns:
        `(recall, precision, accuracy)` averaged over the users present in
        `test_data`. `accuracy` is 1 for a user with at least one held-out
        item among the top K and 0 otherwise (a hit rate).
    """
    # Score of every user-item pair (dot product of embeddings).
    relevance_score = torch.matmul(user_Embed_wts, torch.transpose(item_Embed_wts, 0, 1))

    # Boolean mask of the training interactions, built from the `train_data`
    # argument on the same device as the scores.
    score_device = relevance_score.device
    train_users = torch.as_tensor(train_data['user_id_idx'].values, dtype=torch.long, device=score_device)
    train_items = torch.as_tensor(train_data['item_id_idx'].values, dtype=torch.long, device=score_device)
    seen_in_training = torch.zeros((n_users, n_items), dtype=torch.bool, device=score_device)
    seen_in_training[train_users, train_items] = True

    # Exclude training items with -inf. Setting them to 0 would still rank
    # them above every item whose score is negative.
    relevance_score = relevance_score.masked_fill(seen_in_training, float('-inf'))

    # Top-K item indices per user; row position equals the encoded user index.
    topk_items = torch.topk(relevance_score, K).indices.cpu().numpy()
    topk_relevance_indices_df = pd.DataFrame({
        'user_ID': range(len(topk_items)),
        'top_rlvnt_itm': topk_items.tolist(),
    })

    # Overlap between recommended items and held-out interactions, per user.
    test_interacted_items = test_data.groupby('user_id_idx')['item_id_idx'].apply(list).reset_index()
    metrics_df = pd.merge(
        test_interacted_items, topk_relevance_indices_df,
        how='left', left_on='user_id_idx', right_on='user_ID'
    )
    metrics_df['intrsctn_itm'] = [
        list(set(held_out).intersection(recommended))
        for held_out, recommended in zip(metrics_df.item_id_idx, metrics_df.top_rlvnt_itm)
    ]

    hit_counts = metrics_df['intrsctn_itm'].apply(len)
    metrics_df['recall'] = hit_counts / metrics_df['item_id_idx'].apply(len)
    metrics_df['precision'] = hit_counts / K
    # Per-user hit indicator; reported as-is, with no constant offset added.
    metrics_df['accuracy'] = (hit_counts > 0).astype(float)

    return metrics_df['recall'].mean(), metrics_df['precision'].mean(), metrics_df['accuracy'].mean()

In [None]:
latent_dim = 64      # embedding size passed to RecSysGNN
n_layers = 3         # number of LightGCN propagation layers

EPOCHS = 70          # passes of the training loop
BATCH_SIZE = 1024    # users sampled per mini-batch
DECAY = 0.000001     # factor applied to the regularisation loss
LR = 0.005           # Adam learning rate
K = 20               # cut-off for the top-K evaluation metrics

In [None]:
def train_and_eval(model, optimizer, train_df):
    """Train `model` for EPOCHS epochs and evaluate it after every epoch.

    Relies on notebook-level names: EPOCHS, BATCH_SIZE, DECAY, K, n_users,
    n_items, train_edge_index, test_df, data_loader, compute_bpr_loss and
    get_metrics.

    Returns:
        Six lists with one entry per epoch, each rounded to 4 decimals:
        total loss, BPR loss, regularisation loss, recall, precision, accuracy.
    """
    history = {name: [] for name in ('loss', 'bpr', 'reg', 'recall', 'precision', 'accuracy')}

    for epoch in tqdm(range(EPOCHS)):
        n_batch = int(len(train_df)/BATCH_SIZE)
        batch_losses = {'loss': [], 'bpr': [], 'reg': []}

        # ---- optimisation over freshly sampled mini-batches ----
        model.train()
        for _ in range(n_batch):
            optimizer.zero_grad()

            users, pos_items, neg_items = data_loader(train_df, BATCH_SIZE, n_users, n_items)
            encoded = model.encode_minibatch(users, pos_items, neg_items, train_edge_index)

            bpr_loss, reg_loss = compute_bpr_loss(users, *encoded)
            reg_loss = DECAY * reg_loss
            final_loss = bpr_loss + reg_loss

            final_loss.backward()
            optimizer.step()

            batch_losses['loss'].append(final_loss.item())
            batch_losses['bpr'].append(bpr_loss.item())
            batch_losses['reg'].append(reg_loss.item())

        # ---- evaluation on the held-out interactions ----
        model.eval()
        with torch.no_grad():
            _, out = model(train_edge_index)
            final_user_Embed, final_item_Embed = torch.split(out, (n_users, n_items))
            epoch_metrics = get_metrics(
                final_user_Embed, final_item_Embed, n_users, n_items, train_df, test_df, K
            )

        for name, values in batch_losses.items():
            history[name].append(round(np.mean(values), 4))
        for name, value in zip(('recall', 'precision', 'accuracy'), epoch_metrics):
            history[name].append(round(value, 4))

    return (
        history['loss'],
        history['bpr'],
        history['reg'],
        history['recall'],
        history['precision'],
        history['accuracy']
    )

# **Recommender system**
---

In [None]:
# Seed the RNGs used below so that a re-run reproduces the same curves:
# `random` drives user / item sampling in `data_loader`, and torch drives the
# normal initialisation of the embedding table.
SEED = 101
random.seed(SEED)
torch.manual_seed(SEED)

lightgcn = RecSysGNN(
  latent_dim=latent_dim,
  num_layers=n_layers,
  num_users=n_users,
  num_items=n_items,
  model='LightGCN'
)
lightgcn.to(device)

optimizer = torch.optim.Adam(lightgcn.parameters(), lr=LR)


light_loss, light_bpr, light_reg, light_recall, light_precision , light_accuracy = train_and_eval(lightgcn, optimizer, train_df)

Size of Learnable Embedding :  [torch.Size([8353, 64])]


In [None]:
# Define a dictionary containing model information
model_info = {
    'model_architecture': 'LightGCN',
    'latent_dim': latent_dim,
    'num_layers': n_layers,
    'num_users': n_users,
    'num_items': n_items,
    'training_params': {
        'epochs': EPOCHS,
        'batch_size': BATCH_SIZE,
        'decay': DECAY,
        'learning_rate': LR,
        'top_k': K
    }
}

In [None]:
# Save model and model information
torch.save({
    'model_state_dict': lightgcn.state_dict(),
    'model_info': model_info
}, 'lightgcn1_model.pth')

In [None]:
epoch_list = [(i+1) for i in range(EPOCHS)]

In [None]:
plt.plot(epoch_list, light_loss, label='Total Training Loss')
plt.plot(epoch_list, light_bpr, label='BPR Training Loss')
plt.plot(epoch_list, light_reg, label='Reg Training Loss')

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

In [None]:
plt.plot(epoch_list, light_recall, label='Recall')
plt.plot(epoch_list, light_precision, label='Precision')
plt.xlabel('Epoch')
plt.ylabel('Metrics')
plt.legend()

In [None]:
max(light_precision), max(light_recall)

(0.2014, 0.251)

In [None]:
print(light_accuracy)

[0.8339, 0.8405, 0.8504, 0.8554, 0.862, 0.8669, 0.8835, 0.8851, 0.895, 0.9017, 0.905, 0.9149, 0.9132, 0.9165, 0.9248, 0.9215, 0.9215, 0.9231, 0.9264, 0.9281, 0.9347, 0.9413, 0.9463, 0.9446, 0.938, 0.938, 0.938, 0.943, 0.9347, 0.9397, 0.9364, 0.9413, 0.9413, 0.943, 0.9512, 0.9479, 0.9479, 0.9529, 0.9545, 0.9545, 0.9512, 0.9545, 0.9612, 0.9579, 0.9595, 0.9595, 0.9579, 0.9562, 0.9562, 0.9645, 0.9579, 0.9545, 0.9512, 0.9595, 0.9529, 0.9512, 0.9529, 0.9579, 0.9529, 0.9545, 0.9595, 0.9595, 0.9545, 0.9545, 0.9529, 0.9612, 0.9562, 0.9579, 0.9579, 0.9579]


In [None]:
print(max(light_accuracy))

0.9645


# **Inference**
---

In [None]:
def load_movies_data(file_path):
    """Read the CSV file at `file_path` and return it as a pandas DataFrame.

    No index column is set, so the result carries pandas' default index.
    """
    return pd.read_csv(file_path)

In [None]:
import pandas as pd

# Load the movies DataFrame from movies.csv.
# NOTE: this rebinds `movies_df` WITHOUT `index_col='movieId'`, replacing the
# movieId-indexed frame loaded at the top of the notebook; from here on
# `movieId` is an ordinary column and the index is positional.
movies_df = pd.read_csv('ml-latest-small/movies.csv')

# Extract unique genres: each `genres` cell is a '|'-separated list, so join
# all cells with '|' and split once.
unique_genres = set('|'.join(movies_df['genres']).split('|'))

# Remove the "(no genres listed)" placeholder from the unique genres
unique_genres.discard("(no genres listed)")

# Convert unique genres to a list (order is whatever the set iteration yields)
available_genres = list(unique_genres)

In [None]:
import pandas as pd
import random

def load_movies_data(file_path):
    """Return the CSV at `file_path` as a pandas DataFrame (default index).

    This repeats the identical helper defined in an earlier cell; the later
    definition is the one in effect once this cell has run.
    """
    return pd.read_csv(file_path)

def user_preferences(available_genres):
    """Print a numbered genre menu and return the genres the user picks.

    The user enters 1-based menu numbers separated by commas. Surrounding
    whitespace and empty entries (e.g. a trailing comma) are ignored, and a
    genre picked more than once is returned only once.

    Args:
        available_genres: Sequence of genre names shown in the menu.

    Returns:
        List of selected genre names, in the order they were entered.

    Raises:
        ValueError: If an entry is not an integer.
        IndexError: If a number is outside 1..len(available_genres).
    """
    print("Available Genres:")
    for idx, genre in enumerate(available_genres, start=1):
        print(f"{idx}. {genre}")

    selected_genres_input = input("Enter the numbers of the genres you prefer (separated by commas): ")

    selected_genres = []
    for token in selected_genres_input.split(','):
        token = token.strip()
        if not token:
            continue
        position = int(token)
        # Reject 0 and negatives explicitly: `available_genres[position - 1]`
        # would otherwise wrap around and silently pick from the end.
        if not 1 <= position <= len(available_genres):
            raise IndexError(
                f"Genre number {position} is out of range (1-{len(available_genres)})"
            )
        genre = available_genres[position - 1]
        if genre not in selected_genres:
            selected_genres.append(genre)
    return selected_genres

In [None]:
def get_top_recommendations(user_id, selected_genres, model, movies_df, le_item, n_items, top_n=10, edge_index=None):
    """Recommend movies for one user, preferring the selected genres.

    Items are ranked by cosine similarity between the user's and each item's
    propagated embedding. The candidates are then ordered by how many of the
    selected genres they carry; ties keep the similarity ranking.

    Args:
        user_id: Encoded user index (row of the embedding table), not the raw
            MovieLens `userId`.
        selected_genres: Genre names the user prefers.
        model: Callable returning `(initial_embeddings, final_embeddings)`
            for an edge index.
        movies_df: Movie metadata with `title` and `genres`; `movieId` may be
            either a column or the index.
        le_item: Fitted item label encoder (`classes_`, `inverse_transform`).
        n_items: Maximum number of most-similar items kept as candidates.
        top_n: Number of recommendations to return.
        edge_index: Graph passed to `model`; defaults to the notebook-level
            `train_edge_index`.

    Returns:
        DataFrame with columns `movieId` (real movie id) and `title`.
    """
    if edge_index is None:
        edge_index = train_edge_index

    # Look movies up by their real id whichever way the frame was loaded.
    # With a default index, `.loc` would address row positions instead.
    if 'movieId' in movies_df.columns:
        movies_by_id = movies_df.set_index('movieId')
    else:
        movies_by_id = movies_df

    with torch.no_grad():
        _, all_embeddings = model(edge_index)

        # Item nodes follow the user nodes in the joint embedding table.
        n_item_nodes = len(le_item.classes_)
        n_user_nodes = all_embeddings.shape[0] - n_item_nodes

        # Use the propagated user embedding so that user and item vectors come
        # from the same representation.
        user_embedding = all_embeddings[int(user_id)].unsqueeze(0)
        similarities = F.cosine_similarity(user_embedding, all_embeddings[n_user_nodes:], dim=-1)
        top_indices = torch.argsort(similarities, descending=True)[:n_items]

    # Encoded item indices -> movie ids, keeping only ids with metadata.
    recommended_movie_ids = le_item.inverse_transform(top_indices.cpu().numpy())
    recommended_movie_ids = [movie_id for movie_id in recommended_movie_ids if movie_id in movies_by_id.index]
    recommendations = movies_by_id.loc[recommended_movie_ids].copy()

    # Count exact genre matches ('|'-separated tokens).
    wanted_genres = set(selected_genres)
    recommendations['matching_genres'] = recommendations['genres'].apply(
        lambda genres: len(wanted_genres.intersection(genres.split('|')))
    )

    # A stable sort keeps the similarity order among equal genre counts.
    recommendations = recommendations.sort_values(by='matching_genres', ascending=False, kind='mergesort')

    # Expose the real movie id as a column before dropping the index.
    top_recommendations = recommendations.head(top_n).rename_axis('movieId').reset_index()
    return top_recommendations[['movieId', 'title']]

In [None]:
def main():
    """Ask for preferred genres and print recommendations for a random user.

    Relies on notebook-level names: `train_user_ids`, `le_user`, `le_item`,
    `lightgcn`, `n_items`, `load_movies_data`, `user_preferences` and
    `get_top_recommendations`.
    """
    # Load movies data
    movies_df = load_movies_data('ml-latest-small/movies.csv')

    # Get user preferences. The genres are sorted so that the numbered menu is
    # the same on every run (set iteration order is not).
    genre_menu = sorted(set('|'.join(movies_df['genres']).split('|')) - {"(no genres listed)"})
    selected_genres = user_preferences(genre_menu)

    # Pick a random training user. `train_user_ids` holds raw `userId` values,
    # while the model's embedding table is addressed by the encoded index, so
    # translate the id before using it.
    random_user_id = random.choice(train_user_ids)
    user_idx = le_user.transform([random_user_id])[0]
    top_recommendations = get_top_recommendations(user_idx, selected_genres, lightgcn, movies_df, le_item, n_items)

    # Load ratings data
    ratings_df = pd.read_csv('ml-latest-small/ratings.csv')

    # Average rating of every recommended movie
    recommended_ratings = ratings_df[ratings_df['movieId'].isin(top_recommendations['movieId'])]
    merged_data = pd.merge(top_recommendations, recommended_ratings, on='movieId', how='left')
    average_ratings = merged_data.groupby(['movieId', 'title'], as_index=False)['rating'].mean()

    # Release year is parsed from the "(YYYY)" part of the title.
    average_ratings['year'] = pd.to_numeric(
        average_ratings['title'].str.extract(r'\((\d{4})\)', expand=False)
    )

    # Display sorted recommendations: highest average rating first; if two
    # movies have the same rating, the one released more recently appears first.
    sorted_recommendations = average_ratings.sort_values(by=['rating', 'year'], ascending=[False, False])
    print(sorted_recommendations[['title']].to_string(index=False))

if __name__ == "__main__":
    main()