In [2]:
import pandas as pd
from lightfm import LightFM
from lightfm.data import Dataset as LightFMDataset
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score
import joblib
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [5]:
# Source archive and local destination for the MovieLens "ml-latest-small" dataset.
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
# NOTE(review): absolute /content path — presumably a Colab runtime; confirm before running elsewhere.
dataset_path = '/content/ml-latest-small.zip'

# Download and unzip the dataset.
# `wget -nc` (no-clobber) and `unzip -n` (never overwrite) keep files already on disk,
# so re-running this cell does not replace an existing archive or extracted CSVs.
!wget -nc $url -O $dataset_path
!unzip -n $dataset_path -d /content/




--2024-07-02 20:33:44--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘/content/ml-latest-small.zip’


2024-07-02 20:33:45 (1.31 MB/s) - ‘/content/ml-latest-small.zip’ saved [978202/978202]

Archive:  /content/ml-latest-small.zip
   creating: /content/ml-latest-small/
  inflating: /content/ml-latest-small/links.csv  
  inflating: /content/ml-latest-small/tags.csv  
  inflating: /content/ml-latest-small/ratings.csv  
  inflating: /content/ml-latest-small/README.txt  
  inflating: /content/ml-latest-small/movies.csv  


In [6]:
# Load the extracted ratings and movie-metadata CSVs into pandas dataframes.
# NOTE(review): neither `ratings` nor `movies` is referenced by the cells visible in this
# notebook — confirm whether they are still needed (e.g. to map item indices back to titles).
ratings = pd.read_csv('/content/ml-latest-small/ratings.csv')
movies = pd.read_csv('/content/ml-latest-small/movies.csv')

In [7]:
# Load the whole previously saved model object from disk.
# NOTE(review): 'lightfm_model.pkl' is a relative path and is not created by any cell visible
# in this notebook, so a fresh "Restart & Run All" needs the file in the working directory first.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code — only load
# model files that come from a trusted source.
deployed_model = joblib.load('lightfm_model.pkl')


# Function to calculate item similarities (cosine similarity)

In [8]:
def calculate_item_similarity(item_id, all_item_embeddings, top_n=10):
    """
    Find the items whose embeddings are most cosine-similar to a given item's.

    Parameters:
    - item_id: Row index of the query item in all_item_embeddings
    - all_item_embeddings: 2-D array with one embedding row per item
    - top_n: Maximum number of similar items to return (default 10)

    Returns:
    - similar_item_ids: Array of row indices ordered from most to least similar;
      it never contains item_id itself and holds fewer than top_n entries when
      there are not enough other items
    """
    item_embedding = all_item_embeddings[item_id]
    similarities = cosine_similarity([item_embedding], all_item_embeddings)[0]

    # Resolve a negative index to its positive row so the self-filter below matches it.
    num_items = similarities.shape[0]
    query_index = item_id + num_items if item_id < 0 else item_id

    ranked_item_ids = similarities.argsort()[::-1]

    # Remove the query item by index instead of assuming it is ranked first:
    # with duplicate or zero-norm embeddings the similarities tie, so the query
    # item can land anywhere in the ranking.
    other_item_ids = ranked_item_ids[ranked_item_ids != query_index]
    similar_item_ids = other_item_ids[:max(top_n, 0)]
    return similar_item_ids


In [9]:
# Example: look up the nearest neighbours of one item in the loaded model's embedding space.
# `item_id` is used as a row index into `deployed_model.item_embeddings`.
# NOTE(review): presumably this is the model's internal item index rather than a MovieLens
# movieId — verify against the id mapping used at training time.
item_id = 1
similar_items = calculate_item_similarity(item_id, deployed_model.item_embeddings)
print(f'Top 10 similar items to item {item_id}: {similar_items}')



Top 10 similar items to item 1: [ 689   23  652  314  696 1483  533  707  505 1560]


# Function to recommend top-N movies for a given user

In [10]:
def recommend_movies_for_user(user_id, model, num_items, num_recommendations=10):
    """
    Score every item for one user and return the highest-scoring ones.

    Parameters:
    - user_id: ID of the user to produce recommendations for
    - model: Object exposing predict(user_ids=..., item_ids=...) that returns
      one score per (user, item) pair, e.g. a trained LightFM model
    - num_items: Total number of items; items 0 .. num_items - 1 are scored
    - num_recommendations: How many item IDs to return

    Returns:
    - Array of item IDs ordered from highest to lowest predicted score
    """
    # Pair the same user with every candidate item so predict() sees aligned arrays.
    repeated_user = np.array([user_id] * num_items)
    candidate_items = np.arange(num_items)
    predicted = model.predict(user_ids=repeated_user, item_ids=candidate_items)

    # Negating the scores makes an ascending argsort list the best items first.
    best_first = np.argsort(-predicted)
    top_items = best_first[:num_recommendations]
    return top_items

In [11]:
# Example usage: score every item for one user and print the top-ranked item indices.
# NOTE(review): `user_id` is passed straight to model.predict — presumably the model's internal
# user index rather than a MovieLens userId; verify against the id mapping used at training time.
user_id = 3
# The row count of the item-embedding matrix is used as the total number of items to score.
num_items = deployed_model.item_embeddings.shape[0]
recommended_movies = recommend_movies_for_user(user_id, deployed_model, num_items)
print(f'Top {len(recommended_movies)} recommended movies for user {user_id}: {recommended_movies}')


Top 10 recommended movies for user 3: [764 166 753 192 244  20 238 744 774 232]
