<a href="https://colab.research.google.com/github/Manan1811/Movie-Recommender-System--LightFM/blob/master/Movie_Recommender_Sys_using_LightFM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install lightfm

Collecting lightfm
[?25l  Downloading https://files.pythonhosted.org/packages/e9/8e/5485ac5a8616abe1c673d1e033e2f232b4319ab95424b42499fabff2257f/lightfm-1.15.tar.gz (302kB)
[K     |█                               | 10kB 18.2MB/s eta 0:00:01[K     |██▏                             | 20kB 1.7MB/s eta 0:00:01[K     |███▎                            | 30kB 2.3MB/s eta 0:00:01[K     |████▍                           | 40kB 2.5MB/s eta 0:00:01[K     |█████▍                          | 51kB 2.0MB/s eta 0:00:01[K     |██████▌                         | 61kB 2.3MB/s eta 0:00:01[K     |███████▋                        | 71kB 2.5MB/s eta 0:00:01[K     |████████▊                       | 81kB 2.7MB/s eta 0:00:01[K     |█████████▊                      | 92kB 2.9MB/s eta 0:00:01[K     |██████████▉                     | 102kB 2.8MB/s eta 0:00:01[K     |████████████                    | 112kB 2.8MB/s eta 0:00:01[K     |█████████████                   | 122kB 2.8MB/s eta 0:00:01[K  

In [None]:
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

In [None]:
# Fetch the MovieLens data and format it.
# The result is stored in the variable "data".
data = fetch_movielens(min_rating = 4.0)
# min_rating = 4.0: only ratings of 4 or higher are kept as interactions

In [None]:
data
# fetch_movielens returns a dictionary, stored here in "data": its 'train' and 'test'
# entries are sparse user-by-item interaction matrices, next to the item labels and item features

{'item_feature_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'item_features': <1682x1682 sparse matrix of type '<class 'numpy.float32'>'
 	with 1682 stored elements in Compressed Sparse Row format>,
 'item_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'test': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
 	with 5469 stored elements in COOrdinate format>,
 'train': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
 	with 49906 stored elements in COOrdinate format>}

In [None]:
# number of entries in the "data" dictionary
len(data)

5

In [None]:
# Show the training and testing interaction matrices.
# fetch_movielens already splits the dataset into these two parts.
for split_name in ('train', 'test'):
    print(repr(data[split_name]))
# Both matrices have the same shape: 943 users x 1682 items.
# We'll train the model on the train matrix; the test matrix is held out for evaluation.

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


In [None]:
# bare expression: the notebook displays the repr of the training matrix
data['train']

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>

In [None]:
# Create the model.
model = LightFM(loss = 'warp')
# "loss" is the LightFM parameter that selects the loss function minimized during training
# warp = Weighted Approximate-Rank Pairwise
# warp learns from the existing user-item interactions to rank a user's positive items above the other items
# it uses gradient descent to iteratively find the weights that improve our predictions over time
# LightFM can act as a hybrid system => Collaborative (interactions of similar users) + Content (user/item features)
# NOTE: no user or item features are passed to fit() in this notebook, so only the interaction matrix is used here

In [None]:
# Train the model on the training interactions.
model.fit(data['train'], epochs = 30, num_threads = 2)
# epochs = number of passes over the training data; num_threads = number of parallel threads used for fitting

<lightfm.lightfm.LightFM at 0x7fd9fe7b2dd8>

In [None]:
#generating recommendations
#user_ids is a list of the user ids we want to generate recommendations for
def sample_recommendation(model, data, user_ids, top_n=3):
    """Print known positives and top-ranked recommendations for each user.

    Parameters
    ----------
    model
        Fitted model exposing ``predict(user_id, item_ids)`` that returns one
        score per item id as a NumPy array.
    data
        Mapping with a sparse ``'train'`` interaction matrix (users x items)
        and an ``'item_labels'`` array indexed by item id.
    user_ids
        Iterable of user ids; each id is used as a row index into
        ``data['train']``.
    top_n
        How many known positives and how many recommendations to print per
        user. Defaults to 3.

    Returns
    -------
    None. The results are written to standard output.

    Notes
    -----
    Every item is scored, so items the user already interacted with are not
    excluded from the recommendations.
    """
    #number of movies in the training data
    n_items = data['train'].shape[1]

    #Compressed Sparse Row format gives cheap access to a single user's row;
    #the conversion does not depend on the user, so it is done once up front
    train_csr = data['train'].tocsr()
    item_labels = data['item_labels']

    #every item id from 0 up to the number of items, so we can predict a score for every movie
    all_item_ids = np.arange(n_items)

    #generate recommendations for each user we input
    for user_id in user_ids:

        #movies they already like: every interaction stored in the user's train row counts as a positive
        #(in this notebook the data is fetched with min_rating = 4.0, so these are movies rated 4 or 5)
        #.indices holds the column (item) ids of the stored entries of that row
        known_positives = item_labels[train_csr[user_id].indices]

        #movies our model predicts they will like: one score per movie
        scores = model.predict(user_id, all_item_ids)

        #rank them in order of their score
        #argsort sorts ascending, so negating the scores yields the indices in descending score order
        top_items = item_labels[np.argsort(-scores)]

        #print the results
        print("User %s" % user_id)
        print("     Known positives:")

        #printing the first top_n known positive movies of the user
        for x in known_positives[:top_n]:
            print("        %s" % x)

        print("     Recommended:")

        #printing the top_n movies with the highest predicted score
        for x in top_items[:top_n]:
            print("        %s" % x)

In [None]:
# print sample recommendations for three users; each id is used as a row index into the train matrix
sample_recommendation(model, data, [5, 69, 232])

User 5
     Known positives:
        Toy Story (1995)
        Babe (1995)
        Dead Man Walking (1995)
     Recommended:
        Casablanca (1942)
        Vertigo (1958)
        North by Northwest (1959)
User 69
     Known positives:
        Toy Story (1995)
        Babe (1995)
        Rumble in the Bronx (1995)
     Recommended:
        Raiders of the Lost Ark (1981)
        Star Wars (1977)
        Empire Strikes Back, The (1980)
User 232
     Known positives:
        Dead Man Walking (1995)
        Postino, Il (1994)
        Taxi Driver (1976)
     Recommended:
        Star Wars (1977)
        Raiders of the Lost Ark (1981)
        Silence of the Lambs, The (1991)
