In [0]:
pip install lightfm

Collecting lightfm
[?25l  Downloading https://files.pythonhosted.org/packages/e9/8e/5485ac5a8616abe1c673d1e033e2f232b4319ab95424b42499fabff2257f/lightfm-1.15.tar.gz (302kB)
[K     |█                               | 10kB 16.3MB/s eta 0:00:01[K     |██▏                             | 20kB 1.8MB/s eta 0:00:01[K     |███▎                            | 30kB 2.6MB/s eta 0:00:01[K     |████▍                           | 40kB 1.7MB/s eta 0:00:01[K     |█████▍                          | 51kB 2.1MB/s eta 0:00:01[K     |██████▌                         | 61kB 2.5MB/s eta 0:00:01[K     |███████▋                        | 71kB 2.9MB/s eta 0:00:01[K     |████████▊                       | 81kB 3.3MB/s eta 0:00:01[K     |█████████▊                      | 92kB 3.7MB/s eta 0:00:01[K     |██████████▉                     | 102kB 2.8MB/s eta 0:00:01[K     |████████████                    | 112kB 2.8MB/s eta 0:00:01[K     |█████████████                   | 122kB 2.8MB/s eta 0:00:01[K  

In [0]:
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

In [0]:
# Load the MovieLens data, passing a minimum rating of 4.0 to the loader
data = fetch_movielens(min_rating=4.0)

# Show a summary (shape, dtype, stored elements) of each interaction matrix
for split_name in ('train', 'test'):
    print(repr(data[split_name]))

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


LightFM is a recommender package that implements, among other losses, the WARP (Weighted Approximate-Rank Pairwise) loss for learning to rank from implicit feedback.

In [0]:
# Create the model with the WARP loss. A fixed random_state seeds the model's
# random number generator so that re-running the notebook is more repeatable.
# NOTE(review): with num_threads > 1 results may still differ slightly between
# runs — confirm whether exact reproducibility requires num_threads=1.
model = LightFM(loss='warp', random_state=42)

# Train on the training interactions. fit() is left as the last expression so
# the cell displays the fitted model object.
model.fit(data['train'], epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x7f163a23f550>

In [0]:
def sample_recommendation(model, data, user_ids, top_n=3):
    """Print known positives and top-ranked recommendations for each user.

    Args:
        model: Object exposing ``predict(user_id, item_ids)`` that returns an
            array with one score per requested item id. Items are ranked by
            descending score.
        data: Mapping with a ``'train'`` sparse interaction matrix (rows index
            users, columns index items) and an ``'item_labels'`` array that is
            indexed by item position.
        user_ids: Iterable of row indices into ``data['train']``.
        top_n: Maximum number of known positives and of recommendations to
            print per user. Defaults to 3.

    Returns:
        None. The report is written to standard output.
    """
    item_labels = data['item_labels']

    # Convert to CSR once: the conversion does not depend on the user, so doing
    # it inside the loop would repeat the same work for every user.
    train_csr = data['train'].tocsr()

    # Number of items is the column count of the training matrix.
    n_items = train_csr.shape[1]
    all_item_ids = np.arange(n_items)

    # Generate recommendations for each requested user.
    for user_id in user_ids:

        # Items with a stored interaction in this user's training row.
        known_positives = item_labels[train_csr[user_id].indices]

        # Score every item for this user, then order labels from highest to
        # lowest score (negating the scores turns argsort into a descending sort).
        scores = model.predict(user_id, all_item_ids)
        top_items = item_labels[np.argsort(-scores)]

        # Print out the results.
        print("User %s" % user_id)
        print("     Known positives:")

        for label in known_positives[:top_n]:
            print("        %s" % label)

        print("     Recommended:")

        for label in top_items[:top_n]:
            print("        %s" % label)
            
# Print sample recommendations for three example users (row indices 3, 25 and 450).
sample_recommendation(model, data, user_ids=[3, 25, 450])

User 3
     Known positives:
        Seven (Se7en) (1995)
        Contact (1997)
        Starship Troopers (1997)
     Recommended:
        Scream (1996)
        Contact (1997)
        Air Force One (1997)
User 25
     Known positives:
        Dead Man Walking (1995)
        Star Wars (1977)
        Fargo (1996)
     Recommended:
        English Patient, The (1996)
        Fargo (1996)
        Contact (1997)
User 450
     Known positives:
        Contact (1997)
        George of the Jungle (1997)
        Event Horizon (1997)
     Recommended:
        Scream (1996)
        Air Force One (1997)
        I Know What You Did Last Summer (1997)
