### [Light FM Docs](http://lyst.github.io/lightfm/docs/home.html)

In [63]:
import numpy as np
import pandas as pd
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
## NOTE: the warning raised by this import is not an actual problem -- it only reflects
## missing OpenMP support in the gcc libraries on OSX. It should be irrelevant when running at scale on Linux.

# Get Data and Look at it

In [64]:
# Fetch the MovieLens data set as a dict.
# min_rating=4.0 presumably keeps only ratings of 4 and above (the values shown are 4s and 5s) -- confirm.
data = fetch_movielens(min_rating=4.0)
# Wrap the dict in a DataFrame purely to get a quick visual overview of its keys.
# NOTE(review): the displayed rows repeat the same text for 'train', 'test' and
# 'item_features', so those columns appear to hold the whole sparse matrix in every row
# rather than per-item values -- confirm before using this frame for analysis.
dataDF = pd.DataFrame(data)
dataDF.head()

Unnamed: 0,item_feature_labels,item_features,item_labels,test,train
0,Toy Story (1995),"(0, 0)\t1.0\n (1, 1)\t1.0\n (2, 2)\t1.0\n ...",Toy Story (1995),"(0, 19)\t4\n (0, 32)\t4\n (0, 60)\t4\n (0...","(0, 0)\t5\n (0, 2)\t4\n (0, 5)\t5\n (0, 6..."
1,GoldenEye (1995),"(0, 0)\t1.0\n (1, 1)\t1.0\n (2, 2)\t1.0\n ...",GoldenEye (1995),"(0, 19)\t4\n (0, 32)\t4\n (0, 60)\t4\n (0...","(0, 0)\t5\n (0, 2)\t4\n (0, 5)\t5\n (0, 6..."
2,Four Rooms (1995),"(0, 0)\t1.0\n (1, 1)\t1.0\n (2, 2)\t1.0\n ...",Four Rooms (1995),"(0, 19)\t4\n (0, 32)\t4\n (0, 60)\t4\n (0...","(0, 0)\t5\n (0, 2)\t4\n (0, 5)\t5\n (0, 6..."
3,Get Shorty (1995),"(0, 0)\t1.0\n (1, 1)\t1.0\n (2, 2)\t1.0\n ...",Get Shorty (1995),"(0, 19)\t4\n (0, 32)\t4\n (0, 60)\t4\n (0...","(0, 0)\t5\n (0, 2)\t4\n (0, 5)\t5\n (0, 6..."
4,Copycat (1995),"(0, 0)\t1.0\n (1, 1)\t1.0\n (2, 2)\t1.0\n ...",Copycat (1995),"(0, 19)\t4\n (0, 32)\t4\n (0, 60)\t4\n (0...","(0, 0)\t5\n (0, 2)\t4\n (0, 5)\t5\n (0, 6..."


In [65]:
# Show the container type, then the repr of each split; the repr of a sparse
# matrix reports its dimensions, dtype, stored-element count and storage format.
print(type(data), repr(data['train']), repr(data['test']), sep='\n')

<class 'dict'>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


In [66]:
# Each split inside the dict is a SciPy sparse matrix; print its concrete class
print(type(data['train']))
# Printing the matrix itself is intentionally left disabled:
#print(data['train'])

<class 'scipy.sparse.coo.coo_matrix'>


In [67]:
# Shape of the overview DataFrame restricted to its 'test' and 'train' columns.
# NOTE(review): this is (rows of dataDF, 2 selected columns); it is not the shape
# of the sparse interaction matrices themselves -- use data['train'].shape for that.
dataDF[['test', 'train']].shape

(1682, 2)

Create the model

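Here, we'll use the "WARP" (Weighted Approximate-Rank Pairwise) loss.
WARP is a learning-to-rank loss trained with gradient descent: for each (user, liked item) pair it
samples other items until it finds one the model currently scores higher, then takes a gradient step
weighted by how quickly that violation was found. The goal is to push the items a user likes toward
the top of their ranked list, rather than to predict rating values by regression.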

In [68]:
# Create the model with the WARP ranking loss.
# random_state seeds the model's random number generator so that a fresh
# "Restart & Run All" starts training from the same initial state.
# NOTE(review): with num_threads > 1 the fit may still not be bit-for-bit
# reproducible between runs -- confirm against the LightFM documentation.
model = LightFM(loss='warp', random_state=42)
# Fit the model on the training interactions (30 passes over the data, 2 threads).
# fit() is the last expression, so the cell displays the fitted model's repr.
model.fit(data['train'], epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x10a38f390>

In [69]:
def sample_rec(model, data, user_ids, n_show=3):
    """Print known positives and top recommendations for each requested user.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_id, item_ids)`` that returns one
        score per entry of ``item_ids``.
    data : dict
        Must contain ``'train'`` (a sparse user x item interaction matrix
        supporting ``.tocsr()``) and ``'item_labels'`` (an array of item names
        that can be indexed with an integer array).
    user_ids : iterable
        Row indices of the users to report on.
    n_show : int, optional
        How many known positives and how many recommendations to print per
        user. Defaults to 3, which matches the original hard-coded behaviour.

    Returns
    -------
    None
        Results are written to stdout.

    Notes
    -----
    Recommendations are ranked over *all* items, so an item the user already
    interacted with in the training data can appear in the recommended list.
    """
    item_labels = data['item_labels']

    # Convert to CSR once: row slicing is needed for every user, and the
    # conversion does not depend on the user being processed.
    train_csr = data['train'].tocsr()

    # Number of items (columns) in the training data; every item is scored.
    n_items = train_csr.shape[1]
    all_item_ids = np.arange(n_items)

    # Generate recommendations for each user we were given
    for user_id in user_ids:

        # Items with a stored interaction in this user's training row
        known_positives = item_labels[train_csr[user_id].indices]

        # Model scores for every item for this user
        scores = model.predict(user_id, all_item_ids)

        # Rank items from highest score to lowest (negate for descending order)
        top_items = item_labels[np.argsort(-scores)]

        # Print the results
        print("User %s" % user_id)
        print("      Known positives: ")

        for x in known_positives[:n_show]:
            print("        %s" % x)

        print("       Recommended: ")

        for x in top_items[:n_show]:
            print("         %s" % x)

In [70]:
sample_rec(model, data, [3, 25, 450])

User 3
      Known positives: 
        Seven (Se7en) (1995)
        Contact (1997)
        Starship Troopers (1997)
       Recommended: 
         Scream (1996)
         Contact (1997)
         Air Force One (1997)
User 25
      Known positives: 
        Dead Man Walking (1995)
        Star Wars (1977)
        Fargo (1996)
       Recommended: 
         English Patient, The (1996)
         Titanic (1997)
         L.A. Confidential (1997)
User 450
      Known positives: 
        Contact (1997)
        George of the Jungle (1997)
        Event Horizon (1997)
       Recommended: 
         Scream (1996)
         Dante's Peak (1997)
         Conspiracy Theory (1997)
