# Import and class definitions

In [1]:
import recPipeline as pipe

# Testing ground

In [3]:
# Build the recommendation pipeline, run its preprocessing step, and ask it
# for a model (12 threads requested).
rec = pipe.Pipeline()
rec.preprocess()
model = rec.get_model(num_threads=12)

In [14]:
# Show known items and top suggestions for one randomly chosen user; the fixed
# seed keeps the chosen user stable between runs.
# NOTE(review): `model` is rebound to a raw LightFM instance in a later cell and
# the execution counts are out of order, so confirm which model produced the
# output below after a Restart & Run All.
rec.recommend_random(model=model, seed=100)

[1mUser 10283 known items: [0m


Unnamed: 0,title,authors,rating
24,Silence,"Shūsaku Endō, William Johnston",5
94,The Indispensable Calvin and Hobbes,Bill Watterson,5
29,The Prophet,Kahlil Gibran,5
79,Goodnight Moon,"Margaret Wise Brown, Clement Hurd",5
76,The Wind in the Willows,"Kenneth Grahame, Gillian Avery",5
26,"Batman: The Dark Knight Returns (The Dark Knight Saga, #1)","Frank Miller, Klaus Janson, Lynn Varley",5
72,The Complete Grimm's Fairy Tales,"Jacob Grimm, Wilhelm Grimm, Josef Scharl, Padraic Colum, Joseph Campbell, Margaret Raine Hunt, James Stern",5
84,Harold and the Purple Crayon,Crockett Johnson,5
22,Confessions,"Augustine of Hippo, Henry Chadwick, Albert Cook Outler",5
21,The Amazing Adventures of Kavalier & Clay,Michael Chabon,5


[1mTop 10 suggested items:[0m


Unnamed: 0,title,authors
49,Where the Sidewalk Ends,Shel Silverstein
101,Where the Wild Things Are,Maurice Sendak
4,The Great Gatsby,F. Scott Fitzgerald
7,The Catcher in the Rye,J.D. Salinger
13,Animal Farm,George Orwell
12,1984,"George Orwell, Erich Fromm, Celâl Üster"
27,Lord of the Flies,William Golding
84,The Giving Tree,Shel Silverstein
31,Of Mice and Men,John Steinbeck
3,To Kill a Mockingbird,Harper Lee


## Some notes for moving forward.

1. Now I need to run a grid search to find the best hyperparameters.
    1. I need to keep all of a user's reviews together in either the test or the train set, and when testing I need to keep some of each test user's reviews as known input so that the remaining ones can be used to check the predictions, right? How do I do all of this correctly?
2. Find out which method/algorithm LightFM is using. Could another one be better?
3. How do I read in a NEW user and give them recommendations? 
    1. Maybe start with retraining the entire model.
    2. Then move on to batch updating or something like that
4. Finally, deployment: Docker, AWS, etc.

In [2]:
import numpy as np
import pandas as pd
import random

# Seed Python's RNG so the user-level split below is reproducible.
random.seed(1001)

# Just grab the data first.
rec_check = pipe.Pipeline()
rec_check.preprocess()

# Pick half of the distinct users as test users.
uids = rec_check.ratings.uid.unique().tolist()
test_uids = random.sample(uids, k=int(len(uids) * 0.5))

# Split the ratings by user: every rating of a test user goes to `test_users`,
# everything else starts out as training data. One boolean mask is used for
# both halves so they are guaranteed to be complementary.
is_test_user = rec_check.ratings.uid.isin(test_uids)
test_users = rec_check.ratings[is_test_user]
train = rec_check.ratings[~is_test_user]

# Only hold out 40% of each test user's ratings; the remaining 60% go back into
# the train set. This prevents the cold-start problem on the test set. New
# users will be incorporated later.
test = test_users.groupby('uid').sample(frac=0.4, random_state=1)

# `DataFrame.append` is deprecated (removed in pandas 2.0); `pd.concat` is the
# supported equivalent and yields the same rows with a fresh 0..n-1 index.
train = pd.concat([train, test_users.drop(test.index)], ignore_index=True)

In [4]:
from scipy.sparse import coo_matrix as cm
import lightfm as lf

# Train and test interactions must live in the same (user x item) index space:
# the evaluation helpers take both matrices together, and differently shaped
# matrices would rank over different item sets (or fail a shape check).
# Derive one shared shape from the largest ids seen in either split.
# Assumes uid / iid are non-negative integer indices, as the original
# coordinate-based construction already did.
numUsers = max(train.uid.max(), test.uid.max()) + 1
numBooks = max(train.iid.max(), test.iid.max()) + 1

# Per-split names kept so any other cell that still reads them keeps working.
trainNumUsers = testNumUsers = numUsers
trainNumBooks = testNumBooks = numBooks

# Ratings become the matrix values; (uid, iid) pairs are the coordinates.
sparseTrain = cm((train.rating, (train.uid, train.iid)),
                 shape=(numUsers, numBooks))

sparseTest = cm((test.rating, (test.uid, test.iid)),
                shape=(numUsers, numBooks))

In [13]:
# Fit a LightFM model with the WARP loss on the training interactions.
# `random_state` pins the model's own random seed; seeding Python's `random`
# module elsewhere does not cover it.
# NOTE(review): with num_threads > 1 the fit may still not be bit-for-bit
# reproducible — confirm if exact repeatability matters.
model = lf.LightFM(loss='warp', random_state=1001)
model.fit(sparseTrain, epochs=40, num_threads=12)

<lightfm.lightfm.LightFM at 0x7fd13caa0040>

In [14]:
from lightfm.evaluation import precision_at_k, recall_at_k

# Per-user precision on the held-out ratings (library-default cutoff k), with
# the training interactions supplied alongside; report the mean over users.
precision_per_user = precision_at_k(
    model,
    test_interactions=sparseTest,
    train_interactions=sparseTrain,
    num_threads=12,
)
precision_per_user.mean()

0.34711736

In [15]:
# Per-user recall on the held-out ratings (library-default cutoff k), with the
# training interactions supplied alongside; report the mean over users.
recall_per_user = recall_at_k(
    model,
    test_interactions=sparseTest,
    train_interactions=sparseTrain,
    num_threads=12,
)
recall_per_user.mean()

0.07992262640803939

In [None]:
# Both scores above (precision ≈ 0.35, recall ≈ 0.08) are really bad!