# Testing LightFM

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

## Load data

In [2]:
import pickle

# Location of the pre-built ratings matrix used as the interactions input
# for the train/test split below.
MOVIELENS_DATASET = "data/intersect-20m/"
ratings_pivot_csr_filename = MOVIELENS_DATASET + "/ratings.csr"

# NOTE(security): unpickling can execute arbitrary code embedded in the file.
# Only load files that were produced by a trusted pipeline.
# The `with` block closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open(ratings_pivot_csr_filename, 'rb') as ratings_file:
    # presumably a scipy sparse matrix (the file is named ".csr") — TODO confirm
    ratings_pivot = pickle.load(ratings_file)

## LightFM model

In [3]:
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split

# Fix the seed so the 80/20 split is identical across kernel restarts;
# without it every run trains and evaluates on different interactions.
SPLIT_SEED = 42
train, test = random_train_test_split(ratings_pivot,
                                      test_percentage=0.2,
                                      random_state=SPLIT_SEED)

In [4]:
import time
from ipywidgets import IntProgress
from IPython.display import display

from lightfm.evaluation import precision_at_k

# Hyper-parameters shared by every model variant built in this notebook.
num_components = 64  # passed to LightFM as `no_components`
epochs = 30          # number of single-epoch `fit_partial` calls per model
alpha = 0.001        # used for both `user_alpha` and `item_alpha`

In [5]:
def _build_warp_model(k, random_state=42):
    """Create an untrained LightFM model that differs from its siblings only in `k`.

    Parameters
    ----------
    k : int
        Value forwarded to LightFM's `k` argument.
    random_state : int, optional
        Seed forwarded to LightFM so that parameter initialisation is
        repeatable between runs.

    Returns
    -------
    LightFM
        A model configured with the notebook-wide `num_components` and
        `alpha`, WARP loss and the adagrad learning schedule.
    """
    # NOTE(review): the LightFM docs describe `k` as a k-OS training parameter
    # (loss='warp-kos'). With loss='warp' it may have no effect, in which case
    # the three models below are equivalent — confirm whether 'warp-kos' was
    # the intended loss.
    return LightFM(no_components=num_components,
                   loss='warp',
                   k=k,
                   learning_schedule='adagrad',
                   user_alpha=alpha,
                   item_alpha=alpha,
                   random_state=random_state)


# One model per `k` under comparison; the names are used by the training,
# plotting and saving cells further down.
model_k3 = _build_warp_model(3)
model_k5 = _build_warp_model(5)
model_k10 = _build_warp_model(10)

In [None]:
# Train model_k3 one epoch at a time so that the wall time and precision@3
# of every epoch can be recorded for the plots below.
duration_k3 = []    # seconds spent in fit_partial, one entry per epoch
precisions_k3 = []  # mean precision@3 after each epoch

for _ in tqdm(range(epochs), desc="model_k3"):
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    model_k3.fit_partial(train, epochs=1, num_threads=4)
    duration_k3.append(time.perf_counter() - start)

    # NOTE(review): precision is measured on the training interactions; the
    # held-out `test` split is not evaluated here — confirm this is intended.
    precisions_k3.append(precision_at_k(model_k3, train, k=3).mean())

  0%|          | 0/30 [00:00<?, ?it/s]

In [None]:
# Train model_k5 one epoch at a time so that the wall time and precision@5
# of every epoch can be recorded for the plots below.
duration_k5 = []    # seconds spent in fit_partial, one entry per epoch
precisions_k5 = []  # mean precision@5 after each epoch

for _ in tqdm(range(epochs), desc="model_k5"):
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    model_k5.fit_partial(train, epochs=1, num_threads=4)
    duration_k5.append(time.perf_counter() - start)

    # NOTE(review): precision is measured on the training interactions; the
    # held-out `test` split is not evaluated here — confirm this is intended.
    precisions_k5.append(precision_at_k(model_k5, train, k=5).mean())

In [None]:
# Train model_k10 one epoch at a time so that the wall time and precision@10
# of every epoch can be recorded for the plots below.
duration_k10 = []    # seconds spent in fit_partial, one entry per epoch
precisions_k10 = []  # mean precision@10 after each epoch

for _ in tqdm(range(epochs), desc="model_k10"):
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    model_k10.fit_partial(train, epochs=1, num_threads=4)
    duration_k10.append(time.perf_counter() - start)

    # NOTE(review): precision is measured on the training interactions; the
    # held-out `test` split is not evaluated here — confirm this is intended.
    precisions_k10.append(precision_at_k(model_k10, train, k=10).mean())

In [None]:
# Mean precision@k recorded after each epoch, one curve per model.
x = np.arange(epochs)

fig, ax = plt.subplots()
for label, series in (('k3', precisions_k3),
                      ('k5', precisions_k5),
                      ('k10', precisions_k10)):
    ax.plot(x, np.array(series), label=label)

# Label the figure so it can be told apart from the duration plot, which
# uses the same legend entries.
ax.set_title('Precision@k per epoch (training interactions)')
ax.set_xlabel('epoch')
ax.set_ylabel('mean precision@k')
ax.legend(loc='lower right')
plt.show()

In [None]:
# Wall time of each single-epoch fit_partial call, one curve per model.
x = np.arange(epochs)

fig, ax = plt.subplots()
for label, series in (('k3', duration_k3),
                      ('k5', duration_k5),
                      ('k10', duration_k10)):
    ax.plot(x, np.array(series), label=label)

# Label the figure so it can be told apart from the precision plot, which
# uses the same legend entries.
ax.set_title('Training time per epoch')
ax.set_xlabel('epoch')
ax.set_ylabel('seconds')
ax.legend(loc='lower right')
plt.show()

## Save models

In [None]:
# Persist every trained model to its own file. Each model needs a distinct
# path: writing all three to "model.k3.30epoch" would leave only the last
# dump (model_k10) on disk under a misleading name.
model_outputs = [
    (model_k3, "Models/LightFM/model.k3.30epoch"),
    (model_k5, "Models/LightFM/model.k5.30epoch"),
    (model_k10, "Models/LightFM/model.k10.30epoch"),
]

for trained_model, output_path in model_outputs:
    # `with` flushes and closes the handle even if pickling fails midway.
    with open(output_path, 'wb') as model_file:
        pickle.dump(trained_model, model_file)