# Testing LightFM

In [1]:
TEST_CODE = "1560762749.10787"
CHOSEN_EPOCH = 940

MODEL_PATH = "../log/{}/models/epoch_{}".format(TEST_CODE, CHOSEN_EPOCH)
LOG_PATH = "../log/{}/log.txt".format(TEST_CODE)

In [2]:
import pickle

from datetime import datetime

from tqdm import tqdm
from sklearn.model_selection import train_test_split

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k

In [3]:
import pickle
import os

class Logger:

    def set_default_filename(self, filename):
        self.default_filename = filename

    def create_session_folder(self, path):
        try:  
            os.makedirs(path)
        except OSError:  
            print ("Creation of the directory %s failed" % path)
        else:  
            print ("     =====> Successfully created the directory %s \n" % path)

        try:
            os.makedirs(path + "models/")
        except OSError:  
            print ("Creation of the model directory failed")
        else:  
            print ("     =====> Successfully created the model directory")


    def log(self, text):
        with open(self.default_filename, 'a') as f:
            f.writelines(text)
            f.write("\n")

    def save_model(self, model, filename):
        pickle.dump(model, open(filename, 'wb'))

In [4]:
ratings_pivot_csr_filename = "../data/intersect-20m/ratings.csr"

ratings_pivot = pickle.load(open(ratings_pivot_csr_filename, 'rb'))
train, test = random_train_test_split(ratings_pivot, test_percentage=0.2)

train_csr = train.tocsr()
test_csr = test.tocsr()

### ========== Models ==========

In [5]:
model = pickle.load(open(MODEL_PATH, 'rb'))

### ========== Preparation ==========

In [6]:
from scipy.sparse import identity

user_identity = identity(train.shape[0])
item_identity = identity(train.shape[1])

In [7]:
test_user, test_item = test.nonzero()

### ========== Predict ==========

In [8]:
def get_top_suggestion(sample_user, k):
    
    test_item_idx = [i for i in range(0, test.shape[1])]    
    
    prediction = model.predict(user_ids=sample_user, item_ids=test_item_idx, user_features=user_identity, item_features=item_identity)
    prediction = [(prediction[i], i) for i in range(0, len(prediction))]
    prediction = sorted(prediction, reverse=True)
    
    return prediction[:k]


def get_top_truth(sample_user, k):
    
    truth = []

    user_ratings = test_csr[sample_user].todense().tolist()[0]
    user_rated_item = test_csr[sample_user].nonzero()[1]
    for item in user_rated_item:
        truth.append((user_ratings[item], item))

    user_ratings = train_csr[sample_user].todense().tolist()[0]
    user_rated_item = train_csr[sample_user].nonzero()[1]
    for item in user_rated_item:
        truth.append((user_ratings[item], item))

    truth = sorted(truth, reverse=True)
    
    return truth[:k]

In [9]:
prediction = get_top_suggestion(2121, 10)
prediction

[(-0.11680272221565247, 4934),
 (-0.11938527971506119, 10452),
 (-0.5634471774101257, 7147),
 (-0.8154057264328003, 12876),
 (-0.8165539503097534, 2012),
 (-0.8179770708084106, 13695),
 (-0.8185664415359497, 11316),
 (-0.8195421695709229, 6678),
 (-0.8198939561843872, 878),
 (-0.8199808597564697, 654)]

In [10]:
truth = get_top_truth(2, 10)
truth

[(1.0, 14970),
 (1.0, 14727),
 (1.0, 14226),
 (1.0, 14030),
 (1.0, 13709),
 (1.0, 13512),
 (1.0, 12651),
 (1.0, 12255),
 (1.0, 12241),
 (1.0, 12224)]

In [11]:
def get_intersect_pred_truth(prediction, truth, k):
    pred_item_set = {x[1] for x in prediction[:k]}
    truth_item_set = {x[1] for x in truth[:k]}
    
    return pred_item_set.intersection(truth_item_set)

In [12]:
def check_precision_at_k(sample_user, k):
    prediction = get_top_suggestion(sample_user, k)
    truth = get_top_truth(sample_user, k)
    
    intersect = get_intersect_pred_truth(prediction, truth, k)
    return intersect, len(intersect)/k

In [13]:
from tqdm import tqdm

prec_with_train = []
for i in tqdm(range(0, 50000)):
    _, p = check_precision_at_k(i, 10)
    prec_with_train.append(p)
    
prec_with_train

100%|██████████| 50000/50000 [25:56<00:00, 32.12it/s]


[0.0,
 0.2,
 0.0,
 0.2,
 0.1,
 0.1,
 0.2,
 0.3,
 0.1,
 0.2,
 0.0,
 0.4,
 0.3,
 0.1,
 0.3,
 0.0,
 0.1,
 0.0,
 0.1,
 0.0,
 0.2,
 0.2,
 0.1,
 0.0,
 0.0,
 0.5,
 0.0,
 0.2,
 0.4,
 0.0,
 0.0,
 0.2,
 0.0,
 0.3,
 0.1,
 0.0,
 0.0,
 0.1,
 0.1,
 0.0,
 0.0,
 0.2,
 0.1,
 0.0,
 0.0,
 0.2,
 0.2,
 0.2,
 0.1,
 0.1,
 0.1,
 0.4,
 0.0,
 0.1,
 0.0,
 0.1,
 0.1,
 0.1,
 0.2,
 0.0,
 0.0,
 0.0,
 0.1,
 0.3,
 0.1,
 0.3,
 0.0,
 0.1,
 0.1,
 0.1,
 0.1,
 0.1,
 0.2,
 0.0,
 0.3,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.1,
 0.1,
 0.1,
 0.2,
 0.0,
 0.0,
 0.1,
 0.1,
 0.0,
 0.2,
 0.1,
 0.0,
 0.2,
 0.1,
 0.0,
 0.0,
 0.2,
 0.1,
 0.1,
 0.2,
 0.2,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.0,
 0.1,
 0.0,
 0.2,
 0.1,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.0,
 0.1,
 0.2,
 0.2,
 0.0,
 0.0,
 0.0,
 0.1,
 0.0,
 0.1,
 0.1,
 0.2,
 0.0,
 0.3,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.0,
 0.0,
 0.0,
 0.1,
 0.2,
 0.3,
 0.2,
 0.0,
 0.0,
 0.1,
 0.0,
 0.0,
 0.2,
 0.1,
 0.1,
 0.0,
 0.1,
 0.1,
 0.1,
 0.0,
 0.2,
 0.2,
 0.2,
 0.3,
 0.1,
 0.0,
 0.1,
 0.2,
 0.2,
 0.0,
 0.0

In [14]:
import numpy as np

In [15]:
np.average(prec_with_train)

0.10515

# ===================

In [16]:
# sample_user = [32, 1949, 1128, 4321, 7828, 8242, 2119, 1827, 6240, 12282]
sample_user = [np.random.randint(1, 15000) for i in range(0, 10)]
offset = int(138493 * 0.9)
sample_user = [i + offset for i in sample_user]

In [17]:
for user in sample_user:
    prediction = get_top_suggestion(user, 10)
    prediction = [x[1] for x in prediction]
    
    truth = get_top_truth(user, 10)
    truth = [x[1] for x in truth]
    
    display((prediction))
    display((truth))
    display(check_precision_at_k(user, 10))
    display("==================")

[4934, 10452, 7147, 12876, 2012, 13695, 11316, 6678, 654, 878]

[13718, 5884, 1619, 1340, 13900, 13382, 10356, 10107, 8686, 8372]

(set(), 0.0)



[4934, 10452, 7147, 12876, 2012, 13695, 11316, 6678, 878, 654]

[13410, 12134, 11619, 11466, 10615, 9398, 8606, 5768, 5562, 15033]

(set(), 0.0)



[4934, 10452, 7147, 12876, 2012, 13695, 11316, 6678, 878, 654]

[1018, 637, 5510, 2663, 363, 14066, 596, 14105, 6672, 232]

(set(), 0.0)



[4934, 10452, 7147, 12876, 2012, 13695, 11316, 6678, 878, 654]

[11604, 11419, 11316, 11274, 8564, 7652, 7470, 6829, 6176, 5608]

({11316}, 0.1)



[4934, 10452, 7147, 12876, 2012, 13695, 11316, 6678, 878, 654]

[14970, 14486, 14318, 14314, 14179, 13710, 13696, 13695, 13605, 13566]

({13695}, 0.1)



Exception: Number of user feature rows does not equal the number of users