# Testing LightFM

In [30]:
# Identifier of the training run whose artifacts live under ../log/<TEST_CODE>/.
TEST_CODE = "1561003029.019894"
# Epoch number of the checkpoint file to load from that run.
CHOSEN_EPOCH = 480

# Text log of the run, and the checkpoint file for the chosen epoch.
LOG_PATH = "../log/{}/log.txt".format(TEST_CODE)
MODEL_PATH = "../log/{}/models/epoch_{}".format(TEST_CODE, CHOSEN_EPOCH)

In [31]:
import pickle

from datetime import datetime

from tqdm import tqdm
from sklearn.model_selection import train_test_split

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k

In [32]:
import pickle
import os

class Logger:
    """Small helper that creates a run folder, appends to a text log and pickles models."""

    def set_default_filename(self, filename):
        """Remember ``filename`` as the file that :meth:`log` appends to."""
        self.default_filename = filename

    def create_session_folder(self, path):
        """Create ``path`` and a ``models`` sub-directory inside it.

        Failures (including "directory already exists") are reported with a
        printed message instead of being raised, so the call is best-effort.

        Args:
            path: Directory to create; a trailing slash is optional.
        """
        try:
            os.makedirs(path)
        except OSError:
            print("Creation of the directory %s failed" % path)
        else:
            print("     =====> Successfully created the directory %s \n" % path)

        try:
            # os.path.join keeps the model folder inside ``path`` even when
            # ``path`` has no trailing slash (plain concatenation would create
            # a sibling directory named "<path>models").
            os.makedirs(os.path.join(path, "models"))
        except OSError:
            print("Creation of the model directory failed")
        else:
            print("     =====> Successfully created the model directory")

    def log(self, text):
        """Append ``text`` followed by a newline to the default log file.

        Args:
            text: A string, or an iterable of strings that are written
                back-to-back (it is handed to ``file.writelines``).

        Raises:
            AttributeError: If :meth:`set_default_filename` was never called.
        """
        with open(self.default_filename, 'a') as f:
            f.writelines(text)
            f.write("\n")

    def save_model(self, model, filename):
        """Pickle ``model`` into ``filename``, overwriting any existing file."""
        # The context manager closes (and flushes) the file even if pickling fails.
        with open(filename, 'wb') as f:
            pickle.dump(model, f)

In [33]:
ratings_pivot_csr_filename = "../data/intersect-20m/ratings.csr"

# Load the pickled ratings matrix. The context manager closes the file handle,
# which the bare ``pickle.load(open(...))`` form left open.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open(ratings_pivot_csr_filename, 'rb') as ratings_file:
    ratings_pivot = pickle.load(ratings_file)

# Hold out 20% of the stored interactions as the test matrix.
# NOTE(review): the split is unseeded, so it changes on every run and is
# presumably not the split the loaded model was trained with — confirm.
train, test = random_train_test_split(ratings_pivot, test_percentage=0.2)

# CSR copies of both splits, used for per-user row lookups further down.
train_csr = train.tocsr()
test_csr = test.tocsr()

### ========== Models ==========

In [34]:
# Load the pickled model checkpoint found at MODEL_PATH; the context manager
# closes the file handle that ``pickle.load(open(...))`` left open.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open(MODEL_PATH, 'rb') as model_file:
    model = pickle.load(model_file)

### ========== Preparation ==========

In [35]:
from scipy.sparse import identity

# One indicator feature per user (rows of `train`) and per item (columns of `train`).
# The matrices are requested in CSR format up front: scipy's default for
# identity() is DIAgonal format, and both matrices are handed to model.predict
# on every call (presumably converted to CSR by LightFM each time — confirm).
user_identity = identity(train.shape[0], format="csr")
item_identity = identity(train.shape[1], format="csr")

In [36]:
# Row (user) and column (item) indices of the non-zero entries of the test matrix.
test_user, test_item = test.nonzero()

### ========== Predict ==========

In [37]:
def get_top_suggestion(sample_user, k):
    """Return the ``k`` best-scored items for one user.

    Every item column of the global ``test`` matrix is scored with the global
    ``model``, using the identity user/item feature matrices.

    Args:
        sample_user: Row index of the user to score.
        k: Number of leading entries to return.

    Returns:
        A list of at most ``k`` ``(score, item_index)`` tuples sorted in
        descending tuple order (highest score first; equal scores are ordered
        by the larger item index first).
    """
    all_items = list(range(test.shape[1]))

    scores = model.predict(
        user_ids=sample_user,
        item_ids=all_items,
        user_features=user_identity,
        item_features=item_identity,
    )

    # Pair each score with its position so the item index survives sorting.
    ranked = sorted(
        ((score, item) for item, score in enumerate(scores)),
        reverse=True,
    )
    return ranked[:k]


def get_top_truth(sample_user, k):
    """Return the ``k`` highest stored ratings of one user.

    The user's non-zero entries are collected from both global matrices,
    ``test_csr`` first and ``train_csr`` second, and ranked together.

    Args:
        sample_user: Row index of the user.
        k: Number of leading entries to return.

    Returns:
        A list of at most ``k`` ``(rating, item_index)`` tuples sorted in
        descending tuple order.
    """
    rated = []

    for matrix in (test_csr, train_csr):
        user_row = matrix[sample_user]
        # Dense copy of the single row so ratings can be read by column index.
        dense_ratings = user_row.todense().tolist()[0]
        rated.extend((dense_ratings[item], item) for item in user_row.nonzero()[1])

    rated.sort(reverse=True)
    return rated[:k]

In [38]:
def get_intersect_pred_truth(prediction, truth, k):
    """Return the items shared by the first ``k`` predictions and first ``k`` truths.

    Args:
        prediction: Sequence of entries whose element at index 1 is the item.
        truth: Sequence of entries whose element at index 1 is the item.
        k: How many leading entries of each sequence to consider.

    Returns:
        The set of items present in both truncated sequences.
    """
    predicted_items = set(entry[1] for entry in prediction[:k])
    true_items = set(entry[1] for entry in truth[:k])
    return predicted_items & true_items

In [39]:
def check_precision_at_k(sample_user, k):
    """Compare the top-``k`` suggestions of a user with their top-``k`` ratings.

    Args:
        sample_user: Row index of the user.
        k: Cut-off used for both rankings; must be non-zero because the hit
            count is divided by it.

    Returns:
        A tuple ``(hits, precision)``: the set of items found in both
        rankings, and the size of that set divided by ``k``.
    """
    hits = get_intersect_pred_truth(
        get_top_suggestion(sample_user, k),
        get_top_truth(sample_user, k),
        k,
    )
    precision = len(hits) / k
    return hits, precision

In [40]:
from tqdm import tqdm

# Precision at k=10 for user rows 0..49999, one value per user, in user order.
prec_with_train = []
for i in tqdm(range(50000)):
    p = check_precision_at_k(i, 10)[1]  # keep the precision, drop the hit set
    prec_with_train.append(p)

100%|██████████| 50000/50000 [27:53<00:00, 31.49it/s]


[0.0,
 0.2,
 0.0,
 0.2,
 0.1,
 0.1,
 0.2,
 0.0,
 0.1,
 0.2,
 0.0,
 0.4,
 0.3,
 0.0,
 0.1,
 0.0,
 0.0,
 0.0,
 0.1,
 0.0,
 0.2,
 0.2,
 0.1,
 0.0,
 0.0,
 0.5,
 0.0,
 0.2,
 0.4,
 0.0,
 0.0,
 0.2,
 0.0,
 0.3,
 0.1,
 0.0,
 0.0,
 0.1,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.0,
 0.0,
 0.2,
 0.2,
 0.2,
 0.1,
 0.0,
 0.1,
 0.4,
 0.0,
 0.1,
 0.0,
 0.1,
 0.0,
 0.1,
 0.2,
 0.0,
 0.0,
 0.0,
 0.1,
 0.2,
 0.1,
 0.2,
 0.0,
 0.1,
 0.1,
 0.1,
 0.1,
 0.1,
 0.2,
 0.0,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.1,
 0.1,
 0.1,
 0.2,
 0.0,
 0.0,
 0.1,
 0.1,
 0.0,
 0.2,
 0.1,
 0.0,
 0.2,
 0.1,
 0.0,
 0.0,
 0.2,
 0.1,
 0.1,
 0.2,
 0.2,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.0,
 0.1,
 0.0,
 0.2,
 0.1,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.0,
 0.1,
 0.2,
 0.2,
 0.0,
 0.0,
 0.0,
 0.1,
 0.0,
 0.1,
 0.1,
 0.2,
 0.0,
 0.3,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.0,
 0.0,
 0.0,
 0.1,
 0.1,
 0.3,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.1,
 0.0,
 0.0,
 0.1,
 0.1,
 0.1,
 0.0,
 0.2,
 0.2,
 0.2,
 0.3,
 0.2,
 0.0,
 0.1,
 0.2,
 0.2,
 0.0,
 0.0

In [41]:
import numpy as np

In [42]:
# Mean of the per-user precision values collected in prec_with_train.
np.average(prec_with_train)

0.09678600000000002

## Check diversity

In [43]:
# Inspect the 10 best-scored (score, item index) pairs for user 188.
get_top_suggestion(188, k=10)

[(-0.2902214825153351, 10659),
 (-0.2934703528881073, 5058),
 (-0.4574028253555298, 7304),
 (-0.8620789647102356, 2073),
 (-0.9118945598602295, 13122),
 (-0.9250181317329407, 14025),
 (-0.9255124926567078, 6825),
 (-0.9265106916427612, 11536),
 (-0.9295519590377808, 1321),
 (-0.9298442602157593, 911)]

In [44]:
offset = 0  # number of top-ranked suggestions to discard for every user
k = 10  # suggestions kept per user after discarding `offset`
n_sample_users = 10  # number of random users compared; independent of k

# NOTE(review): 138493 is presumably the number of user rows in the ratings
# matrix — confirm. The draw is unseeded, so the sample differs on every run.
sample_user = [np.random.randint(0, 138493) for _ in range(n_sample_users)]

# Start both accumulators from the first user's item set (this set itself is
# not printed), then fold in the remaining users one by one.
intersect = {x[1] for x in get_top_suggestion(sample_user[0], k + offset)[offset:]}
uni = intersect
for i in range(1, len(sample_user)):
    s = {x[1] for x in get_top_suggestion(sample_user[i], k + offset)[offset:]}
    print(sorted(s))
    intersect = intersect.intersection(s)
    uni = uni.union(s)

print("\nintersect")
print(intersect, len(intersect))
print("\nunion")
print(uni, len(uni))
print("\ndistinct rate")
# Distinct items divided by the total number of suggestions inspected.
# The denominator follows the real sample size instead of a hard-coded 10.
print((len(uni)) / (len(sample_user) * k))

[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[911, 2073, 3663, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[911, 1321, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]
[676, 911, 2073, 5058, 6825, 7304, 10659, 11536, 13122, 14025]

intersect
{5058, 10659, 13122, 7304, 14025, 6825, 911, 11536, 2073} 9

union
{5058, 13122, 7304, 14025, 911, 11536, 3663, 2073, 10659, 676, 6825, 1321} 12

distinct rate
0.12


# ===================

In [45]:
# Shift added to every sampled user index (0 leaves the sample unchanged).
# Alternative kept for reference: offset = int(138493 * 0.9)
offset = 0

# Ten random user indices drawn from [0, 138493), each shifted by `offset`.
# Fixed sample kept for reference:
# sample_user = [32, 1949, 1128, 4321, 7828, 8242, 2119, 1827, 6240, 12282]
sample_user = [np.random.randint(0, 138493) + offset for _ in range(10)]

In [46]:
# For every sampled user show, in order: the predicted top-10 item indices,
# the top-10 rated item indices, the (hits, precision) pair, and a separator.
for user in sample_user:
    prediction = [item for _, item in get_top_suggestion(user, 10)]
    truth = [item for _, item in get_top_truth(user, 10)]

    display(prediction)
    display(truth)
    display(check_precision_at_k(user, 10))
    display("==================")

[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 911, 676]

[13122, 11314, 8688, 6825, 6167, 4601, 616, 15187, 15182, 14319]

({6825, 13122}, 0.2)



[10659, 5058, 7304, 2073, 13122, 6825, 14025, 11536, 676, 1321]

[10796, 1168, 14162, 2506, 12342, 11661, 11239, 8688, 8672, 6835]

(set(), 0.0)



[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 676, 911]

[11677, 11536, 10659, 9371, 4260, 14164, 13122, 12645, 12342, 11762]

({10659, 11536, 13122}, 0.3)



[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 911, 676]

[11648, 11536, 9113, 5086, 4687, 2073, 14162, 13766, 11743, 11088]

({2073, 11536}, 0.2)



[3663, 10659, 5058, 7304, 2073, 13122, 6825, 14025, 11536, 911]

[10659, 9371, 2073, 14920, 10750, 10545, 10002, 9104, 5058, 3401]

({2073, 5058, 10659}, 0.3)



[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 676, 911]

[14544, 14048, 11966, 11903, 11871, 11519, 11006, 10659, 9823, 8962]

({10659}, 0.1)



[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 676, 911]

[14007, 13122, 11006, 11002, 10924, 10659, 9371, 7631, 7552, 7434]

({10659, 13122}, 0.2)



[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 911, 676]

[10659, 15410, 14705, 11006, 10396, 10371, 9326, 8962, 8959, 7541]

({10659}, 0.1)



[12378, 10659, 5058, 7304, 2073, 3594, 13122, 6825, 14025, 11536]

[15075, 14025, 9066, 8584, 6298, 4716, 18, 14231, 11363, 6825]

({6825, 14025}, 0.2)



[10659, 5058, 7304, 2073, 13122, 14025, 6825, 11536, 1321, 911]

[9066, 7304, 5058, 911, 15140, 14920, 14719, 12475, 12072, 10545]

({911, 5058, 7304}, 0.3)

