In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.sparse import load_npz, coo_matrix, csr_matrix, identity 
from lightfm import LightFM
import joblib
from custom_precision_at_k import custom_precision_at_k
from pathlib import Path

In [2]:
# Directory (relative to the notebook's working directory) that holds the
# saved sparse matrices and the pickled model read by the cells below.
folder_path = Path("training_data")

In [3]:
def _load_matrix(filename):
    """Read one SciPy sparse matrix saved in .npz format inside ``folder_path``."""
    return load_npz(folder_path / filename)


# y is the matrix whose non-zero entries are split into train/validation
# interactions further down; the two feature matrices are handed to the model
# and to the precision metric unchanged.
y = _load_matrix('y.npz')
item_features = _load_matrix('item_features.npz')
user_features = _load_matrix('user_features.npz')

In [4]:
def _binary_interactions(rows, cols):
    """Build a COO matrix shaped like ``y`` holding a 1 at every (row, col) pair.

    The stored values of ``y`` are not carried over: every kept entry becomes
    exactly 1 (created as float ones, then cast by ``dtype=int``).
    """
    ones = np.ones(len(rows))
    return coo_matrix((ones, (rows, cols)), shape=y.shape, dtype=int)


# Split the individual non-zero entries of y (not whole rows) into two sets:
# 10,000 entries are held out for validation, the rest are used for training.
# random_state pins the split so the reported metrics are reproducible.
r, c = y.nonzero()
r_train, r_val, c_train, c_val = train_test_split(
    r,
    c,
    test_size=10_000,
    random_state=42,
)

y_train = _binary_interactions(r_train, c_train)
y_val = _binary_interactions(r_val, c_val)

In [5]:
# Single source of truth for the checkpoint location, so the load below and
# the dump inside the training loop can never point at different files.
model_path = folder_path / 'model_adagrad_200.pkl'

# Resume from the checkpoint written by an earlier run of this cell.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only load checkpoints that you produced yourself.
model = joblib.load(model_path)
# To start from scratch instead, swap the load above for:
#model = LightFM(no_components=200, learning_rate=0.005, loss='warp', random_state=42, learning_schedule='adagrad')

# Number of additional training rounds (30 epochs each) to run in this cell.
# 0 disables training entirely: the loaded model is used as-is by the
# evaluation cells below. Raise it to continue training.
n_training_rounds = 0
for _round in range(n_training_rounds):
    # Optional gradient clipping, currently disabled:
    # model.item_bias_gradients = np.clip(model.item_bias_gradients, None, 100)
    # model.item_embedding_gradients = np.clip(model.item_embedding_gradients, None, 100)
    # model.user_bias_gradients = np.clip(model.user_bias_gradients, None, 100)
    # model.user_embedding_gradients = np.clip(model.user_embedding_gradients, None, 100)
    model.fit_partial(y_train, item_features=item_features, user_features=user_features, epochs=30, num_threads=10, verbose=True)
    # Checkpoint after every round so an interrupted run loses at most one round.
    joblib.dump(model, model_path)

    # Quick progress check: training precision on a fixed random sample of
    # 1000 rows. Re-seeding each round selects the same rows every time, so
    # the printed numbers are comparable across rounds.
    i = 1000
    np.random.seed(42)
    perm = np.random.permutation(y_train.shape[0])[:i]
    # y_train is a COO matrix, which cannot be row-indexed; convert to CSR first.
    user_features_perm = csr_matrix(user_features)[perm]
    y_train_perm = csr_matrix(y_train)[perm]

    train_precision = custom_precision_at_k(model, y_train_perm, user_features=user_features_perm, item_features=item_features, num_threads=10)
    print(f'Train precision : {train_precision.mean() * 100}%')

In [6]:
# Estimate training precision on a reproducible random sample of rows rather
# than on the full training matrix.
i = 10_000
np.random.seed(42)
perm = np.random.permutation(y_train.shape[0])[:i]

# Slice both matrices with the same row permutation so that row k of the
# sampled feature matrix still lines up with row k of the sampled interactions.
# y_train is a COO matrix, which cannot be row-indexed, hence the CSR conversion.
y_train_perm = csr_matrix(y_train)[perm]
user_features_perm = csr_matrix(user_features)[perm]

train_precision = custom_precision_at_k(
    model,
    y_train_perm,
    user_features=user_features_perm,
    item_features=item_features,
    num_threads=10,
)
print(f'Train precision : {train_precision.mean() * 100}%')

Train precision : 39.23657361914813%


In [7]:
# Precision on the held-out validation interactions, computed over all rows.
# y_train is passed as the third positional argument -- presumably so that
# interactions already seen in training are excluded from the ranking;
# TODO confirm against custom_precision_at_k.
val_precision = custom_precision_at_k(
    model,
    y_val,
    y_train,
    user_features=user_features,
    item_features=item_features,
    num_threads=10,
)
print(f'Val precision : {val_precision.mean() * 100}%')

Val precision : 11.173612192444699%
