In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from lightfm import LightFM
from lightfm.evaluation import auc_score, precision_at_k

data = pd.read_csv('preprocessed.csv')
sample_size = 100000
if len(data) > sample_size:
    data = data.sample(n=sample_size, random_state=42)

unique_users = data['customer_id'].unique()
unique_items = data['product_id'].unique()
user_map = {id: idx for idx, id in enumerate(unique_users)}
item_map = {id: idx for idx, id in enumerate(unique_items)}
data['user_idx'] = data['customer_id'].apply(lambda x: user_map[x])
data['item_idx'] = data['product_id'].apply(lambda x: item_map[x])

num_users = len(unique_users)
num_items = len(unique_items)
row = data['user_idx']
col = data['item_idx']
rating = data['star_rating']
input_matrix = csr_matrix((rating, (row, col)), shape=(num_users, num_items))

  "LightFM was compiled without OpenMP support. "


In [2]:
input_matrix

<94299x39597 sparse matrix of type '<class 'numpy.int64'>'
	with 99986 stored elements in Compressed Sparse Row format>

In [3]:
model = LightFM(loss='warp', learning_rate=0.05, no_components=32, random_state=42)

In [4]:
num_epochs = 10
num_users, num_items = input_matrix.shape

for epoch in range(num_epochs):
    model.fit(input_matrix, epochs=1, verbose=True)
    train_auc = auc_score(model, input_matrix).mean()
    train_precision = precision_at_k(model, input_matrix, k=5).mean()

    print("Epoch {}/{}: train AUC = {:.4f}, precision at k = {:.4f}".format(epoch+1, num_epochs, train_auc, train_precision))

Epoch 0
Epoch 1/10: train AUC = 0.7243, precision at k = 0.0037
Epoch 0
Epoch 2/10: train AUC = 0.7238, precision at k = 0.0039
Epoch 0
Epoch 3/10: train AUC = 0.7239, precision at k = 0.0036
Epoch 0
Epoch 4/10: train AUC = 0.7232, precision at k = 0.0036
Epoch 0
Epoch 5/10: train AUC = 0.7245, precision at k = 0.0040
Epoch 0
Epoch 6/10: train AUC = 0.7224, precision at k = 0.0038
Epoch 0
Epoch 7/10: train AUC = 0.7225, precision at k = 0.0035
Epoch 0
Epoch 8/10: train AUC = 0.7241, precision at k = 0.0037
Epoch 0
Epoch 9/10: train AUC = 0.7264, precision at k = 0.0036
Epoch 0
Epoch 10/10: train AUC = 0.7238, precision at k = 0.0034


In [8]:
user_id = 15
scores = model.predict(user_ids=user_id, item_ids=np.arange(num_items))

top_items = np.argsort(-scores)[:5]
print("Top 5 recommendations for user {}: {}".format(user_id, top_items))

# print("Train precision: {:.2f}".format(train_precision))

Top 5 recommendations for user 15: [ 155  139  234 1072   96]
