In [None]:
!pip install lightfm
!pip install pandas
!pip install numpy
!pip install scipy
!pip install lightfm

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k
import time

In [None]:
## tunable hyper-parameters ##
no_components = 30  # passed to LightFM as the number of latent components
alpha = 0.01        # regularisation strength, used for both item_alpha and user_alpha

# Single place to repoint the notebook at another copy of the data; the default
# keeps the original location.
DATA_DIR = "/home/ps4379/final-project-group-29"

# Wall-clock reference taken before loading; nothing in the visible cells reads it back.
start = time.time()
train = pd.read_parquet(DATA_DIR + "/train_small.parquet")
val = pd.read_parquet(DATA_DIR + "/validation_small.parquet")
print(train.head())

In [None]:
# Build the training interaction matrix in LightFM's internal index space.
train_dt = Dataset()
# Register every user id and recording id present in train so each one gets
# an internal index.
train_dt.fit(train['user_id'].unique(), train['recording_msid'].unique())

print("fit done")

# Feed (user, item, weight) triples to LightFM. Zipping the three columns
# yields the same triples as iterating row by row, but avoids
# DataFrame.iterrows(), which constructs a Series object for every row and is
# very slow on large frames.
(train_interaction, weights) = train_dt.build_interactions(
    zip(train['user_id'], train['recording_msid'], train['listens'])
)

In [None]:
# Fit a WARP-loss LightFM model; the same regularisation strength is applied to
# the user and the item side. random_state is pinned so that, with the
# single-threaded fit below, reruns give comparable numbers.
model = LightFM(
    no_components=no_components,
    loss='warp',
    item_alpha=alpha,
    user_alpha=alpha,
    random_state=42,
)
model.fit(train_interaction, num_threads=1)

# precision_at_k returns one value per user; report the mean.
train_precision = precision_at_k(model, train_interaction, k=100).mean()

# Use the % operator (not a comma) so the values are actually interpolated,
# and label the metric as what it is: precision@100, not MAP.
print('rank: %d, alpha: %f, train precision@100:  %f' % (no_components, alpha, train_precision))

In [None]:
# Dataset.mapping() returns four dictionaries; positions 0 and 2 are the ones
# used here (raw user id -> index, raw item id -> index). The other two are
# unpacked only to keep the positions explicit.
(
    user_index_mapping,
    _user_feature_mapping,
    item_index_mapping,
    _item_feature_mapping,
) = train_dt.mapping()

# Shape of the train interaction matrix, reused so that other matrices can be
# built with identical dimensions.
matrix_shape = train_dt.interactions_shape()

In [None]:
# Keep only validation rows whose user AND recording both occur in train;
# ids that never appeared in train have no entry in the fitted index mappings.
user_seen_in_train = val['user_id'].isin(train['user_id'])
item_seen_in_train = val['recording_msid'].isin(train['recording_msid'])
val = val[user_seen_in_train & item_seen_in_train]
# The same filter, kept for when a test split is evaluated:
## test = test[test['user_id'].isin(train['user_id']) & test['recording_msid'].isin(train['recording_msid'])]


In [None]:
# Translate the validation rows into the train index space and lay them out in
# a sparse matrix with exactly the train matrix's shape, so the fitted model
# can be scored against it.
val_user = np.array([user_index_mapping[uid] for uid in val['user_id']])
val_item = np.array([item_index_mapping[msid] for msid in val['recording_msid']])
val_rating = val['listens']
val_interaction = coo_matrix(
    (val_rating, (val_user, val_item)),
    shape=matrix_shape,
)

# One precision@100 value per user, averaged into a single score.
val_precision_per_user = precision_at_k(model, val_interaction, k=100)
val_precision = val_precision_per_user.mean()



In [None]:
# Report the validation score. The % operator (not a comma) is needed for the
# values to be interpolated into the template; the metric is precision@100 as
# computed by precision_at_k, so it is labelled as such rather than as MAP.
print('rank: %d, alpha: %f, val precision@100:  %f' % (no_components, alpha, val_precision))