In [1]:
import lightfm
import scipy as sp
import pymongo
import pandas as pd
import numpy as np



In [3]:
# Read in yelp.parquet
df = pd.read_parquet('yelp.parquet')
# Sample df down to 100,000 rows
df = df.sample(100_000)
# Train test split
from sklearn.model_selection import train_test_split
# Replace user_id and business_id with unique integers
user_id = df.user_id.unique()
business_id = df.business_id.unique()
user_id_map = {user_id[i]: i for i in range(len(user_id))}
business_id_map = {business_id[i]: i for i in range(len(business_id))}

Train shape: (84089, 46456)
Test shape: (84079, 46457)


In [14]:
# Split train and test
train, test = train_test_split(df, test_size=0.2)

In [32]:
# Build dataset using lightfm
from lightfm.data import Dataset
dataset = Dataset()
dataset.fit(
    (row['user_id'] for _, row in df.iterrows()),
    (row['business_id'] for _, row in df.iterrows()))
# # Build interactions
# (interactions, weights) = dataset.build_interactions(
#     ((row['user_id'], row['business_id'], row['stars']) for _, row in train.iterrows()))


In [33]:
# Build test interactions
test_interactions, test_weights = dataset.build_interactions(
    ((row['user_id'], row['business_id'], row['stars']) for _, row in test.iterrows()))
# Build train interactions
train_interactions, train_weights = dataset.build_interactions(
    ((row['user_id'], row['business_id'], row['stars']) for _, row in train.iterrows()))

In [34]:
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# Instantiate and train the model
model = lightfm.LightFM()
model.fit(train_interactions, epochs=30, num_threads=4)

<lightfm.lightfm.LightFM at 0x202c9132880>

In [36]:
# Mean Average Precision @ (cutoff) K
train_precision = precision_at_k(model, train_interactions, k=5).mean()
print(f'Train precision: {train_precision:2}'  )

Train precision: 0.0012363055720925331


In [35]:
test_precision = precision_at_k(model, test_interactions, k=5).mean()
print(f'Test Precision: {test_precision:.2f}')
train_auc = auc_score(model, test_interactions).mean()
print(f'Train AUC: {train_auc:.2f}')


Train Precision: 0.00
Train AUC: 0.55
