In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import coo_matrix
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score


# Load the preprocessed review table that every later cell builds on.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('processed_review_data.pkl', mode='rb') as review_file:
    df_reviews = pickle.load(review_file)



In [2]:
# Distinct raw ids found in the review data.
user_ids = df_reviews['user_id'].unique()
rest_ids = df_reviews['restaurant_id'].unique()

# Raw id -> contiguous 0-based position (later used as matrix row / column).
user_to_index = dict(zip(user_ids, range(len(user_ids))))
rest_to_index = dict(zip(rest_ids, range(len(rest_ids))))

# Inverse lookups: position -> raw id.
index_to_user = {position: raw_id for raw_id, position in user_to_index.items()}
index_to_rest = {position: raw_id for raw_id, position in rest_to_index.items()}

In [3]:
# Attach the integer matrix coordinates of every review as new columns.
for idx_col, id_col, id_lookup in (
    ('user_idx', 'user_id', user_to_index),
    ('rest_idx', 'restaurant_id', rest_to_index),
):
    df_reviews[idx_col] = df_reviews[id_col].map(id_lookup)

In [4]:
# Sparse user x restaurant matrix: one entry per review, valued by its
# weighted rating score. No explicit shape is passed, so scipy infers it
# from the largest row / column index present.
review_weights = df_reviews['weighted_rating_score'].values
row_positions = df_reviews['user_idx']
col_positions = df_reviews['rest_idx']
interactions = coo_matrix((review_weights, (row_positions, col_positions)))

In [5]:
# WARP-loss factorisation model with 32 latent components.
# random_state seeds LightFM's parameter initialisation and data shuffling so
# that re-running the notebook reproduces the same metrics and recommendations.
model = LightFM(
    loss='warp',
    no_components=32,
    # NOTE: per the LightFM docs, learning_rate is the initial rate of the
    # 'adagrad' schedule; under 'adadelta' the step size is governed by
    # rho / epsilon, so this value has no effect on training here.
    learning_rate=0.03,
    learning_schedule='adadelta',
    random_state=42,
)

In [6]:
# Train on the complete interaction matrix. The evaluation cells further down
# score the model against this same matrix, so their numbers are in-sample.
# NOTE(review): no sample_weight is passed -- confirm whether the magnitude of
# weighted_rating_score is meant to influence training or only its sign.
N_EPOCHS = 120
model.fit(interactions, epochs=N_EPOCHS)

<lightfm.lightfm.LightFM at 0x1210a1a50>

In [7]:
# user_id -> set of restaurant ids that user has reviewed.
restaurants_by_user = df_reviews.groupby('user_id')['restaurant_id']
user_history = restaurants_by_user.apply(set).to_dict()

def precision_at_k_unseen(model, interactions_matrix, k=10, item_features=None,
                          train_interactions=None):
    """Mean precision@k over users, ranking only items the user has not seen.

    For every user row of ``interactions_matrix`` all items are scored with
    ``model.predict``, items the user has already seen are removed from the
    ranking, and the share of the top ``k`` remaining items that the user
    interacted with in ``interactions_matrix`` is recorded.

    Parameters
    ----------
    model
        Object exposing ``predict(user_idx, item_indices, item_features=...)``
        that returns one score per item (e.g. a fitted LightFM model).
    interactions_matrix : scipy sparse matrix, shape (n_users, n_items)
        Interactions treated as ground truth (non-zero entries are relevant).
    k : int, default 10
        Number of unseen recommendations evaluated per user.
    item_features : optional
        Forwarded unchanged to ``model.predict``.
    train_interactions : scipy sparse matrix, optional
        Interactions the model was trained on, same shape as
        ``interactions_matrix``. When given, its non-zero entries define what
        each user has "seen" and ``interactions_matrix`` is expected to hold
        the held-out interactions (for example the two halves returned by
        ``lightfm.cross_validation.random_train_test_split``).
        When omitted, "seen" falls back to the notebook-level
        ``user_history`` / ``index_to_user`` / ``rest_to_index`` mappings.

    Returns
    -------
    float
        Mean of per-user ``hits / k``. Users without any relevant unseen item
        contribute 0.

    Notes
    -----
    If "seen" and ``interactions_matrix`` are derived from the same reviews
    (the fallback path with the full interaction matrix), every relevant item
    is also a seen item, so the result is 0.0 by construction. Pass a held-out
    matrix together with ``train_interactions`` to get a meaningful value.
    """
    from itertools import islice

    # Convert once up front; the conversion is invariant across users.
    eval_csr = interactions_matrix.tocsr()
    train_csr = None if train_interactions is None else train_interactions.tocsr()
    n_users, n_items = eval_csr.shape
    all_items = np.arange(n_items)

    precisions = []
    for user_idx in range(n_users):
        if train_csr is not None:
            seen_indices = set(train_csr[user_idx].nonzero()[1].tolist())
        else:
            user_id = index_to_user[user_idx]
            seen = user_history.get(user_id, set())
            seen_indices = {rest_to_index[r] for r in seen if r in rest_to_index}

        scores = model.predict(user_idx, all_items, item_features=item_features)
        ranked_items = np.argsort(-scores).tolist()

        # Walk the ranking best-first and stop as soon as k unseen items are found.
        top_unseen = list(
            islice((item for item in ranked_items if item not in seen_indices), k)
        )

        actual_interacted = set(eval_csr[user_idx].nonzero()[1].tolist())
        relevant_unseen = actual_interacted - seen_indices

        hits = sum(1 for item in top_unseen if item in relevant_unseen)
        precisions.append(hits / k)

    return np.mean(precisions)

In [8]:
# Report the filtered (unseen-only) precision at two cut-offs.
for cutoff in (10, 15):
    print(f"Filtered Precision at {cutoff} (unseen):", precision_at_k_unseen(model, interactions, k=cutoff))

Filtered Precision at 10 (unseen): 0.0
Filtered Precision at 15 (unseen): 0.0


In [27]:
# LightFM's built-in metrics, computed on the same matrix the model was fit
# on -- hence the "(seen)" labels.
seen_metrics = (
    ("Precision at 5 (seen):", precision_at_k, {"k": 5}),
    ("Recall at 5 (seen):", recall_at_k, {"k": 5}),
    ("AUC Score (seen):", auc_score, {}),
)
for label, metric_fn, metric_kwargs in seen_metrics:
    print(label, metric_fn(model, interactions, **metric_kwargs).mean())

Precision at 5 (seen): 0.2655009
Recall at 5 (seen): 0.9899183799327046
AUC Score (seen): 0.9999999


In [15]:
# Precision at several cut-offs, scored on the matrix the model was fit on.
# precision_at_k is already imported in the notebook's first cell together
# with the other lightfm.evaluation helpers, so it is not re-imported here.
for k in [1, 3, 5, 10]:
    print(f"Precision at {k}: {precision_at_k(model, interactions, k=k).mean():.4f}")

Precision at 1: 0.9999
Precision at 3: 0.4153
Precision at 5: 0.2655
Precision at 10: 0.1404


In [29]:
def recommend_top_k(model, user_id, user_to_index, index_to_rest, k=5):
    """Return the ids of the ``k`` highest-scoring restaurants for one user.

    Parameters
    ----------
    model
        Object exposing ``predict(user_idx, item_indices)`` that returns one
        score per item (e.g. a fitted LightFM model).
    user_id
        Raw user id to recommend for.
    user_to_index : dict
        Raw user id -> row index understood by ``model``.
    index_to_rest : dict
        Item index -> raw restaurant id. Its size defines how many items are
        scored, so its keys are expected to be ``0 .. len(index_to_rest) - 1``.
    k : int, default 5
        Number of recommendations to return.

    Returns
    -------
    list
        Restaurant ids ordered best-first; empty when ``user_id`` is unknown.

    Notes
    -----
    Restaurants the user has already reviewed are not filtered out.
    """
    if user_id not in user_to_index:
        return []

    user_idx = user_to_index[user_id]
    # Size the candidate set from the mapping that was passed in rather than
    # from a notebook-level global, so the function is self-contained.
    item_indices = np.arange(len(index_to_rest))
    scores = model.predict(user_idx, item_indices)
    top_items = np.argsort(-scores)[:k]
    return [index_to_rest[int(i)] for i in top_items]

In [31]:
# One record per (user, rank): the top-5 restaurants for every known user.
results = [
    {
        "user_id": user_id,
        "rank": rank,
        "recommended_restaurant_id": rest_id,
    }
    for user_id in user_ids
    for rank, rest_id in enumerate(
        recommend_top_k(model, user_id, user_to_index, index_to_rest, k=5),
        start=1,
    )
]

recommendation_df = pd.DataFrame(results)

# user_id -> display name, taken from the distinct (id, name) pairs in the reviews.
user_name_pairs = df_reviews[['user_id', 'user_name']].drop_duplicates()
user_id_to_name = user_name_pairs.set_index('user_id')['user_name'].to_dict()

In [33]:
# Load the restaurant table to translate restaurant ids into names.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('processed_restaurant_data.pkl', mode='rb') as restaurant_file:
    df_rest = pickle.load(restaurant_file)

# restaurant id -> display name.
restaurant_name_pairs = df_rest[['id', 'name']].drop_duplicates()
rest_id_to_name = restaurant_name_pairs.set_index('id')['name'].to_dict()

# Attach human-readable names next to the raw ids.
for name_col, id_col, id_to_name in (
    ('user_name', 'user_id', user_id_to_name),
    ('restaurant_name', 'recommended_restaurant_id', rest_id_to_name),
):
    recommendation_df[name_col] = recommendation_df[id_col].map(id_to_name)

final_output = recommendation_df[['user_name', 'rank', 'restaurant_name']]
final_output.head(15)

Unnamed: 0,user_name,rank,restaurant_name
0,Tatyana M.,1,Jamaica Jerk Villa
1,Tatyana M.,2,Taqueria Varitas
2,Tatyana M.,3,Mahari
3,Tatyana M.,4,Southport Grocery & Cafe
4,Tatyana M.,5,Juanitas Mexican food #2
5,Magic S.,1,Jamaica Jerk Villa
6,Magic S.,2,Taqueria Varitas
7,Magic S.,3,Asian Kitchen Express
8,Magic S.,4,Mahari
9,Magic S.,5,Huaraches Mexican Restaurant
