In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import coo_matrix
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score

with open('processed_review_data.pkl', 'rb') as f:
    df_reviews = pickle.load(f)
with open('processed_restaurant_data.pkl', 'rb') as f:
    df_rest = pickle.load(f)



In [3]:
df_reviews = df_reviews[[
    'user_id', 'restaurant_id', 'normalized_rating', 'bert_score',
    'recency_score', 'weighted_rating_score', 'user_name'
]]
df_rest = df_rest[[
    'id', 'categories_list', 'normalized_rating_restaurants',
    'normalized_wilson_score', 'normalized_latitude',
    'normalized_longitude', 'popularity_score',
    'normalized_log_review_count', 'name'
]]

In [5]:
user_ids = df_reviews['user_id'].unique()
rest_ids = df_reviews['restaurant_id'].unique()

user_to_index = {u: i for i, u in enumerate(user_ids)}
rest_to_index = {r: i for i, r in enumerate(rest_ids)}
index_to_user = {i: u for u, i in user_to_index.items()}
index_to_rest = {i: r for r, i in rest_to_index.items()}

df_reviews['user_idx'] = df_reviews['user_id'].map(user_to_index)
df_reviews['rest_idx'] = df_reviews['restaurant_id'].map(rest_to_index)

In [7]:
df_rest = df_rest[df_rest['id'].isin(rest_ids)]

feature_cols = [
    'categories_list', 'normalized_rating_restaurants',
    'normalized_wilson_score', 'normalized_latitude',
    'normalized_longitude', 'popularity_score',
    'normalized_log_review_count'
]
df_rest['item_feature_str'] = df_rest[feature_cols].astype(str).agg('|'.join, axis=1)


In [9]:
dataset = Dataset()
dataset.fit(user_ids, rest_ids)
unique_features = set()
for row in df_rest['item_feature_str']:
    unique_features.update(row.split('|'))
    
dataset.fit_partial(items=rest_ids, item_features=unique_features)

(interactions_matrix, _) = dataset.build_interactions([
    (row['user_id'], row['restaurant_id'], row['weighted_rating_score']) for _, row in df_reviews.iterrows()
])
item_features_matrix = dataset.build_item_features([
    (row['id'], row['item_feature_str'].split('|')) for _, row in df_rest.iterrows()
])

In [44]:
dataset = Dataset()
dataset.fit(user_ids, rest_ids)

unique_features = set()
for row in df_rest['item_feature_str']:
    unique_features.update(row.split('|'))
dataset.fit_partial(items=rest_ids, item_features=unique_features)

In [11]:
model = LightFM(loss='warp', no_components=32, learning_rate=0.03, learning_schedule='adadelta')
model.fit(interactions_matrix, item_features=item_features_matrix, epochs=120)

<lightfm.lightfm.LightFM at 0x12098ec80>

In [21]:
def precision_at_k_unseen(model, interactions_matrix, k=10, item_features=None):
    precisions = []
    for user_idx in range(interactions_matrix.shape[0]):
        user_id = index_to_user[user_idx]
        seen = user_history.get(user_id, set())
        seen_indices = {rest_to_index[r] for r in seen if r in rest_to_index}

        scores = model.predict(user_idx, np.arange(len(rest_ids)), item_features=item_features)
        sorted_items = np.argsort(-scores)

        filtered_items = [i for i in sorted_items if i not in seen_indices][:k]

        actual_interacted = set(np.nonzero(interactions_matrix.tocsr()[user_idx])[1])
        relevant_unseen = actual_interacted - seen_indices

        hits = sum([1 for item in filtered_items if item in relevant_unseen])
        precisions.append(hits / k)

    return np.mean(precisions)

In [23]:
print(f"Filtered Precision@10: {precision_at_k_unseen(model, interactions_matrix, k=10, item_features=item_features_matrix):.4f}")
print(f"Filtered Precision@15: {precision_at_k_unseen(model, interactions_matrix, k=15, item_features=item_features_matrix):.4f}")
print("Recall@5:", recall_at_k(model, interactions_matrix, k=5, item_features=item_features_matrix).mean())
print("AUC Score:", auc_score(model, interactions_matrix, item_features=item_features_matrix).mean())

Filtered Precision@10: 0.0000
Filtered Precision@15: 0.0000
Recall@5: 0.9444967808031103
AUC Score: 0.9997671


In [25]:
for k in [1, 3, 5, 10]:
    precision = precision_at_k(model, interactions_matrix, k=k, item_features=item_features_matrix).mean()
    print(f"Precision@{k}: {precision:.4f}")
print("Recall@5:", recall_at_k(model, interactions_matrix, k=5, item_features=item_features_matrix).mean())
print("AUC Score:", auc_score(model, interactions_matrix, item_features=item_features_matrix).mean())

Precision@1: 0.8719
Precision@3: 0.3900
Precision@5: 0.2551
Precision@10: 0.1375
Recall@5: 0.9444967808031103
AUC Score: 0.9997671


In [27]:
user_id_to_name = df_reviews.drop_duplicates('user_id')[['user_id', 'user_name']].set_index('user_id')['user_name'].to_dict()
rest_id_to_name = df_rest.set_index('id')['name'].to_dict()
user_history = df_reviews.groupby('user_id')['restaurant_id'].apply(set).to_dict()

def recommend_top_k(model, user_id, user_to_index, index_to_rest, item_features_matrix, k=5):
    if user_id not in user_to_index:
        return []
    user_idx = user_to_index[user_id]
    scores = model.predict(user_idx, np.arange(len(rest_ids)), item_features=item_features_matrix)
    seen = user_history.get(user_id, set())
    sorted_items = np.argsort(-scores)
    filtered = [index_to_rest[i] for i in sorted_items if index_to_rest[i] not in seen]
    return filtered[:k]

In [29]:
results = []
for user_id in user_ids:
    top_k = recommend_top_k(model, user_id, user_to_index, index_to_rest, item_features_matrix, k=5)
    for rank, rest_id in enumerate(top_k, start=1):
        results.append({
            "user_id": user_id,
            "user_name": user_id_to_name.get(user_id, "Unknown"),
            "rank": rank,
            "recommended_restaurant_id": rest_id,
            "restaurant_name": rest_id_to_name.get(rest_id, "Unknown")
        })

recommendation_df = pd.DataFrame(results)
final_output = recommendation_df[['user_name', 'rank', 'restaurant_name']]

In [30]:
final_output

Unnamed: 0,user_name,rank,restaurant_name
0,Tatyana M.,1,Jimmy John's
1,Tatyana M.,2,Shark's Fish
2,Tatyana M.,3,The Walnut Room
3,Tatyana M.,4,Mahari
4,Tatyana M.,5,Jordyn's Soul Cafe
...,...,...,...
47460,Donny F.,1,Primos Chicago Pizza
47461,Donny F.,2,Giordano's
47462,Donny F.,3,Nesh Mediterranean Grill
47463,Donny F.,4,Thai Spoon & Sushi
