In [1]:
from yelp_recommendation_system import *
import pandas as pd
# The main culprit was that numpy was not imported
# it's an issue with the dill package when loading the model,
import numpy as np
from yelp_recommendation_system.utils import get_user_recommendations, recommend_for_new_user, build_feat_vector_for_new_user

In [2]:
model_path = '../final_model/hybrid_recommender_v6.pkl'

In [3]:
try:
    # load the model
    recsys = YelpRecommenderSystem()
    recsys.load_model(model_path)

    print("\n Model bundle loaded successfully!")

    # verify components
    print("\nVerifying components:")
    components = {
        'model': recsys.model,
        'business_features': recsys.business_features,
        'user_features': recsys.user_features,
        'filtered_reviews': recsys.filtered_reviews,
        'filtered_business_df': recsys.filtered_business_df,
        'trainset': recsys.trainset,
        'testset': recsys.testset}

except Exception as e:
        print(f"\n Error loading bundle: {e}")
        import traceback
        traceback.print_exc()
        bundle_loaded = False

Model loaded from ../final_model/hybrid_recommender_v6.pkl
Loaded model bundle with all supporting data

 Model bundle loaded successfully!

Verifying components:


In [5]:
# Check trainset attributes and methods
print("Trainset attributes:")
print(f"  n_users: {recsys.model.trainset.n_users}")
print(f"  n_items: {recsys.model.trainset.n_items}")
print(f"  n_ratings: {recsys.model.trainset.n_ratings}")
print(f"  rating_scale: {recsys.model.trainset.rating_scale}")

# get all user ids
all_raw_user_ids = [recsys.model.trainset.to_raw_uid(i) for i in range(recsys.model.trainset.n_users)]

user_id = all_raw_user_ids[4]
print(f"\nGetting recommendations for user: {user_id}")

# use the full business_features from the bundle
recs = get_user_recommendations(
    recsys.model,
    user_id=user_id,
    business_features=recsys.business_features,
    reviews=recsys.filtered_reviews,
    business_df=recsys.business_features,  # Don't use the filtered business_df
    n=10,
    min_rating=3.7)

Trainset attributes:
  n_users: 18124
  n_items: 21963
  n_ratings: 826307
  rating_scale: (1, 5)

Getting recommendations for user: ZTHZsP7NSnc3Csje-SKNyQ


In [7]:
recs

Unnamed: 0,user_id,business_id,predicted_rating,name,city,state,stars,categories
0,ZTHZsP7NSnc3Csje-SKNyQ,IO3apWfFnKPO2XRFmGfTEw,5.007942,Nathaniel Reid Bakery,Saint Louis,MO,4.5,"Sandwiches, Food, Desserts, Restaurants, Bakeries"
1,ZTHZsP7NSnc3Csje-SKNyQ,42dVj5q-LMx_iJxcq5Fzng,4.984298,Vida,Indianapolis,IN,4.5,"Restaurants, Local Flavor, American (New), Sal..."
2,ZTHZsP7NSnc3Csje-SKNyQ,tARR9jhv5gi9TjsfSVmjmw,4.943712,Kaffe Crepe,Reno,NV,5.0,"Food, Restaurants, Cafes, Creperies, Coffee & Tea"
3,ZTHZsP7NSnc3Csje-SKNyQ,dclL14pr7OojCOKJ_ZbMbQ,4.943018,Angelo Brocato,New Orleans,LA,4.5,"Bakeries, Ice Cream & Frozen Yogurt, Food, Des..."
4,ZTHZsP7NSnc3Csje-SKNyQ,LRr-aw58xVMkUu0OSL_BpQ,4.923383,The Little One,Tucson,AZ,4.5,"Restaurants, Mexican, Breakfast & Brunch, Lati..."
5,ZTHZsP7NSnc3Csje-SKNyQ,2KIDQyTh-HzLxOUEDqtDBg,4.916015,Mazzaro's Italian Market,Saint Petersburg,FL,4.5,"Specialty Food, Delis, Coffee Roasteries, Butc..."
6,ZTHZsP7NSnc3Csje-SKNyQ,xoVTtQd6xRrFbQcdfH0dLA,4.914324,Jeremiah's Italian Ice,Tampa,FL,4.5,"Shaved Ice, Ice Cream & Frozen Yogurt, Dessert..."
7,ZTHZsP7NSnc3Csje-SKNyQ,zp9OcdUq2CWtQuI9FFBOQQ,4.9076,Iovine Brothers Produce,Philadelphia,PA,4.5,"Fruits & Veggies, Food, Specialty Food"
8,ZTHZsP7NSnc3Csje-SKNyQ,JeSIzQ1MbaYGMrGDtEAutQ,4.892426,Nicey Treat,Indianapolis,IN,5.0,"Food, Ice Cream & Frozen Yogurt"
9,ZTHZsP7NSnc3Csje-SKNyQ,_aKr7POnacW_VizRKBpCiA,4.890298,Blues City Deli,Saint Louis,MO,5.0,"Delis, Bars, Restaurants, Nightlife, Pubs, Ame..."


#### Used to create a subset of user recommendations for testing and visualization

In [6]:
from time import time
start = time()
pred_dfs = []
for i, uid in enumerate(all_raw_user_ids[:10]):

    try:
        recs = get_user_recommendations(
            recsys.model,
            user_id=uid,
            business_features=recsys.business_features,
            reviews=recsys.filtered_reviews,
            business_df=recsys.business_features,
            n=100,
            min_rating=3.7)
        pred_dfs.append(recs)
    except Exception as e:
        print(f"Error getting recommendations for user {uid}: {e}")
end = time()
print(f"Generated recommendations for {len(pred_dfs)} users in {end - start:.2f} seconds.")
recs_subset = pd.concat(pred_dfs)
# recs_subset.to_csv("user_recommendation_subset.csv", index=False)

Generated recommendations for 10 users in 14.57 seconds.


In [14]:
rr = get_user_recommendations(recsys.model, 'rkmsy8QUiG3FmkLPHW5ejw', recsys.business_features, recsys.filtered_reviews, recsys.business_features, n=5, min_rating=3.5)
rr

Unnamed: 0,user_id,business_id,predicted_rating,name,city,state,stars,categories
0,rkmsy8QUiG3FmkLPHW5ejw,atZ_olNKXOG4rEr6mccN8g,4.9939,Beiler's Bakery,Philadelphia,PA,4.5,"Food, Donuts, Food Stands, Restaurants, Specia..."
1,rkmsy8QUiG3FmkLPHW5ejw,zp9OcdUq2CWtQuI9FFBOQQ,4.987351,Iovine Brothers Produce,Philadelphia,PA,4.5,"Fruits & Veggies, Food, Specialty Food"
2,rkmsy8QUiG3FmkLPHW5ejw,CYKj959PZDjweV-CSpssCg,4.937804,The Steak House at Western Village,Sparks,NV,4.5,"Steakhouses, Food, Salad, Desserts, Restaurants"
3,rkmsy8QUiG3FmkLPHW5ejw,RqW9S4WG9UYZHKhHRHXJZg,4.909391,Lolis Mexican Cravings,Tampa,FL,4.5,"Ethnic Food, Restaurants, Specialty Food, Mexi..."
4,rkmsy8QUiG3FmkLPHW5ejw,tc6jKQiRjFO2M7BylwHVrg,4.902905,Kwok's Bistro,Reno,NV,4.5,"Noodles, Restaurants, Chinese, Asian Fusion"


In [11]:
# example: some new user profile (we would add a new users data if it were an app or service)
new_user = {
    # id is optional for this path; model doesn't use it
    # 'user_id': 'new_user_123',

    'review_count': 10,
    'useful': 5,
    'funny': 2,
    'cool': 1,
    'fans': 0,
    'average_stars': 4.2,
    'compliment_hot': 0,
    'compliment_more': 0,
    'compliment_profile': 0,
    'compliment_cute': 0,
    'compliment_list': 0,
    'compliment_note': 0,
    'compliment_plain': 0,
    'compliment_cool': 0,
    'compliment_funny': 0,
    'compliment_writer': 0,
    'compliment_photos': 0,
    'avg_sentiment': 0.1,
    'top_category_ratio': 1.0,
    'has_american_ratio': 0.8,
    'has_asian_food_ratio': 0.2,
    'has_hispanic_food_ratio': 0.0,
    'has_european_food_ratio': 0.0,
    'has_seafood_ratio': 0.0,
    'serves_alcohol_ratio': 0.5,
    'has_vegetarian_ratio': 0.3,
    'has_entertainment_ratio': 0.0,
    'has_coffee_or_tea_ratio': 0.1,
    'serves_sweets_ratio': 0.2,
    'years_elite': 0,
    'rating_variance': 0.5,
    'local_review_ratio': 1.0,
    'days_since_last_review': 7,
    'review_frequency': 0.1,
    'avg_price_preference': 2.0,
    'num_cities_reviewed': 1,
    # encoded categorical fields
    'base_city_encoded': 12,
    'state_code_encoded': 4,
    'top_category_encoded': 56,
    'elite_encoded': 0,

    'days_yelping': 3,}

In [12]:
xnew = build_feat_vector_for_new_user(new_user)

In [14]:
recommendations = recommend_for_new_user(recsys.model, xnew, top_n=10)


In [15]:
recommendations

[('DVBJRvnCpkqaYl6nHroaMg', 4.674956186567715),
 ('IO3apWfFnKPO2XRFmGfTEw', 4.66832259995951),
 ('zp9OcdUq2CWtQuI9FFBOQQ', 4.655983351202108),
 ('dclL14pr7OojCOKJ_ZbMbQ', 4.653984139854712),
 ('fEqiXG_B-fn__w0aeF3nBQ', 4.650308495543692),
 ('42dVj5q-LMx_iJxcq5Fzng', 4.646916558979619),
 ('atZ_olNKXOG4rEr6mccN8g', 4.6458091570211755),
 ('tARR9jhv5gi9TjsfSVmjmw', 4.641127256866857),
 ('OR7VJQ3Nk1wCcIbPN4TCQQ', 4.640423193371966),
 ('_aKr7POnacW_VizRKBpCiA', 4.638376456162067)]