In [None]:
import pandas as pd
df_ratings = pd.read_csv("restaurant_data/rating_final.csv")
df_restaurants = pd.read_csv("restaurant_data/geoplaces2.csv", encoding="ISO-8859-1")  # Restaurant details
df_user_cuisine = pd.read_csv("restaurant_data/usercuisine.csv")  # User preferences
df_restaurant_cuisine = pd.read_csv("restaurant_data/chefmozcuisine.csv")  # Restaurant cuisine types

In [3]:
# Merge ratings with restaurant details (name, location, etc.)
df_merged = df_ratings.merge(df_restaurants[['placeID', 'name', 'city', 'state', 'price']], on='placeID', how='left')

In [4]:
# Merge with restaurant cuisine details
df_merged = df_merged.merge(df_restaurant_cuisine, on='placeID', how='left')

In [5]:
# Merge with user cuisine preferences
df_merged = df_merged.merge(df_user_cuisine, on='userID', how='left')

In [None]:
print(df_merged.head())

  userID  placeID  rating  food_rating  service_rating  \
0  U1077   135085       2            2               2   
1  U1077   135038       2            2               1   
2  U1077   132825       2            2               2   
3  U1077   135060       1            2               2   
4  U1068   135104       1            1               2   

                      name             city   state   price Rcuisine_x  \
0   Tortas Locas Hipocampo  San Luis Potosi     SLP  medium  Fast_Food   
1    Restaurant la Chalita  San Luis Potosi     SLP  medium        NaN   
2          puesto de tacos           s.l.p.  s.l.p.     low    Mexican   
3  Restaurante Marisco Sam  San Luis Potosi     SLP  medium    Seafood   
4                     vips                ?       ?  medium    Mexican   

  Rcuisine_y  
0    Mexican  
1    Mexican  
2    Mexican  
3    Mexican  
4    Mexican  


In [8]:
! pip install scikit-surprise


Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting scikit-surprise
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/d9/8d/40ac32e703f3808159f9e2b33760cfbd6224cc7783eb663091eddc9581c2/scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies: started
  Installing build dependencies: still running...
  Installing build dependencies: still running...
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml): started
  Building wheel for scikit-surprise (pyproject.toml): finished with status 'done'
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp312-cp312-win_amd64.whl size=1291297 sha256=6126

DEPRECATION: Loading egg at c:\users\priyanshu\appdata\local\programs\python\python312\lib\site-packages\dlib-19.24.99-py3.12-win-amd64.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330

[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [None]:
reader = Reader(rating_scale=(0, 2))

In [13]:
# Load merged data into Surprise format
data = Dataset.load_from_df(df_merged[['userID', 'placeID', 'rating']], reader)

In [14]:
train, test = train_test_split(data, test_size= 0.2)

In [15]:
# Train SVD model
model = SVD()
model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2063efe6b40>

In [16]:
# Test model accuracy
from surprise.accuracy import rmse
predictions = model.test(test)
print("Model RMSE:", rmse(predictions))

RMSE: 0.3415
Model RMSE: 0.34149215469232747


In [19]:
def get_collaborative_recommendations(user_id, num_recommendations=5):
    # Get all unique restaurant IDs
    all_restaurants = df_merged['placeID'].unique()
    
    # Predict ratings for all restaurants
    predictions = [model.predict(user_id, restaurant) for restaurant in all_restaurants]
    
    # Sort predictions by estimated rating (descending order)
    predictions.sort(key=lambda x: x.est, reverse=True)
    
    # Get top recommended restaurant IDs
    top_recommendations = [pred.iid for pred in predictions[:num_recommendations]]
    
    # Get restaurant details for recommendations
    return df_merged[df_merged['placeID'].isin(top_recommendations)][['name', 'Rcuisine_y', 'city']].drop_duplicates()


In [None]:
get_collaborative_recommendations("U1077")  


Unnamed: 0,name,Rcuisine_y,city
0,Tortas Locas Hipocampo,Mexican,San Luis Potosi
122,Mariscos El Pescador,Cafe-Coffee_Shop,San Luis Potosi
123,Mariscos El Pescador,Sushi,San Luis Potosi
124,Mariscos El Pescador,Latin_American,San Luis Potosi
125,Mariscos El Pescador,Deli-Sandwiches,San Luis Potosi
...,...,...,...
1831,Tortas Locas Hipocampo,Brazilian,San Luis Potosi
1832,Tortas Locas Hipocampo,Southern,San Luis Potosi
3516,El Rincon de San Francisco,Game,San Luis Potosi
3556,la Cantina,Bar,San Luis Potosi


In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [24]:
# Fill missing cuisine values with an empty string
df_merged['Rcuisine_y'].fillna('', inplace=True)

# Convert cuisine type into a single string for each restaurant
df_merged['cuisine_combined'] = df_merged['Rcuisine_y']

# Apply TF-IDF Vectorization on cuisine types
vectorizer = TfidfVectorizer()
cuisine_matrix = vectorizer.fit_transform(df_merged['cuisine_combined'])

# Compute Cosine Similarity between restaurants based on cuisine type
cosine_sim = cosine_similarity(cuisine_matrix, cuisine_matrix)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_merged['Rcuisine_y'].fillna('', inplace=True)


In [25]:
def recommend_similar_restaurants_location_based(restaurant_name, user_city, num_recommendations=5):
    # Find the restaurant index
    idx = df_merged[(df_merged['name'] == restaurant_name) & (df_merged['city'] == user_city)].index
    
    if len(idx) == 0:
        return "Restaurant not found in database for this city."
    
    idx = idx[0]
    
    # Get similarity scores for the restaurant
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort restaurants by similarity score (descending)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get indices of top similar restaurants
    similar_indices = [i[0] for i in sim_scores[1:num_recommendations*2]]  # Get more recommendations first
    
    # Filter recommendations by the same city
    recommended_restaurants = df_merged.iloc[similar_indices][['name', 'Rcuisine_y', 'city']].drop_duplicates()
    recommended_restaurants = recommended_restaurants[recommended_restaurants['city'] == user_city].head(num_recommendations)
    
    return recommended_restaurants

In [26]:
def hybrid_recommendations(user_id, restaurant_name, user_city, num_recommendations=5):
    # Get collaborative filtering recommendations
    collab_recs = get_collaborative_recommendations(user_id, num_recommendations)
    
    # Get content-based recommendations
    content_recs = recommend_similar_restaurants_location_based(restaurant_name, user_city, num_recommendations)
    
    # Merge results (removing duplicates)
    hybrid_results = pd.concat([collab_recs, content_recs]).drop_duplicates().head(num_recommendations)
    
    return hybrid_results


In [27]:
hybrid_recommendations("U1077", "Tortas Locas Hipocampo", "San Luis Potosi")


Unnamed: 0,name,Rcuisine_y,city
0,Tortas Locas Hipocampo,Mexican,San Luis Potosi
122,Mariscos El Pescador,Cafe-Coffee_Shop,San Luis Potosi
123,Mariscos El Pescador,Sushi,San Luis Potosi
124,Mariscos El Pescador,Latin_American,San Luis Potosi
125,Mariscos El Pescador,Deli-Sandwiches,San Luis Potosi
