In [123]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides pandas FutureWarning /
# SettingWithCopyWarning messages that the cells below can trigger; consider
# narrowing it to specific categories once those cells are cleaned up.
warnings.filterwarnings('ignore')

In [124]:
# Load Preprocessed Data
# Read the cleaned restaurant table from the working directory and report its
# (rows, columns) shape as a quick sanity check.
zomato_df = pd.read_csv("restaurant1_cleaned.csv")
print(f"Data Loaded. Shape: {zomato_df.shape}")

Data Loaded. Shape: (23565, 15)


In [125]:
# Fill NaNs in 'dish_liked' for safety
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form mutates an intermediate Series
# and leaves zomato_df untouched when pandas Copy-on-Write is active.
zomato_df['dish_liked'] = zomato_df['dish_liked'].fillna('')

In [126]:
# Safety check in case of missing values
# The TF-IDF step below needs a string in every row. Assign the filled column
# back explicitly; the chained fillna(inplace=True) form operates on an
# intermediate Series and is a no-op on zomato_df under pandas Copy-on-Write.
zomato_df['combined_features'] = zomato_df['combined_features'].fillna('')

In [127]:
# Create TF-IDF Matrix
# Vectorise the combined text of each restaurant using unigrams and bigrams,
# with English stop words removed. Row i of tfidf_matrix corresponds to row i
# of zomato_df.
tfidf = TfidfVectorizer(
    ngram_range=(1, 2),
    stop_words='english',
)
tfidf_matrix = tfidf.fit_transform(zomato_df['combined_features'])

In [128]:
# Calculate Cosine Similarity
# TfidfVectorizer L2-normalises each row by default (norm is not overridden
# above), so the plain dot product from linear_kernel equals cosine similarity.
# NOTE(review): the result is a dense (n_rows x n_rows) array; at the row
# count printed when the data was loaded that is several GB of float64.
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

In [129]:
# Create Index Mapping
# Map lower-cased restaurant name -> row label in zomato_df.
indices = pd.Series(zomato_df.index, index=zomato_df['name'].str.lower())
# Keep only the first listing for each name. Series.drop_duplicates() would
# compare the *values* (row labels, which are already unique) and therefore
# leave repeated names in place, making indices[name] return a Series instead
# of a single row label.
indices = indices[~indices.index.duplicated(keep='first')]

In [132]:
# Recommendation Function
def recommend(name, preferred_price=None, min_rating=None, cosine_sim=cosine_sim):
    """Return up to 10 restaurants most similar to ``name``.

    Parameters
    ----------
    name : str
        Restaurant name; matched case-insensitively against ``indices``.
    preferred_price : optional
        When truthy, keep only rows whose ``price_range`` equals this value.
    min_rating : optional
        When truthy, keep only rows whose ``rate`` is at least this value.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix whose row/column ``i`` corresponds to row
        ``i`` of ``zomato_df``.

    Returns
    -------
    pandas.DataFrame or str
        A 1-indexed table with columns ``Restaurant Name``, ``Cuisines``,
        ``Rating (★)`` and ``Approx Cost`` ordered by decreasing similarity,
        or the string ``"Restaurant not found."`` when ``name`` is unknown.
    """
    name = name.lower()
    if name not in indices:
        return "Restaurant not found."

    idx = indices[name]
    # If the lookup still holds several rows for one name, indexing yields a
    # Series; use the first listing so cosine_sim[idx] is a single row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx]).ravel()
    # Stable descending sort keeps ties in original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query restaurant by index rather than assuming it sorts first:
    # other rows can tie with it at the maximum similarity.
    ranked = ranked[ranked != idx]

    recommendations = zomato_df.iloc[ranked]

    if preferred_price:
        recommendations = recommendations[recommendations['price_range'] == preferred_price]
    if min_rating:
        recommendations = recommendations[recommendations['rate'] >= min_rating]

    # Top 10 results; copy so the column assignments below act on an
    # independent frame instead of a view of zomato_df.
    recommendations = recommendations.head(10)[['name', 'cuisines', 'rate', 'cost']].copy()

    # Formatting
    recommendations.reset_index(drop=True, inplace=True)
    recommendations.index += 1
    recommendations['rate'] = recommendations['rate'].round(1)
    # Missing or non-numeric costs are shown as "N/A" instead of raising.
    cost_values = pd.to_numeric(recommendations['cost'], errors='coerce')
    recommendations['cost'] = cost_values.map(
        lambda c: f"{int(c)} ₹" if pd.notna(c) else "N/A"
    )
    recommendations.columns = ['Restaurant Name', 'Cuisines', 'Rating (★)', 'Approx Cost']

    return recommendations



In [133]:
# Example Usage
# Look up restaurants similar to "Jalsa", restricted to the 'Moderate' price
# band and a rating of at least 4.0, then render the result.
from IPython.display import display

jalsa_recommendations = recommend('Jalsa', preferred_price='Moderate', min_rating=4.0)
display(jalsa_recommendations)

Unnamed: 0,Restaurant Name,Cuisines,Rating (★),Approx Cost
1,Spice Elephant,"Chinese, North Indian, Thai",4.1,800 ₹
2,Penthouse Café,"Cafe, Italian, Continental",4.0,700 ₹
3,Smacznego,"Cafe, Mexican, Italian, Momos, Beverages",4.2,550 ₹
4,Onesta,"Pizza, Cafe, Italian",4.6,600 ₹
5,Café Shuffle,"Cafe, Italian, Continental",4.2,600 ₹
