In [2]:
import numpy as np
import pandas as pd
import string
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from nltk.corpus import stopwords
import warnings

# Ignore warnings for clean output
warnings.filterwarnings("ignore")

# Load the dataset
zomato_real = pd.read_csv("zomato.csv")

# Data cleaning and preprocessing
zomato = zomato_real.drop(['url', 'dish_liked', 'phone'], axis=1)
zomato.drop_duplicates(inplace=True)
zomato.dropna(how='any', inplace=True)

# Renaming columns for simplicity
zomato = zomato.rename(columns={
    'approx_cost(for two people)': 'cost',
    'listed_in(type)': 'type',
    'listed_in(city)': 'city'
})

# Data transformations
zomato['cost'] = zomato['cost'].astype(str).apply(lambda x: x.replace(',', '')).astype(float)
zomato = zomato[zomato['rate'] != 'NEW']
zomato = zomato[zomato['rate'] != '-']
zomato['rate'] = zomato['rate'].apply(lambda x: x.replace('/5', '')).astype(float)
zomato['name'] = zomato['name'].str.title()
zomato['online_order'] = zomato['online_order'].replace({'Yes': True, 'No': False})
zomato['book_table'] = zomato['book_table'].replace({'Yes': True, 'No': False})

# Compute mean rating for each restaurant
zomato['Mean Rating'] = zomato.groupby('name')['rate'].transform('mean')

# Text preprocessing
zomato["reviews_list"] = zomato["reviews_list"].str.lower()

# Remove punctuation
def remove_punctuation(text):
    return text.translate(str.maketrans('', '', string.punctuation))

zomato["reviews_list"] = zomato["reviews_list"].apply(remove_punctuation)

# Remove stopwords
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    return " ".join([word for word in text.split() if word not in STOPWORDS])

zomato["reviews_list"] = zomato["reviews_list"].apply(remove_stopwords)

# Remove URLs
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub(r'', text)

zomato["reviews_list"] = zomato["reviews_list"].apply(remove_urls)

# Drop unnecessary columns for recommendation
zomato = zomato.drop(['address', 'rest_type', 'type', 'menu_item', 'votes'], axis=1)

# Create a sample dataset (50% of the original)
df_percent = zomato.sample(frac=0.5, random_state=42)
df_percent.set_index('name', inplace=True)
indices = pd.Series(df_percent.index)

# TF-IDF Vectorizer
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_percent['reviews_list'])

# Calculate cosine similarities
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Recommendation function
def recommend(name, cosine_similarities=cosine_similarities, filters=None):
    recommend_restaurant = []
    idx = indices[indices == name].index[0]
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    top30_indexes = list(score_series.iloc[1:31].index)  # Skip the first, which is the same restaurant

    # Fetch restaurant names and details
    for each in top30_indexes:
        recommend_restaurant.append(list(df_percent.index)[each])
    
    # Create a dataframe with similar restaurants
    df_new = pd.DataFrame(columns=['cuisines', 'Mean Rating', 'cost', 'online_order', 'book_table'])
    for each in recommend_restaurant:
        df_new = pd.concat([df_new, df_percent[['cuisines', 'Mean Rating', 'cost', 'online_order', 'book_table']].loc[[each]]])
    
    # Apply filters if provided
    if filters:
        for key, value in filters.items():
            if isinstance(value, tuple):  # For range filters like cost
                df_new = df_new[(df_new[key] >= value[0]) & (df_new[key] <= value[1])]
            else:
                df_new = df_new[df_new[key] == value]
    
    df_new = df_new.drop_duplicates(subset=['cuisines', 'Mean Rating', 'cost'])
    df_new = df_new.sort_values(by='Mean Rating', ascending=False).head(10)
    
    print(f"TOP {len(df_new)} RESTAURANTS LIKE {name.upper()} WITH SIMILAR REVIEWS:")
    return df_new

# Examples of Outputs

# 1. General recommendation
print(recommend('Pai Vihar'))

# 2. Low-cost recommendations
print(recommend('Pai Vihar', filters={'cost': (0, 500)}))

# 3. Cuisine-based recommendations
print(recommend('Pai Vihar', filters={'cuisines': 'South Indian'}))

# 5. High-rated recommendations
print(recommend('Pai Vihar', filters={'Mean Rating': (4.0, 5.0)}))

# 6. Recommendations with online order availability
print(recommend('Pai Vihar', filters={'online_order': True}))

# 7. Recommendations with table booking feature
print(recommend('Pai Vihar', filters={'book_table': True}))

# 8. Personalized recommendations (Pure Veg, cost < ₹500)
# Add a custom filter for cuisines if required
print(recommend('Pai Vihar', filters={'cuisines': 'Pure Veg', 'cost': (0, 500)}))


TOP 10 RESTAURANTS LIKE PAI VIHAR WITH SIMILAR REVIEWS:
                                                                    cuisines  \
Nisarga Garden                    South Indian, Chinese, Street Food, Juices   
Samosa Singh                                            North Indian, Mithai   
Samosa Singh                         Street Food, Fast Food, Rolls, Desserts   
Kadai Crust - Amma Veetu Samayal            Chettinad, South Indian, Biryani   
Upahar Sagar                             South Indian, Chinese, North Indian   
Sunehri                                                         South Indian   
Juicemaker                                              Beverages, Fast Food   
Juicemaker                                                 Juices, Fast Food   
Juice Junction Food Court                       Beverages, Juices, Fast Food   
Juice Junction Food Court           Juices, South Indian, Chinese, Fast Food   

                                  Mean Rating   cost online_ord