## **Imports**


In [3]:
import os
import json
import folium
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from geopy.geocoders import Nominatim
from IPython.display import display, HTML
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer

## **Overriding Defaults**


In [2]:
# Cap DataFrame previews at 20 rows so large tables stay readable in the notebook
pd.options.display.max_rows = 20

## **Loading Dataset**


In [8]:
# Read the restaurant catalogue and the review table; both files are decoded as latin-1
restaurantsDf, reviewsDf = (
    pd.read_csv(csv_path, encoding='latin-1')
    for csv_path in (
        './data/registered_restaurants.csv',
        './data/modified_reviews.csv',
    )
)

In [9]:
# Preview a single restaurant row to inspect the available columns
restaurantsDf.head(n=1)

Unnamed: 0,business_id,address,city,latitude,longitude,name,rating,review_count,categories,slug,id,userId
0,---kPU91CF4Lq2-WlRu9Lw,4903 State Rd 54,New Port Richey,28.217288,-82.733344,Frankie's Raw Bar,4.5,24,"Seafood, Latin American",frankies-raw-bar,65eda330d745307cb1a92556,65d8e5f75b89ca028e3fd8cb


In [10]:
# Preview a single review row to inspect the available columns
reviewsDf.head(n=1)

Unnamed: 0,id,review_id,user_id,restaurant_id,slug,rating,content,createdAt
0,65eddbf4b6474deda4d94197,zM7u3iffMjniyvQAaN9Tnw,65d92f185b89ca028e45e930,65eda92ad745307cb1a957d3,nSi0NLoRHw,1.0,So disappointing on so many levels. Have been ...,2012-05-29 03:51:08


# **<div align="center">`User Data`</div>**

In [1]:
# Identifier of the user the recommendations are generated for.
# NOTE(review): presumably corresponds to the `user_id` values in the reviews table - confirm.
userId = "65d94d8d5b89ca028e47d7f6"

In [4]:
# Initialize a geolocator
geolocator = Nominatim(user_agent="RRS")

# Reverse geocoding - look up the address that belongs to the user's (lat, lon) pair.
# NOTE(review): `user_coordinates` is not assigned in any cell shown above - confirm where it is set.
location = geolocator.reverse((user_coordinates[0], user_coordinates[1]), language="en")

# reverse() yields None when nothing is found, so fall back to an empty address
address_components = location.raw.get("address", {}) if location is not None else {}

# Extract the city. Nominatim files smaller settlements under "town" or "village"
# rather than "city", so try those keys before giving up.
user_city = next(
    (
        address_components[key]
        for key in ("city", "town", "village")
        if key in address_components
    ),
    "City not found",
)
print("User City:", user_city)

User City: New Delhi


In [11]:
# Build a map centred on the user's coordinates.
# NOTE(review): `user_coordinates` is not assigned in any cell shown above - confirm where it is set.
map_center = [user_coordinates[axis] for axis in (0, 1)]
user_location = folium.Map(zoom_start=15, location=map_center)

# Pin the user's position on the map
folium.Marker(popup="User Location", location=user_coordinates).add_to(user_location)

# Rendering is left disabled; uncomment the next line to show the map
# user_location

<folium.map.Marker at 0x232de388610>

## **User Preferred Cuisine**


In [15]:
# Restaurants the current user has reviewed.
# The ids come from the loaded review table (`user_id` / `restaurant_id` columns) so the
# cell works on a fresh kernel without relying on data held only in kernel state.
user_reviews = reviewsDf[reviewsDf['user_id'] == userId]
restaurant_ids = user_reviews['restaurant_id'].dropna().unique().tolist()

# Look the reviewed restaurants up in the catalogue by their `id`
reviewed_data = restaurantsDf[restaurantsDf['id'].isin(restaurant_ids)].reset_index(drop=True)
reviewed_data

Unnamed: 0,business_id,address,city,latitude,longitude,name,rating,review_count,categories,slug,id,userId


In [None]:
# Unique set of cuisines across the restaurants the user reviewed.
# The catalogue keeps them in the `categories` column as a ', '-separated string.
preferred_cuisines = set(
    reviewed_data['categories'].dropna().astype(str).str.split(', ').explode()
)

if preferred_cuisines:
    # Sort so the generated text does not depend on set iteration order
    cuisines_text = ', '.join(sorted(preferred_cuisines))

    # Generate a word cloud on a white background
    wordcloud = WordCloud(width=800, height=300, background_color='white').generate(cuisines_text)

    # Display the word cloud using matplotlib
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
else:
    # WordCloud.generate() raises on empty text, so skip the plot instead of failing
    print("No reviewed restaurants found for this user; nothing to plot.")

In [None]:
# One row per (restaurant, cuisine): split the ', '-separated `categories` string and explode it
reviewed_data_exploded = reviewed_data.assign(
    categories=reviewed_data['categories'].str.split(', ')
).explode('categories')

# Count how many of the reviewed restaurants fall under each cuisine
reviewed_data_cuisines = reviewed_data_exploded['categories'].value_counts()

# Pie chart of the user's cuisine mix
fig, ax = plt.subplots(figsize=(12, 8))
ax.pie(
    reviewed_data_cuisines,
    labels=reviewed_data_cuisines.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=sns.color_palette("pastel"),
)
ax.set_title('User Preferred Cuisines')
plt.show()

In [None]:
# Candidate restaurants: those located in the user's city, one row per restaurant name.
# Boolean indexing returns a new frame, so `restaurantsDf` itself is left untouched.
filteredDf = (
    restaurantsDf[restaurantsDf['city'] == user_city]
    .drop_duplicates(subset='name')
    # Fresh 0..n-1 index so row labels equal row positions
    .reset_index(drop=True)
)

# Display the filtered DataFrame
filteredDf

# **<div align="center">`Vector Space Model`</div>**

In [None]:
# Create a TF-IDF vectorizer to convert the cuisine strings into numerical data
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Missing categories become empty documents; the vectorizer cannot tokenize NaN
cuisine_documents = filteredDf['categories'].fillna('').astype(str).tolist()
tfidf_matrix = tfidf_vectorizer.fit_transform(cuisine_documents)

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity between restaurants
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim.shape

In [None]:
# Plot the cosine similarity matrix for a small sample of restaurants

# Sample size to show relations (never more than the number of restaurants available)
sample_size = min(5, cosine_sim.shape[0])
sampled_cosine_sim = cosine_sim[:sample_size, :sample_size]
sample_labels = filteredDf.index[:sample_size]

fig, ax = plt.subplots(figsize=(10, 8))

# Generate heatmap. The tick labels are passed here so seaborn centres them on the
# cells; re-setting the ticks at integer positions afterwards would shift every label
# onto a cell edge.
sns.heatmap(
    sampled_cosine_sim,
    annot=True,
    cmap='coolwarm',
    xticklabels=sample_labels,
    yticklabels=sample_labels,
    ax=ax,
)

# Keep the y-axis labels horizontal for better readability
ax.tick_params(axis='y', labelrotation=0)

# Labels and title
ax.set_xlabel('Restaurant Index')
ax.set_ylabel('Restaurant Index')
ax.set_title('Cosine Similarity between Restaurants based on Cuisines')

# Show the plot
plt.show()

In [None]:
# Function to get restaurant recommendations
def get_recommendations(similarity_threshold, restaurant_rating_threshold):
    """Recommend restaurants similar to the ones the user has already reviewed.

    Reads three module-level objects: `reviewed_data` (restaurants the user
    reviewed), `filteredDf` (candidate restaurants) and `cosine_sim` (square
    similarity matrix whose row/column positions follow the row order of
    `filteredDf`). Both frames are expected to carry the restaurant columns
    `id`, `name` and `rating`.

    Args:
        similarity_threshold: Minimum similarity (inclusive) a candidate must
            have with a reviewed restaurant.
        restaurant_rating_threshold: Candidates must be rated strictly above
            this value.

    Returns:
        A DataFrame with the matching rows of `filteredDf`, de-duplicated by
        `name` and re-indexed from 0. It is empty (same columns) when nothing
        qualifies.
    """
    # Row position of every candidate by name (first occurrence wins). Positions,
    # not index labels, are used throughout because cosine_sim is positional.
    name_to_position = {}
    for position, candidate_name in enumerate(filteredDf['name']):
        name_to_position.setdefault(candidate_name, position)

    candidate_ratings = filteredDf['rating'].tolist()
    candidate_ids = filteredDf['id'].tolist()

    recommended_positions = []

    # Loop through the restaurants the user reviewed
    for reviewed_name, reviewed_id in zip(reviewed_data['name'], reviewed_data['id']):
        position = name_to_position.get(reviewed_name)
        if position is None:
            # The reviewed restaurant is not among the candidates (e.g. it was
            # filtered out), so it has no row in the similarity matrix.
            continue

        # Most similar candidates first
        scored = sorted(
            enumerate(cosine_sim[position]),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Keep candidates that are similar enough, rated above the threshold,
        # and are not the reviewed restaurant itself
        recommended_positions.extend(
            candidate
            for candidate, score in scored
            if score >= similarity_threshold
            and candidate_ratings[candidate] > restaurant_rating_threshold
            and candidate_ids[candidate] != reviewed_id
        )

    # Select all rows in one go instead of growing a frame inside the loop,
    # then drop repeats produced by several reviews pointing at the same place
    return (
        filteredDf.iloc[recommended_positions]
        .drop_duplicates(subset='name')
        .reset_index(drop=True)
    )

# Build the recommendation table from the user's reviews
# (similarity threshold 0.75, rating threshold 3)
recommendations = get_recommendations(
    similarity_threshold=0.75,
    restaurant_rating_threshold=3,
)

# Show the resulting table
recommendations

In [None]:
# Coordinates, names and ratings of the recommended restaurants
latitude = recommendations['latitude'].tolist()
longitude = recommendations['longitude'].tolist()
restaurants = recommendations['name'].tolist()
ratings = recommendations['rating'].tolist()

# Center the map around the first restaurant. With no recommendations there is no
# first row, so pass None and let folium fall back to its default view.
map_center = [latitude[0], longitude[0]] if latitude else None

restaurant_map = folium.Map(location=map_center, zoom_start=11)

# Add a marker for each restaurant. Popup text is rendered as HTML, where a bare
# newline collapses to a space, so <br> is used for the line break.
for lat, lon, restaurant, rating in zip(latitude, longitude, restaurants, ratings):
    folium.Marker(
        [lat, lon],
        popup=f'Restaurant Name: {restaurant}<br>Rating: {rating:.1f}',
    ).add_to(restaurant_map)

# Display the map
restaurant_map