In [None]:
# Interactive Restaurant Density Mapping – Self‑Contained Script

# 1) Imports
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame

# 2) Read both input tables; the two CSV files must be present in the
#    current working directory.
data1, data2 = (pd.read_csv(csv_name) for csv_name in ('data1.csv', 'data2.csv'))

# 3) Clean & preprocess data1
#    Every step assigns its result back to the column instead of calling
#    fillna(inplace=True) on a column selection: pandas warns about that
#    chained in-place form and, with Copy-on-Write enabled, it leaves data1
#    unchanged. The steps are also written so the cell can be re-run on an
#    already-cleaned data1 without raising or wiping values.

#   – rate: drop the '/5' suffix and convert to float. Casting to str first
#     keeps the .str calls valid when the column is already numeric;
#     errors='coerce' turns placeholders such as '-' and 'NEW' (and missing
#     values) into NaN, which are then filled with the column median.
rate_text = (data1['rate']
    .astype(str)
    .str.replace('/5', '', regex=False)
    .str.strip())
rate_numeric = pd.to_numeric(rate_text, errors='coerce')
data1['rate'] = rate_numeric.fillna(rate_numeric.median())

#   – cost: remove thousands separators, convert, fill NaNs with the median
cost_text = (data1['approx_costfor_two_people']
    .astype(str)
    .str.replace(',', '', regex=False))
cost_numeric = pd.to_numeric(cost_text, errors='coerce')
data1['approx_costfor_two_people'] = cost_numeric.fillna(cost_numeric.median())

#   – Fill missing text fields with a fixed placeholder per column
text_placeholders = {
    'dish_liked': 'Not Available',
    'cuisines': 'Other',
    'rest_type': 'Unknown',
}
for column, placeholder in text_placeholders.items():
    data1[column] = data1[column].fillna(placeholder)

#   – Votes to int (missing values take the median first)
data1['votes'] = data1['votes'].fillna(data1['votes'].median()).astype(int)

#   – Binary encode Yes/No. The 1 and 0 keys map already-encoded values to
#     themselves, so re-running the cell does not turn the column into NaN.
yes_no_codes = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
for column in ('online_order', 'book_table'):
    data1[column] = data1[column].map(yes_no_codes)

# 4) Attach the data2 columns to every data1 row through the shared
#    'listed_incity' key; the left join keeps data1 rows that have no match.
# NOTE(review): a key that occurs more than once in data2 would duplicate the
# matching data1 rows — confirm data2 has one row per 'listed_incity'.
merged_df = data1.merge(data2, how='left', on='listed_incity')

# 5) Build the restaurant density map
# The map is centred on fixed coordinates and every marker is added to a
# MarkerCluster layer on top of it.
density_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)
marker_cluster = MarkerCluster().add_to(density_map)

# Only rows with both coordinates can be plotted. Selecting just the columns
# the markers need and iterating with itertuples avoids building a full
# Series per row, which is what iterrows does and what makes it slow on a
# large frame.
marker_columns = ['Latitude', 'Longitude', 'listed_incity', 'rate',
                  'approx_costfor_two_people']
plottable = merged_df[marker_columns].dropna(subset=['Latitude', 'Longitude'])

for place in plottable.itertuples(index=False):
    folium.Marker(
        location=[place.Latitude, place.Longitude],
        popup=(
            f"{place.listed_incity}<br>"
            f"Rating: {place.rate}/5<br>"
            f"Cost for Two: ₹{place.approx_costfor_two_people}"
        ),
        tooltip=place.listed_incity
    ).add_to(marker_cluster)

# 6) Write the map to an HTML file, then embed that file in the cell output
#    (the IFrame stays the last expression so the notebook displays it).
map_html_path = 'restaurant_density.html'
density_map.save(map_html_path)
IFrame(map_html_path, width=900, height=600)


In [7]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame


In [8]:
# Rebind data1 and data2 to frames freshly read from the CSV files.
data1, data2 = (pd.read_csv(csv_name) for csv_name in ('data1.csv', 'data2.csv'))

In [9]:
# (rows, columns) of the merged frame. merged_df is created by the merge step
# in the first cell, so that cell has to run before this one.
merged_df.shape

(51750, 11)

In [10]:
# Read both CSV files again; data1 and data2 now refer to the raw tables.
data1, data2 = (pd.read_csv(csv_name) for csv_name in ('data1.csv', 'data2.csv'))


In [11]:
# Convert the 'rate' column to float and fill gaps with the median.
# Casting to str first keeps the .str calls valid even when the column is
# already numeric, so the cell can be re-run without an AttributeError.
# errors='coerce' turns placeholders such as '-' and 'NEW' (and missing
# values) into NaN before the median fill.
rate_text = (data1['rate']
    .astype(str)
    .str.replace('/5', '', regex=False)
    .str.strip())
rate_numeric = pd.to_numeric(rate_text, errors='coerce')
data1['rate'] = rate_numeric.fillna(rate_numeric.median())

In [12]:
# Rows whose 'cuisines' text contains 'North Indian' (missing values count as
# no match), followed by how many such rows there are.
is_north_indian = merged_df['cuisines'].str.contains('North Indian', na=False)
north_indian = merged_df.loc[is_north_indian]
len(north_indian)

13192

In [13]:
from collections import Counter

# Split every non-missing 'cuisines' entry on ', ' and pool all the pieces
# into one flat list, then show the single most frequent piece.
all_cuisines = merged_df['cuisines'].dropna().str.split(', ')
flat_list = []
for cuisine_names in all_cuisines:
    flat_list.extend(cuisine_names)
pd.Series(flat_list).value_counts().iloc[:1]

North Indian    13192
dtype: int64

In [14]:
# Case-insensitive count of rows whose 'cuisines' text contains 'North Indian'.
mentions_north_indian = merged_df['cuisines'].str.contains(
    'North Indian', case=False, na=False
)
len(merged_df.loc[mentions_north_indian])



13192

In [15]:
# Most frequent exact value of the 'cuisines' column (the whole cell text is
# counted as one value; it is not split into individual cuisines here).
cuisine_value_counts = merged_df['cuisines'].value_counts()
cuisine_value_counts.iloc[:1]


North Indian    13192
Name: cuisines, dtype: int64

In [16]:
# Mean cost for two per 'listed_incity'; only the highest mean is shown.
mean_cost_by_city = merged_df.groupby('listed_incity')['approx_costfor_two_people'].mean()
mean_cost_by_city.sort_values(ascending=False).iloc[:1]


listed_incity
Indiranagar    599.898817
Name: approx_costfor_two_people, dtype: float64

In [17]:
# Among rows with more than 1000 votes: mean rating per 'rest_type', showing
# only the highest one.
(
    merged_df.loc[merged_df['votes'] > 1000]
    .groupby('rest_type')['rate']
    .mean()
    .sort_values(ascending=False)
    .iloc[:1]
)


Series([], Name: rate, dtype: float64)

In [18]:
# Smallest value in the cost-for-two column.
merged_df.loc[:, 'approx_costfor_two_people'].min()


250.0

In [19]:
# Share (in percent) of the online-ordering rows whose 'listed_incity' is
# 'Banashankari'. The online-ordering subset is selected once and reused for
# both counts.
online_rows = merged_df[merged_df['online_order'] == 1]
banashankari = int((online_rows['listed_incity'] == 'Banashankari').sum())
total_online = len(online_rows)
# With no online-ordering rows the share is undefined; report NaN instead of
# raising ZeroDivisionError.
percentage = (banashankari / total_online) * 100 if total_online else float('nan')
percentage


0.0

In [20]:
# 'listed_incity' with the most rows that have more than 500 votes and a
# rating below 3.
popular_low_rated = (merged_df['votes'] > 500) & (merged_df['rate'] < 3)
merged_df.loc[popular_low_rated, 'listed_incity'].value_counts().iloc[:1]


Koramangala    1362
Name: listed_incity, dtype: int64

In [21]:
# Number of distinct 'rest_type' values per 'listed_incity'; only the largest
# count is shown.
rest_types_per_city = merged_df.groupby('listed_incity')['rest_type'].nunique()
rest_types_per_city.sort_values(ascending=False).iloc[:1]


listed_incity
BTM    4
Name: rest_type, dtype: int64

In [23]:
# Highest vote count among rows where 'online_order' equals 1.
offers_online_order = merged_df['online_order'] == 1
merged_df.loc[offers_online_order, 'votes'].max()


999

In [25]:
# Add an 'estimated_revenue' column (cost for two multiplied by votes) to
# merged_df, then show the 'listed_incity' with the largest total of it.
merged_df['estimated_revenue'] = merged_df['approx_costfor_two_people'].mul(merged_df['votes'])
revenue_by_city = merged_df.groupby('listed_incity')['estimated_revenue'].sum()
revenue_by_city.sort_values(ascending=False).iloc[:1]


listed_incity
Koramangala    3.919916e+09
Name: estimated_revenue, dtype: float64

In [24]:
# Mean rating of rows whose 'cuisines' text mentions both 'North Indian' and
# 'Chinese' (case-insensitive; missing values count as no match).
cuisine_text = merged_df['cuisines']
has_north_indian = cuisine_text.str.contains('North Indian', case=False, na=False)
has_chinese = cuisine_text.str.contains('Chinese', case=False, na=False)
merged_df.loc[has_north_indian & has_chinese, 'rate'].mean()


nan

In [26]:
# Mean rating per 'rest_type', sorted ascending; only the lowest is shown.
mean_rate_by_type = merged_df.groupby('rest_type')['rate'].mean()
mean_rate_by_type.sort_values().iloc[:1]


rest_type
Quick Bites    3.741083
Name: rate, dtype: float64

In [27]:
# 'listed_incity' with the most rows rated above 4.2, with more than 500
# votes, and with 'online_order' equal to 1.
top_online_mask = (
    (merged_df['rate'] > 4.2)
    & (merged_df['votes'] > 500)
    & (merged_df['online_order'] == 1)
)
merged_df.loc[top_online_mask, 'listed_incity'].value_counts().iloc[:1]


HSR    1397
Name: listed_incity, dtype: int64