In [209]:
import pandas as pd
data = pd.read_csv("data1.csv")
print(data.columns)
data['approx_costfor_two_people'] = pd.to_numeric(data['approx_costfor_two_people'], errors='coerce')
data['votes'] = pd.to_numeric(data['votes'], errors='coerce')
# Fill missing values
data['approx_costfor_two_people'] = data['approx_costfor_two_people'].fillna(data['approx_costfor_two_people'].mean())
data['votes'] = data['votes'].fillna(data['votes'].mean())
print(f"1.Shape of the dataset: {data.shape}")

north_indian_restaurants = data[data['cuisines'].str.contains('North Indian', na=False)]
print(f"2.Restaurants serving North Indian cuisine: {len(north_indian_restaurants)}")

cuisine_counts = data['cuisines'].value_counts()
if not cuisine_counts.empty:
    most_common_cuisine = cuisine_counts.idxmax()
    print(f"3.Most common cuisine overall: {most_common_cuisine}")
else:
    print("No cuisine data available.")
    
location_col = None
for col in ['location', 'address', 'listed_in', 'listed_in(city)', 'listed_incity']:
    if col in data.columns:
        location_col = col
        break

if location_col:
    avg_cost = data.groupby(location_col)['approx_costfor_two_people'].mean()
    most_expensive_area = avg_cost.idxmax()
    max_cost = avg_cost.max()
    print(f"4.Most expensive locality: {most_expensive_area} with average cost {max_cost:.2f}")
else:
    print("No suitable location column found to determine locality-wise cost.")

data['approx_costfor_two_people'] = pd.to_numeric(data['approx_costfor_two_people'], errors='coerce')
filtered_data = data.dropna(subset=['approx_costfor_two_people', 'listed_incity'])

localities = ['Banashankari', 'Church Street', 'Indiranagar', 'Whitefield']
filtered_data = filtered_data[filtered_data['listed_incity'].isin(localities)]
avg_cost_by_city = filtered_data.groupby('listed_incity')['approx_costfor_two_people'].mean()
highest_avg_city = avg_cost_by_city.idxmax()
highest_avg_cost = avg_cost_by_city.max()

print(f"5.Locality with the highest average cost for dining (for two people): {highest_avg_city}")
print(f"Average cost: ₹{highest_avg_cost:.2f}")


data['rate'] = data['rate'].apply(clean_rate)
data['votes'] = pd.to_numeric(data['votes'], errors='coerce')
q5_df = data[data['votes'] > 1000]
if 'rest_type' in data.columns:
    top_rest_type = q5_df.groupby('rest_type')['rate'].mean().sort_values(ascending=False)
    print(top_rest_type.head(1))


merged_df['approx_costfor_two_people'] = pd.to_numeric(
    merged_df['approx_costfor_two_people'].astype(str).str.replace(',', ''), errors='coerce'
)
print("6.Minimum cost for two people:", merged_df['approx_costfor_two_people'].min())


banashankari_data = data[data['listed_incity'] == 'Banashankari']
banashankari_online_orders = banashankari_data[banashankari_data['online_order'] == 'Yes']
total_online_orders = data[data['online_order'] == 'Yes'].shape[0]
percentage_banashankari = (banashankari_online_orders.shape[0] / total_online_orders) * 100
print(f"7. Percentage of total online orders received by restaurants in Banashankari: {percentage_banashankari:.2f}%")


filtered_data = data[(data['votes'] > 500) & (data['rate'] < 3.0)]
restaurant_counts_by_location = filtered_data['listed_incity'].value_counts()
top_locality = restaurant_counts_by_location.idxmax()
top_locality_count = restaurant_counts_by_location.max()
print(f"8. Locality with the most restaurants (over 500 votes and rating below 3.0): {top_locality} ({top_locality_count} restaurants)")
restaurant_type_diversity = data.groupby('listed_incity')['rest_type'].nunique()
sorted_localities = restaurant_type_diversity.sort_values(ascending=False)
top_locality = sorted_localities.idxmax()
top_diversity_count = sorted_localities.max()

print(f"9.Locality with the most diverse restaurant types: {top_locality} ({top_diversity_count} types)")
merged_df['approx_costfor_two_people'] = (
    merged_df['approx_costfor_two_people']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)

buffet = merged_df[merged_df['rest_type'].str.contains('Buffet', na=False)]
avg_cost_buffet = buffet['approx_costfor_two_people'].mean()

delivery = merged_df[merged_df['rest_type'].str.contains('Delivery', na=False)]
avg_cost_delivery = delivery['approx_costfor_two_people'].mean()

cost_difference = abs(avg_cost_buffet - avg_cost_delivery)

print(f"10.Buffet Avg Cost: ₹{avg_cost_buffet:.2f}")
print(f"Delivery Avg Cost: ₹{avg_cost_delivery:.2f}")
print(f"Cost Difference: ₹{cost_difference:.2f}")
if 'online_order' in data.columns:
    online_votes = df[data['online_order'] == 'Yes']['votes'].max()
    print("11. Max votes (online order):", online_votes)

both_cuisines_data = data[(data['cuisines'].str.contains('North Indian', case=False, na=False)) & 
                          (data['cuisines'].str.contains('Chinese', case=False, na=False))]

average_rating = both_cuisines_data['rate'].mean()

print(f"12.Average rating of restaurants that serve both North Indian and Chinese cuisines: {average_rating}")

data['estimated_revenue'] = data['votes'] * data['approx_costfor_two_people']

revenue_by_area = data.groupby('listed_incity')['estimated_revenue'].sum()

most_profitable_area = revenue_by_area.idxmax()
print(f"13.The most profitable area for Zomato based on potential revenue estimation is: {most_profitable_area}")

data['rate'] = pd.to_numeric(data['rate'], errors='coerce')
rest_type_ratings = data.groupby('rest_type')['rate'].mean().sort_values()
lowest_rated_rest_type = rest_type_ratings.idxmin()
lowest_rating = rest_type_ratings.min()

print("14.Restaurant type with the lowest average rating (needs focus):", lowest_rated_rest_type)
print("Average rating:", lowest_rating)

filtered_df = data[
    (data['rate'].astype(float) > 4.2) &
    (data['votes'].astype(int) > 500) &
    (data['online_order'] == 'Yes')
]

top_area = filtered_df['listed_incity'].value_counts().idxmax()

print("15. Best area for Zomato to invest:", top_area)


Index(['online_order', 'book_table', 'rate', 'votes', 'rest_type',
       'dish_liked', 'cuisines', 'approx_costfor_two_people', 'listed_intype',
       'listed_incity'],
      dtype='object')
1.Shape of the dataset: (51717, 10)
2.Restaurants serving North Indian cuisine: 21085
3.Most common cuisine overall: North Indian
4.Most expensive locality: Brookefield with average cost 427.77
5.Locality with the highest average cost for dining (for two people): Church Street
Average cost: ₹424.99
rest_type
Bakery    4.8
Name: rate, dtype: float64
6.Minimum cost for two people: 40.0
7. Percentage of total online orders received by restaurants in Banashankari: 1.79%
8. Locality with the most restaurants (over 500 votes and rating below 3.0): Brookefield (8 restaurants)
9.Locality with the most diverse restaurant types: BTM (61 types)
10.Buffet Avg Cost: ₹nan
Delivery Avg Cost: ₹415.05
Cost Difference: ₹nan
11. Max votes (online order): 16832
12.Average rating of restaurants that serve both North 

  online_votes = df[data['online_order'] == 'Yes']['votes'].max()


In [113]:
# Step 1: Install folium and IPython
!pip install folium
!pip install IPython



In [212]:

import folium
from IPython.display import IFrame
import pandas as pd
import numpy as np

data1 = pd.read_csv("data1.csv")
data2 = pd.read_csv("data2.csv")

merged_df = pd.merge(data1, data2, on='listed_incity', how='left')

#Filter for Italian Restaurants
italian_df = merged_df[merged_df['cuisines'].str.contains("Italian", case=False, na=False)]

# Create the Map
italian_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Add Markers for Each Italian Restaurant
for _, row in italian_df.iterrows():
    if not np.isnan(row['Latitude']) and not np.isnan(row['Longitude']):
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            popup=f"{row['listed_incity']} | {row['cuisines']} | Rating: {row['rate']}⭐",
            icon=folium.Icon(color='purple')
        ).add_to(italian_map)


italian_map.save("italian_restaurants_map.html")

IFrame(src='italian_restaurants_map.html', width=800, height=600)


In [211]:
import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame
import pandas as pd
import numpy as np


data1 = pd.read_csv("data1.csv")
data2 = pd.read_csv("data2.csv")


merged_df = pd.merge(data1, data2, on='listed_incity', how='left')

# Restaurant Density Visualization using Folium

# Initialize the map centered on Bangalore (12.9716, 77.5946) with zoom level 12
restaurant_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Initialize MarkerCluster to handle overlapping markers
marker_cluster = MarkerCluster().add_to(restaurant_map)

# Loop through the dataset to add a marker for each restaurant with valid latitude and longitude
for _, row in merged_df.iterrows():
    if not np.isnan(row['Latitude']) and not np.isnan(row['Longitude']):
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            popup=f"{row['listed_incity']} | Rating: {row['rate']} | Cost for Two: ₹{row['approx_costfor_two_people']}",
            icon=folium.Icon(color='blue')
        ).add_to(marker_cluster)

#Save the Map as an HTML file
restaurant_map.save("restaurant_density_map.html")

# Display the Map in the Jupyter Notebook using IFrame
IFrame(src='restaurant_density_map.html', width=800, height=600)
