In [None]:
#Step 0: Import libraries & load dataset
import pandas as pd

# Load the restaurant workbook into a DataFrame. The file name contains a
# space before the extension, and the relative path is resolved against the
# notebook's working directory.
# NOTE(review): a few cuisine labels in the outputs further down contain
# replacement characters, which suggests damaged text in the workbook itself
# - confirm and clean at the source rather than in this notebook.
df = pd.read_excel(io="Dataset .xlsx")


In [None]:
# Step 1: Give restaurants with no recorded cuisine an explicit 'Unknown'
# label so they survive the later split and group-by steps instead of being
# dropped as missing keys. Only the 'Cuisines' column is filled.
df = df.fillna({'Cuisines': 'Unknown'})


In [None]:
# Step 2: Reshape to one row per (restaurant, cuisine) pair.
#
# The cuisine list is split on the bare comma and each piece is stripped
# afterwards, rather than splitting on the exact text ', '. This way
# inconsistently spaced entries such as "A,B" or "A ,  B" produce the same
# labels as "A, B" and are not counted as separate cuisines later on.
#
# `assign` returns a new frame, so `df` itself is left untouched.
# `explode` repeats the original index label on every row that came from the
# same restaurant, so the index of `df_cuisine` is intentionally not unique.
df_cuisine = (
    df.assign(Cuisines=df['Cuisines'].str.split(','))
    .explode('Cuisines')
    .assign(Cuisines=lambda exploded: exploded['Cuisines'].str.strip())
)


In [None]:
# Step 3: Rank cuisines by popularity, measured as total votes.
# `df_cuisine` holds one row per restaurant/cuisine pair, so a restaurant's
# votes are counted once for every cuisine it lists; the per-cuisine totals
# therefore overlap and are not expected to add up to the overall vote count.
votes_by_cuisine = df_cuisine.groupby('Cuisines')['Votes'].sum()

# Keep only the ten cuisines with the highest totals.
popular_cuisines = votes_by_cuisine.sort_values(ascending=False).iloc[:10]

popular_cuisines


Unnamed: 0_level_0,Votes
Cuisines,Unnamed: 1_level_1
North Indian,595981
Chinese,364351
Italian,329265
Continental,288255
Fast Food,184058
American,183117
Cafe,177568
Mughlai,151946
Desserts,105889
Asian,104303


In [None]:
# Step 4: Mean aggregate rating per cuisine, best first.
# The mean is taken over every row of a cuisine regardless of how many rows
# there are, so a cuisine with only a few restaurants can top this list.
# NOTE(review): if the dataset encodes "not rated" as a numeric value, those
# rows are included in the mean - confirm how unrated restaurants are stored.
ratings_grouped = df_cuisine.groupby('Cuisines')['Aggregate rating']
avg_rating_by_cuisine = ratings_grouped.mean().sort_values(ascending=False)

# Show the ten highest-rated cuisines.
avg_rating_by_cuisine.iloc[:10]


Unnamed: 0_level_0,Aggregate rating
Cuisines,Unnamed: 1_level_1
Sunda,4.9
Bï¿½_rek,4.7
Taiwanese,4.65
Ramen,4.5
Dim Sum,4.466667
Hawaiian,4.4125
Dï¿½_ner,4.4
Bubble Tea,4.4
Curry,4.4
Kebab,4.38


In [None]:
# Step 5: Cuisines that are both highly rated and backed by enough votes.

# A cuisine must have strictly more than this many total votes to be ranked.
# Raising it trades coverage for confidence in the average rating.
MIN_TOTAL_VOTES = 1000

# Per-cuisine summary: mean rating and summed votes across all rows of
# `df_cuisine` that carry the cuisine label.
cuisine_analysis = df_cuisine.groupby('Cuisines').agg(
    Avg_Rating=('Aggregate rating', 'mean'),
    Total_Votes=('Votes', 'sum'),
)

# Drop low-vote cuisines first, then order the remainder by average rating.
top_rated_cuisines = (
    cuisine_analysis
    .loc[cuisine_analysis['Total_Votes'] > MIN_TOTAL_VOTES]
    .sort_values('Avg_Rating', ascending=False)
)

top_rated_cuisines.head(10)


Unnamed: 0_level_0,Avg_Rating,Total_Votes
Cuisines,Unnamed: 1_level_1,Unnamed: 2_level_1
Sunda,4.9,5514
Bï¿½_rek,4.7,1305
Ramen,4.5,1259
Dim Sum,4.466667,1755
Hawaiian,4.4125,8012
Curry,4.4,2059
Kebab,4.38,1536
Izgara,4.35,1166
Filipino,4.34,3789
South African,4.333333,1806
