# In [12]:
import pandas as pd

def normalize_cuisine_combo(cuisines):
    """Return an order-independent label for a comma-separated cuisine string.

    Each name is stripped of surrounding whitespace, empty entries (left by
    stray or trailing commas) are discarded, and the remaining names are
    sorted alphabetically and re-joined with ", ". As a result
    "Chinese, North Indian" and "North Indian, Chinese" map to the same label.

    Args:
        cuisines: A string of cuisine names separated by commas.

    Returns:
        The canonical ", "-joined label (an empty string if no names remain).
    """
    names = (part.strip() for part in cuisines.split(','))
    # Skip blank tokens so "Chinese, , North Indian," does not become a
    # distinct combination with leading ", " fragments.
    return ', '.join(sorted(name for name in names if name))


def rank_cuisine_combos(frame, top_n=10):
    """Count cuisine combinations and attach the mean rating to the top ones.

    Args:
        frame: DataFrame with a 'Cuisine Combo' column (canonical labels) and
            an 'Aggregate rating' column.
        top_n: How many of the most frequent combinations to report.

    Returns:
        A 4-tuple ``(counts, leaders, mean_ratings, table)`` where ``counts``
        is the frequency of every combination, ``leaders`` is its first
        ``top_n`` entries, ``mean_ratings`` is the mean 'Aggregate rating' per
        combination rounded to 2 decimals, and ``table`` is a DataFrame with
        the columns 'Combo', 'Count' and 'Average Rating' for the leaders.
    """
    counts = frame['Cuisine Combo'].value_counts()
    leaders = counts.head(top_n)

    # NOTE(review): the mean uses every row that has a rating value; if the
    # dataset encodes "not rated" as 0, those rows pull the averages down —
    # confirm against the data dictionary.
    mean_ratings = frame.groupby('Cuisine Combo')['Aggregate rating'].mean().round(2)

    table = pd.DataFrame({
        'Combo': leaders.index,
        'Count': leaders.values,
        # Look up each leading combo's mean rating by its label.
        'Average Rating': leaders.index.map(mean_ratings),
    })
    return counts, leaders, mean_ratings, table


# The guard is true when this runs as a notebook cell or a script, so the
# names below are still defined at module level for any later code.
if __name__ == "__main__":
    # STEP 1: Load the dataset
    df = pd.read_excel('Dataset .xlsx')

    # STEP 2: Clean missing values
    df = df.dropna(subset=['Cuisines', 'Aggregate rating'])

    # STEP 3: Standardize cuisine combinations
    # Strip spaces and sort cuisines alphabetically so "Chinese, North Indian" = "North Indian, Chinese"
    df['Cuisine Combo'] = df['Cuisines'].apply(normalize_cuisine_combo)

    # STEP 4-6: Count combinations, average their ratings, and combine the
    # top 10 into one DataFrame
    combo_counts, top_combos, combo_ratings, combo_df = rank_cuisine_combos(df, top_n=10)

    # STEP 7: Display result
    print("🔸 Top 10 Cuisine Combinations with Ratings:")
    print(combo_df)


# Output:
# 🔸 Top 10 Cuisine Combinations with Ratings:
#                             Combo  Count  Average Rating
# 0                    North Indian    936            1.67
# 1           Chinese, North Indian    616            2.33
# 2           Mughlai, North Indian    394            2.77
# 3                       Fast Food    354            2.12
# 4                         Chinese    354            2.04
# 5  Chinese, Mughlai, North Indian    306            2.62
# 6                            Cafe    299            2.89
# 7                          Bakery    218            1.92
# 8                Bakery, Desserts    181            2.38
# 9              Chinese, Fast Food    159            2.07
