In [10]:
import pandas as pd

# STEP 1: Load the dataset
df = pd.read_excel('Dataset .xlsx')

# STEP 2: Handle missing values in the 'Cuisines' column
df['Cuisines'] = df['Cuisines'].fillna('')

# STEP 3: Split multiple cuisines into a list
df['Cuisine_List'] = df['Cuisines'].str.split(', ')

# STEP 4: Explode the list so each cuisine gets its own row
exploded_df = df.explode('Cuisine_List')

# STEP 5: Count how many times each cuisine appears
cuisine_counts = exploded_df['Cuisine_List'].value_counts()

# STEP 6: Get the top 3 most common cuisines
top_3_cuisines = cuisine_counts.head(3)

# STEP 7: Calculate percentage of restaurants that serve each top cuisine
# Note: We divide by total number of restaurants (not total cuisine entries)
total_restaurants = df.shape[0]
top_3_percentages = (top_3_cuisines / total_restaurants) * 100

# STEP 8: Combine results into a final DataFrame
result_df = pd.DataFrame({
    'Cuisine': top_3_cuisines.index,
    'Count': top_3_cuisines.values,
    'Percentage of Restaurants': top_3_percentages.round(2)
})

# STEP 9: Display the result
print(result_df)



                   Cuisine  Count  Percentage of Restaurants
Cuisine_List                                                
North Indian  North Indian   3960                      41.46
Chinese            Chinese   2735                      28.64
Fast Food        Fast Food   1986                      20.79
