In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline
import random
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.colors import n_colors
from plotly.subplots import make_subplots
# init_notebook_mode(connected=True)
import cufflinks as cf
cf.go_offline()
from wordcloud import WordCloud , ImageColorGenerator
from PIL import Image

In [3]:
# Load the dishes table from a CSV expected in the current working directory.
df = pd.read_csv('indian_food.csv')
# Preview the first rows to sanity-check the parsed columns and values.
df.head()

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region
0,Balu shahi,"Maida flour, yogurt, oil, sugar",vegetarian,45,25,sweet,dessert,West Bengal,East
1,Boondi,"Gram flour, ghee, sugar",vegetarian,80,30,sweet,dessert,Rajasthan,West
2,Gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins",vegetarian,15,60,sweet,dessert,Punjab,North
3,Ghevar,"Flour, ghee, kewra, milk, clarified butter, su...",vegetarian,15,30,sweet,dessert,Rajasthan,West
4,Gulab jamun,"Milk powder, plain flour, baking powder, ghee,...",vegetarian,15,40,sweet,dessert,West Bengal,East


In [4]:
# Show the column labels so the descriptions below can be matched against them.
df.columns

Index(['name', 'ingredients', 'diet', 'prep_time', 'cook_time',
       'flavor_profile', 'course', 'state', 'region'],
      dtype='object')

The dataset consists of **255** Indian dishes, each described by **9** columns.

The **9** columns are as follows:

**name** : name of the dish

**ingredients** : main ingredients used

**diet** : type of diet - either vegetarian or non vegetarian

**prep_time** : preparation time

**cook_time** : cooking time

**flavor_profile** : flavor profile of the dish - spicy, sweet, bitter, etc.

**course** : course of meal - starter, main course, dessert, etc.

**state** : state where the dish is famous or where it originated

**region** : region to which the state belongs

All the observations in this notebook will be based on these **255** dishes. There are many more dishes in Indian Cuisine! 

In [5]:
# (rows, columns) of the loaded table.
df.shape

(255, 9)

In [6]:
# Summary statistics for the numeric columns (prep_time and cook_time in the output).
# NOTE(review): the minimum of -1 in both columns looks like a missing-value
# placeholder rather than a real duration; if so it skews mean/std/quantiles.
# Confirm against the data source before interpreting these numbers.
df.describe()

Unnamed: 0,prep_time,cook_time
count,255.0,255.0
mean,31.105882,34.529412
std,72.554409,48.26565
min,-1.0,-1.0
25%,10.0,20.0
50%,10.0,30.0
75%,20.0,40.0
max,500.0,720.0


### Proportion of Vegetarian and Non-Vegetarian dishes

In [7]:
# Build a two-column table (diet, count) with one row per diet type.
pie_df = (
    df['diet']
    .value_counts()
    .rename_axis('diet')
    .reset_index(name='count')
)

# Pie chart of the share of each diet type; the colour sequence is applied
# in the order the diet types appear in pie_df.
fig = px.pie(
    pie_df,
    names='diet',
    values='count',
    title='Proportion of Vegetarian and Non-Vegetarian Dishes',
    color_discrete_sequence=['green', 'red'],
)
fig.show()

**Observation** - There are more vegetarian than non-vegetarian dishes in this dataset, which mirrors Indian food choices in general.

### Number of dishes based on region

In [11]:
# Count the dishes recorded for each region and shape the result into a
# two-column table (region, count) for plotting.
reg_df = df.region.value_counts().reset_index()
reg_df.columns = ['region', 'count']
# Shuffle the bar order, but with a fixed seed: an unseeded sample() produced a
# different bar arrangement on every run, so the figure was not reproducible
# under Restart & Run All.
reg_df = reg_df.sample(frac=1, random_state=42)
fig = px.bar(reg_df, x='region', y='count', title='Number of dishes based on regions')
fig.show()

### Number of dishes based on courses of meal

In [25]:
# Count the dishes recorded for each course and shape the result into a
# two-column table (course, count) for plotting.
course_df = df.course.value_counts().reset_index()
course_df.columns = ['course', 'count']
# The previous sort_values() call was removed: its ordering was discarded by the
# shuffle that immediately followed it, so it never affected the chart.
# The shuffle itself now uses a fixed seed so the bar order is identical on
# every run instead of changing each time the cell executes.
course_df = course_df.sample(frac=1, random_state=42)
fig = px.bar(course_df, x='course', y='count', title='Number of dishes based on courses of meal')
fig.show()

**Observation** - It is quite evident that 'main course' leads all other courses in this dataset.

About 129 of the dishes in this dataset are classified as main course.

### Proportion of Flavor Profiles

In [51]:
# Count the dishes recorded for each flavor profile and shape the result into a
# two-column table (flavor, count) for plotting.
flav_df = df.flavor_profile.value_counts().reset_index()
flav_df.columns = ['flavor', 'count']
# Shuffle the rows with a fixed seed. Without a seed the row order handed to
# px.pie changed on every run, so the rendered chart was not reproducible
# under Restart & Run All.
flav_df = flav_df.sample(frac=1, random_state=42)
fig = px.pie(flav_df, values='count', names='flavor', title='Proportion of Flavor Profiles', color_discrete_sequence=['#636EFA', '#EF553B', '#00CC96', '#AB63FA'])
fig.show()