# Indian Cuisine EDA
The dataset used is [Indian Food 101](https://www.kaggle.com/datasets/nehaprabhavalkar/indian-food-101?datasetId=865197&sortBy=voteCount), uploaded to Kaggle by Neha Prabhavalkar.

In [None]:
# IMPORTING LIBRARIES
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly
import seaborn as sns
import plotly.express as px
import plotly.offline as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from matplotlib.pyplot import figure, show
from wordcloud import WordCloud,STOPWORDS
from collections import Counter

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

In [4]:
# LOADING THE DATASET
# Read the CSV at the relative Kaggle input path into the DataFrame `df`,
# which every later cell in this notebook operates on.
df = pd.read_csv("../input/indian-food-101/indian_food.csv")

In [5]:
# Preview the first five rows of the dataset
df.head(5)

In [6]:
# (rows, columns) of the dataset
df.shape

In [7]:
# Column names, dtypes and non-null counts
df.info()

In [9]:
# Summary statistics restricted to the object (string) columns
df.describe(include= 'object')

In [11]:
# Replace the -1 / '-1' placeholder entries in every column with NaN
for column in df.columns:
    df[column] = df[column].apply(
        lambda value: np.nan if (value == '-1' or value == -1) else value
    )

In [18]:
# Report, for every column, how many values are missing and what percentage
# of all rows that represents.
for x in df.columns:
    null_count = df[x].isna().sum()
    # Scale the fraction to 0-100 so it matches the '%' sign in the message
    null_percent = null_count / len(df) * 100
    print('Null Value in {0} = {1} \npercent = {2:.2f}%\n'.format(x, null_count, null_percent))

In [21]:
# Print the number of distinct (non-null) values held by each column
for column, distinct_count in df.nunique().items():
    print('Unique Values in  {0} = {1}'.format(column, distinct_count))

In [22]:
# Number of dishes for each value of the diet column
df['diet'].value_counts()

In [25]:
# Histogram of the diet column, one colour per diet value
fig = px.histogram(
    df,
    x='diet',
    color='diet',
    title='Unique Values in Diet Column',
)
fig.show()

In [26]:
# Number of dishes for each value of the flavor_profile column
df['flavor_profile'].value_counts()

In [28]:
# Histogram of the flavor_profile column, one colour per flavor.
# Only rows whose flavor_profile is missing are excluded; a bare dropna()
# would also discard dishes that merely lack a value in some other column
# and so under-count the flavors.
fig = px.histogram(
    df.dropna(subset=['flavor_profile']),
    x='flavor_profile',
    color='flavor_profile',
    title="Unique Values in flavor_profile Column",
)

fig.show()

In [30]:
# Number of dishes for each value of the course column
df['course'].value_counts()

In [29]:
# Histogram of the course column, one colour per course
fig = px.histogram(
    df,
    x='course',
    color='course',
    title='Unique Value in Course Column',
)
fig.show()

In [50]:
# Bar chart of the dataset along the region axis
fig = px.bar(df, x='region')
# Recolour the bars and make them narrower
fig.update_traces(marker_color='dodgerblue', width=0.6)
# Title, fonts and axis labels; left as the final expression so the notebook
# renders the returned figure.
fig.update_layout(
    title_text='<i><b>Dishes per Region</b></i>',
    title_x=0.5,
    colorway=[],
    title_font_size=32,
    font_family="Arial",
    font_color="#800080",
    xaxis_title="Regions of India",
    yaxis_title="Number of different Dishes",
    font_size=15,
)



In [59]:
# Bar chart of the dishes per region, coloured by diet.
fig2 = px.bar(
    df,
    x='region',
    color='diet',
    title='Different Diet per Region',
    labels={'region': 'Region', 'diet': 'Diet'},
    color_discrete_sequence=['#B5EAAA', '#32612D'],
)
# The layout title previously read "Average Cooking Time per Dish per Region
# and Diet", which contradicted both the data plotted (no cooking time is
# involved) and the y-axis label; it now matches the title given to px.bar.
fig2.update_layout(
    title_text='<i><b>Different Diet per Region</b></i>',
    title_x=0.5,
    title_font_size=24,
    font_family='Arial',
    font_color="#800080",
    xaxis_title="Indian Region",
    yaxis_title="Number of different Dishes",
    font_size=15,
)

In [79]:
# Pie chart of the share of dishes in each course
fig4 = px.pie(
    df,
    names='course',
    color_discrete_sequence=["lightskyblue", "lightpink", "brown", "orange"],
)

# Larger labels, the fourth sector pulled outwards, black outlines on sectors
fig4.update_traces(
    textfont_size=20,
    pull=[0, 0, 0, 0.2],
    marker=dict(line=dict(color='#000000', width=2)),
)
fig4.update_layout(
    title_text='<i><b>Dessert, Main Course, Snack or Starter?</b></i>',
    title_x=0.5,
    title_font_color="#3B2F2F",
    title_font_size=36,
    font_family="Arial",
    font_size=14,
)

In [95]:
def region_plots(name, title, color):
    """Show a grouped bar chart of summed cook_time per region, split by `name`.

    Reads the notebook-global DataFrame `df`.

    Args:
        name: Column of `df` used, together with 'region', as the grouping key
            and as the bar colour.
        title: Chart title.
        color: 'RdBu' or 'Rainbow' to use that plotly sequential palette; any
            other value keeps plotly's default colours.

    Returns:
        The result of `fig.show()`.

    NOTE(review): the y axis is the *sum of cook_time* for each group, not a
    count of dishes, even though callers pass titles containing "Count".
    """
    # Only cook_time is plotted, so aggregate just that column. Summing every
    # column (the previous behaviour) also tried to "sum" the string columns.
    region_df = pd.pivot_table(
        df, index=['region', name], values='cook_time', aggfunc='sum'
    )
    region_df.reset_index(inplace=True)

    palettes = {
        'RdBu': px.colors.sequential.RdBu,
        'Rainbow': px.colors.sequential.Rainbow,
    }
    bar_options = dict(x='region', y='cook_time', color=name, barmode='group', title=title)
    if color in palettes:
        bar_options['color_discrete_sequence'] = palettes[color]

    fig = px.bar(region_df, **bar_options)
    fig.update_layout(title_text=title, template='plotly', title_x=0.5)
    return fig.show()

In [106]:
# Grouped bars per region split by course, Rainbow palette.
# NOTE(review): region_plots charts summed cook_time, not a dish count,
# despite the "Count" wording in the title.
region_plots('course','Region Wise - Courses Count','Rainbow')

In [108]:
# Grouped bars per region split by diet, RdBu palette.
# NOTE(review): region_plots charts summed cook_time, not a dish count,
# despite the "Count" wording in the title.
region_plots('diet','Region Wise-Veg & Non Veg Dishes Count','RdBu')

In [121]:
# NOTE(review): `plot_graph` is not defined in the visible part of this
# notebook; confirm it exists before running this cell, otherwise the call
# raises NameError.
plot_graph('State','state', 'Proportion of States', 'Rainbow', 'pie')

In [115]:
# STATE WISE INFO:
def state_infograph(statename, title):
    """Show a small dashboard of dish statistics for one state.

    Filters the notebook-global DataFrame `df` to rows whose 'state' equals
    `statename` and draws, on one subplot grid:
      * an indicator with the number of dishes,
      * a bar chart of dish counts per course,
      * a bar chart of dish counts per preparation time,
      * a pie chart of the diet split.

    Args:
        statename: Value of the 'state' column to select.
        title: Figure title.

    Returns:
        The result of `fig.show()`.
    """
    new_df = df[df['state'] == statename]

    total_dishes = new_df.shape[0]

    course_df = new_df['course'].value_counts().reset_index()
    course_df.columns = ['course', 'num']

    diet_df = new_df['diet'].value_counts().reset_index()
    diet_df.columns = ['diet', 'num']

    prep_time_df = new_df['prep_time'].value_counts().reset_index()
    prep_time_df.columns = ['prep_time', 'num']

    # NOTE(review): cells (2, 2) and (2, 3) are declared as pies although the
    # bars above them use rowspan=2; plotly's docs suggest None for spanned
    # cells - confirm the layout renders as intended.
    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=('Total Dishes', 'Dishes by Courses', 'Dishes by Preparation time', '', ''),
        specs=[[{'type': 'indicator'}, {'type': 'bar', 'rowspan': 2}, {'type': 'bar', 'rowspan': 2}],
               [{'type': 'pie'}, {'type': 'pie'}, {'type': 'pie'}]])

    fig.add_trace(go.Indicator(
        mode='number',
        value=int(total_dishes),
        number={'font': {'color': '#270082', 'size': 50}},
    ), row=1, col=1)

    fig.add_trace(go.Bar(
        x=course_df['course'], y=course_df['num'], marker={'color': 'blue'},
        text=course_df['num'], name='dishes by courses', textposition='auto',
    ), row=1, col=2)

    fig.add_trace(go.Pie(
        labels=diet_df['diet'], values=diet_df['num'], textinfo='percent',
        marker=dict(colors=['#00bd0d', '#fc0303']),
    ), row=2, col=1)

    # Heights and labels come from the preparation-time counts; they were
    # previously taken from course_df, pairing prep times with course counts.
    fig.add_trace(go.Bar(
        x=prep_time_df['prep_time'], y=prep_time_df['num'], marker={'color': '#fc0335'},
        text=prep_time_df['num'], name='dishes by preparation time', textposition='auto',
    ), row=1, col=3)

    fig.update_layout(title_text=title, template='plotly', title_x=0.5)

    return fig.show()

In [120]:
# Dish dashboard for the rows whose state is 'Maharashtra'
state_infograph('Maharashtra', 'Maharashtra Food Info')

In [124]:
# Dish dashboard for the rows whose state is 'Odisha'
state_infograph('Odisha', 'Odisha Food Info')

In [126]:
# Dish dashboard for the rows whose state is 'Uttar Pradesh'
state_infograph('Uttar Pradesh', 'Uttar Pradesh Food Info')

In [128]:
# Dish dashboard for the rows whose state is 'Punjab'
state_infograph('Punjab', 'Punjab Food Info')

In [131]:
# Flatten every dish's comma-separated ingredient string into one list of
# ingredient names. Names are stripped and lower-cased *before* counting so
# that " Sugar" and "sugar" are tallied as the same ingredient, and all rows
# are read rather than a hard-coded 255.
flat_list = [
    ingredient.strip().lower()
    for dish_ingredients in df['ingredients'].dropna()
    for ingredient in dish_ingredients.split(",")
    if ingredient.strip()
]

# One row per distinct ingredient, most frequent first. Counter does this in
# a single pass instead of calling list.count() once per element.
Ingredients_df = pd.DataFrame(
    Counter(flat_list).most_common(),
    columns=['Ingredient', 'Occurrence'],
)
# Kept under its original name because the top-20 chart cell reads it
unique_ingredients = Ingredients_df

# Word cloud built from all ingredient occurrences
wordcloud = WordCloud(width=900, height=400).generate(" ".join(flat_list))
plt.figure(figsize=(15, 8))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
    

In [149]:
#unique_ingredients.loc[unique_ingredients['Occurrence'] < 10, 'Ingredient'] = 'Other Ingredients'
# Bar chart of the first 20 rows of unique_ingredients
fig = px.bar(
    unique_ingredients.head(20),
    x='Ingredient',
    y='Occurrence',
)
# Recolour the bars and make them narrower
fig.update_traces(marker_color='purple', width=0.6)
fig.update_layout(
    title_text='<i><b>Top 20 Most Common Ingredients</b></i>',
    title_x=0.5,
    title_font_size=36,
    font_family="Arial",
    font_color="#0B6623",
    xaxis_title="Ingredients",
    yaxis_title="Frequency of Ingredients",
    font_size=14,
)

In [139]:
# Correlation heatmap of the numeric columns.
# `size` was previously assigned after plotting and never used; it is now
# applied as the figure size.
size = (10, 6)
plt.figure(figsize=size)
# Restrict to numeric columns: calling corr() on a frame that still holds
# string columns raises on recent pandas versions.
sns.heatmap(df.select_dtypes(include='number').corr())
plt.title('Corelation Heatmap')
plt.yticks(rotation=0)
plt.show()