# Data Analysis on Zomato Restaurant Data


# Features of the Data

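Item Name (`Item_Name`): Name of the food item.

Item Description (`Item_Description`): Description of the food item.

Price (`Item_Price`): Price of the food item, stored as text with the ₹ currency symbol (e.g. `₹700`).

Must Try tag (`Must_Try`): Boolean flag stating whether the food item is recommended or not.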



In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the menu items from the CSV file into a DataFrame.
df = pd.read_csv("products.csv")

In [4]:
# Preview the first five rows to check the columns and value formats.
df.head(n=5)

Unnamed: 0,Item_Name,Item_Description,Item_Price,Must_Try
0,Dal Makhani Combo - 2,24-Hour Dal Makhani (500g) + Soya Angaar Chaap...,₹700,False
1,Gosht Dum Biryani {600g},A master piece from the kingdom of Rampur this...,₹590,True
2,24-Hour Dal Makhani {550g},Classic black lentils slow cooked over night w...,₹390,True
3,Murgh Kachi Biryani {600g},Boneless chicken cooked in true Hydrabadi styl...,₹490,False
4,Butter Chicken {550g},Classic boneless chicken tikka tossed in butte...,₹450,True


In [5]:
# Boolean mask of the whole frame: True wherever a value is missing.
df.isna()

Unnamed: 0,Item_Name,Item_Description,Item_Price,Must_Try
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False
...,...,...,...,...
83,False,True,False,False
84,False,True,False,False
85,False,True,False,False
86,False,True,False,False


In [6]:
# Count the missing values in each column.
df.isna().sum()

Item_Name           0
Item_Description    8
Item_Price          0
Must_Try            0
dtype: int64

In [7]:
# Drop every row that has a missing value. Rebinding `df` (instead of
# `inplace=True`) avoids mutating the originally loaded frame in place.
df = df.dropna()

In [8]:
# Re-count missing values per column to confirm none remain.
df.isna().sum()

Item_Name           0
Item_Description    0
Item_Price          0
Must_Try            0
dtype: int64

In [9]:
# Inspect the description column that the word count is built from.
df.loc[:, 'Item_Description']

0     24-Hour Dal Makhani (500g) + Soya Angaar Chaap...
1     A master piece from the kingdom of Rampur this...
2     Classic black lentils slow cooked over night w...
3     Boneless chicken cooked in true Hydrabadi styl...
4     Classic boneless chicken tikka tossed in butte...
                            ...                        
77    Kashmiri Subz Biryani (1200g) + Burhani Raita ...
78    Butter Chicken (1000g) + Choice Of bread (8 pc...
79    Murgh Kachi Biryani [1.2 kg]+Burhani Raita [25...
80    A classic sweet lassi perfect to cool down the...
81    A perfect combination of mango and ginger with...
Name: Item_Description, Length: 80, dtype: object

In [10]:
from sklearn.feature_extraction.text import CountVectorizer

In [11]:
def get_top_words(x, n=None):
    """Count how often each token occurs across a collection of documents.

    A ``CountVectorizer`` with default settings is fitted on ``x`` and the
    per-document counts are summed for every vocabulary term.

    Parameters
    ----------
    x : iterable of str
        The raw text documents (e.g. a pandas Series of descriptions).
    n : int, optional
        Maximum number of ``(word, count)`` pairs to return. ``None`` (the
        default) returns every vocabulary term, which is what the function
        did before this parameter existed. Expected to be non-negative.

    Returns
    -------
    list of (str, int)
        Pairs sorted by descending count. Terms with equal counts keep the
        order in which they appear in the vectorizer's ``vocabulary_``.
    """
    vec = CountVectorizer()
    # fit_transform walks the input once, so one-shot iterables also work.
    bow = vec.fit_transform(x)
    # Summing over axis 0 collapses the documents: one total per term.
    sum_words = bow.sum(axis=0)
    # Cast to plain int so callers do not receive NumPy scalar types.
    word_freq = [
        (word, int(sum_words[0, idx])) for word, idx in vec.vocabulary_.items()
    ]
    # `pair` (not `x`) so the sort key does not shadow the function argument.
    word_freq.sort(key=lambda pair: pair[1], reverse=True)
    # Slicing with n=None yields the full list.
    return word_freq[:n]

In [12]:
# Tally token frequencies over the item descriptions.
descriptions = df['Item_Description']
words = get_top_words(descriptions)

In [13]:
# Display the (word, count) pairs, most frequent first.
words

[('with', 41),
 ('of', 39),
 ('and', 39),
 ('in', 34),
 ('bread', 24),
 ('choice', 22),
 ('chicken', 19),
 ('classic', 17),
 ('makhani', 16),
 ('cooked', 16),
 ('pcs', 14),
 ('butter', 12),
 ('boneless', 12),
 ('flavoured', 12),
 ('flour', 11),
 ('paneer', 11),
 ('soya', 10),
 ('marinated', 10),
 ('chaap', 9),
 ('made', 9),
 ('biryani', 8),
 ('coconut', 8),
 ('phirni', 8),
 ('dessert', 8),
 ('angaar', 7),
 ('creamy', 7),
 ('raita', 7),
 ('24', 6),
 ('hour', 6),
 ('dal', 6),
 ('from', 6),
 ('the', 6),
 ('tossed', 6),
 ('spices', 6),
 ('cashew', 6),
 ('refined', 6),
 ('tandoor', 6),
 ('wrapped', 6),
 ('burhani', 6),
 ('100g', 6),
 ('slow', 5),
 ('tikka', 5),
 ('gravy', 5),
 ('dried', 5),
 ('roasted', 5),
 ('300g', 5),
 ('cheese', 5),
 ('chargrilled', 5),
 ('on', 5),
 ('green', 5),
 ('chocolate', 5),
 ('mousse', 5),
 ('srikhand', 5),
 ('whole', 5),
 ('wheat', 5),
 ('200g', 4),
 ('tomatoes', 4),
 ('buttery', 4),
 ('leg', 4),
 ('blend', 4),
 ('coated', 4),
 ('yoghurt', 4),
 ('mix', 4),
 ('l

In [14]:
# Tabulate the (word, count) pairs so they can be inspected and exported.
df1 = pd.DataFrame(data=words, columns=["Word", "Frequency"])

In [15]:
# Show the word/frequency table (the notebook display elides the middle rows).
df1

Unnamed: 0,Word,Frequency
0,with,41
1,of,39
2,and,39
3,in,34
4,bread,24
...,...,...
219,cool,1
220,down,1
221,heat,1
222,combination,1


In [16]:
# Export the word frequencies as .xlsx. The legacy .xls format relied on the
# xlwt engine, which pandas 2.0 removed, so writing "text_freq.xls" fails there.
df1.to_excel("text_freq.xlsx")