## Imports + validating data

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = pd.read_csv("zomato.csv", encoding = "ISO-8859-1")

In [3]:
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [4]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [5]:
data.shape

(9551, 21)

In [6]:
data.dropna(inplace=True)
data.reset_index(drop=True, inplace=True)

In [7]:
data.shape

(9542, 21)

### What do we know?
#### We know so far that:
* The data has no nan
* The data has no nulls
* The data has 21 columns and about 9.5k samples

# Data Exploration

## Determing best country codes

In [8]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [9]:
data.head(5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [10]:
data['Country Code'].unique()

array([162,  30, 216,  14,  37, 184, 214,   1,  94, 148, 215, 166, 189,
       191, 208], dtype=int64)

In [11]:
data['Country Code'].value_counts(ascending=False)

1      8652
216     425
215      80
214      60
30       60
189      60
148      40
208      34
14       24
162      22
94       21
191      20
166      20
184      20
37        4
Name: Country Code, dtype: int64

## Beginning filtering of columns we need

In [12]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [13]:
# We are creating a list of columns we need
want = []

In [14]:
data['City']

0            Makati City
1            Makati City
2       Mandaluyong City
3       Mandaluyong City
4       Mandaluyong City
5       Mandaluyong City
6             Pasay City
7             Pasay City
8             Pasay City
9             Pasig City
10            Pasig City
11            Pasig City
12           Quezon City
13         San Juan City
14         San Juan City
15            Santa Rosa
16            Santa Rosa
17         Tagaytay City
18           Taguig City
19           Taguig City
20           Taguig City
21           Taguig City
22             Brasí_lia
23             Brasí_lia
24             Brasí_lia
25             Brasí_lia
26             Brasí_lia
27             Brasí_lia
28             Brasí_lia
29             Brasí_lia
              ...       
9512              Ankara
9513              Ankara
9514              Ankara
9515              Ankara
9516              Ankara
9517              Ankara
9518              Ankara
9519              Ankara
9520              Ankara


In [15]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [16]:
data['Locality'].unique()

array(['Century City Mall, Poblacion, Makati City',
       'Little Tokyo, Legaspi Village, Makati City',
       'Edsa Shangri-La, Ortigas, Mandaluyong City', ..., 'Ko\x81ôuyolu',
       'Kuruí_e\x81ôme', 'Moda'], dtype=object)

In [17]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [18]:
data['Locality Verbose'].unique()

array(['Century City Mall, Poblacion, Makati City, Makati City',
       'Little Tokyo, Legaspi Village, Makati City, Makati City',
       'Edsa Shangri-La, Ortigas, Mandaluyong City, Mandaluyong City',
       ..., 'Ko\x81ôuyolu, ÛÁstanbul', 'Kuruí_e\x81ôme, ÛÁstanbul',
       'Moda, ÛÁstanbul'], dtype=object)

In [19]:
data['Longitude'].unique().shape

(8111,)

In [20]:
data['Latitude'].unique().shape

(8668,)

In [21]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

## Found cuisines - this will be added later

In [22]:
data['Cuisines'].unique()

array(['French, Japanese, Desserts', 'Japanese',
       'Seafood, Asian, Filipino, Indian', ..., 'Burger, Izgara',
       'World Cuisine, Patisserie, Cafe', 'Italian, World Cuisine'],
      dtype=object)

In [23]:
data['Cuisines']

0                       French, Japanese, Desserts
1                                         Japanese
2                 Seafood, Asian, Filipino, Indian
3                                  Japanese, Sushi
4                                 Japanese, Korean
5                                          Chinese
6                                  Asian, European
7               Seafood, Filipino, Asian, European
8                          European, Asian, Indian
9                                         Filipino
10                               Filipino, Mexican
11                   American, Ice Cream, Desserts
12                               Filipino, Mexican
13                                        Filipino
14                                          Korean
15               Cafe, American, Italian, Filipino
16                                  Italian, Pizza
17                                        Filipino
18                          Cafe, Korean, Desserts
19                 Cafe, Bakery

In [24]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

## Filtering average cost for two and currency to get rupee values
## Now, we will convert the cost for two into us dollars for easy viewing

In [25]:
data['Average Cost for two']

0       1100
1       1200
2       4000
3       1500
4       1500
5       1000
6       2000
7       2000
8       6000
9       1100
10       800
11       900
12       800
13      1000
14       700
15       800
16       850
17      1200
18       600
19      1500
20      3000
21      1800
22        55
23        30
24        45
25        60
26        50
27        80
28        90
29       200
        ... 
9512      50
9513     100
9514      60
9515      80
9516     150
9517     400
9518      60
9519      70
9520      40
9521      50
9522      80
9523     100
9524      50
9525      70
9526      70
9527      40
9528      50
9529      30
9530      80
9531     130
9532      75
9533      80
9534      35
9535      40
9536      90
9537      80
9538     105
9539     170
9540     120
9541      55
Name: Average Cost for two, Length: 9542, dtype: int64

In [26]:
data['Currency']

0         Botswana Pula(P)
1         Botswana Pula(P)
2         Botswana Pula(P)
3         Botswana Pula(P)
4         Botswana Pula(P)
5         Botswana Pula(P)
6         Botswana Pula(P)
7         Botswana Pula(P)
8         Botswana Pula(P)
9         Botswana Pula(P)
10        Botswana Pula(P)
11        Botswana Pula(P)
12        Botswana Pula(P)
13        Botswana Pula(P)
14        Botswana Pula(P)
15        Botswana Pula(P)
16        Botswana Pula(P)
17        Botswana Pula(P)
18        Botswana Pula(P)
19        Botswana Pula(P)
20        Botswana Pula(P)
21        Botswana Pula(P)
22      Brazilian Real(R$)
23      Brazilian Real(R$)
24      Brazilian Real(R$)
25      Brazilian Real(R$)
26      Brazilian Real(R$)
27      Brazilian Real(R$)
28      Brazilian Real(R$)
29      Brazilian Real(R$)
               ...        
9512      Turkish Lira(TL)
9513      Turkish Lira(TL)
9514      Turkish Lira(TL)
9515      Turkish Lira(TL)
9516      Turkish Lira(TL)
9517      Turkish Lira(TL)
9

In [27]:
# Checking for any non rupee values
hasNonRupee = False
for elem in data['Currency']:
    if not (elem == 'Indian Rupees(Rs.)'):
        hasNonRupee = True

In [28]:
hasNonRupee

True

In [29]:
data = data[data['Currency'] == 'Indian Rupees(Rs.)']

In [30]:
# Checking for any non rupee values
hasNonRupee = False
for elem in data['Currency']:
    if not (elem == 'Indian Rupees(Rs.)'):
        hasNonRupee = True

In [31]:
hasNonRupee

False

In [32]:
want.append('Cuisines')

In [33]:
want

['Cuisines']

In [34]:
dolToRup = 69.8
data['Average Cost for two'] = data['Average Cost for two']/dolToRup

In [35]:
want.append('Average Cost for two')

In [36]:
want

['Cuisines', 'Average Cost for two']

In [37]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

## Viewing other columns and choosing

In [38]:
data['Has Online delivery'].value_counts()

No     6229
Yes    2423
Name: Has Online delivery, dtype: int64

In [39]:
data['Is delivering now'].value_counts()

No     8618
Yes      34
Name: Is delivering now, dtype: int64

In [40]:
data['Has Table booking'].value_counts()

No     7541
Yes    1111
Name: Has Table booking, dtype: int64

In [41]:
data['Switch to order menu'].value_counts()

No    8652
Name: Switch to order menu, dtype: int64

In [42]:
data['Price range'].value_counts()

1    4295
2    2858
3    1111
4     388
Name: Price range, dtype: int64

In [43]:
data['Price range']

615     3
616     2
617     2
618     2
619     3
620     4
621     4
622     4
623     3
624     4
625     1
626     2
627     3
628     1
629     4
630     1
631     3
632     2
633     3
634     2
635     3
636     4
637     1
638     3
639     3
640     2
641     3
642     3
643     3
644     2
       ..
9237    2
9238    3
9239    2
9240    3
9241    3
9242    4
9243    4
9244    3
9245    1
9246    3
9247    2
9248    2
9249    1
9250    3
9251    4
9252    1
9253    4
9254    3
9255    2
9256    2
9257    2
9258    2
9259    2
9260    2
9261    2
9262    2
9263    2
9264    2
9265    4
9266    3
Name: Price range, Length: 8652, dtype: int64

In [44]:
want.append('Has Table booking')
want.append('Has Online delivery')
want.append('Price range')

In [45]:
want

['Cuisines',
 'Average Cost for two',
 'Has Table booking',
 'Has Online delivery',
 'Price range']

In [46]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

## Getting possible Y values for use in other program

In [47]:
# Now, we need to get all the ys we need - rating values
data['Aggregate rating']

615     3.9
616     3.5
617     3.6
618     4.0
619     4.2
620     4.0
621     4.3
622     4.0
623     3.6
624     3.8
625     3.9
626     4.1
627     3.4
628     4.9
629     3.8
630     3.9
631     3.7
632     4.4
633     4.2
634     4.1
635     4.2
636     3.7
637     4.5
638     4.1
639     4.3
640     4.4
641     4.4
642     3.8
643     3.6
644     4.6
       ... 
9237    3.4
9238    3.5
9239    3.4
9240    3.6
9241    3.3
9242    3.3
9243    3.5
9244    3.2
9245    3.5
9246    3.5
9247    4.6
9248    4.3
9249    4.2
9250    4.4
9251    4.1
9252    3.7
9253    4.9
9254    3.8
9255    3.8
9256    3.6
9257    3.5
9258    3.6
9259    3.7
9260    4.1
9261    4.0
9262    3.6
9263    3.7
9264    4.3
9265    3.8
9266    4.4
Name: Aggregate rating, Length: 8652, dtype: float64

In [48]:
data['Rating color']

615         Yellow
616         Yellow
617         Yellow
618          Green
619          Green
620          Green
621          Green
622          Green
623         Yellow
624         Yellow
625         Yellow
626          Green
627         Orange
628     Dark Green
629         Yellow
630         Yellow
631         Yellow
632          Green
633          Green
634          Green
635          Green
636         Yellow
637     Dark Green
638          Green
639          Green
640          Green
641          Green
642         Yellow
643         Yellow
644     Dark Green
           ...    
9237        Orange
9238        Yellow
9239        Orange
9240        Yellow
9241        Orange
9242        Orange
9243        Yellow
9244        Orange
9245        Yellow
9246        Yellow
9247    Dark Green
9248         Green
9249         Green
9250         Green
9251         Green
9252        Yellow
9253    Dark Green
9254        Yellow
9255        Yellow
9256        Yellow
9257        Yellow
9258        

In [49]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [50]:
want.append('Aggregate rating')

In [51]:
data['Rating text']

615          Good
616          Good
617          Good
618     Very Good
619     Very Good
620     Very Good
621     Very Good
622     Very Good
623          Good
624          Good
625          Good
626     Very Good
627       Average
628     Excellent
629          Good
630          Good
631          Good
632     Very Good
633     Very Good
634     Very Good
635     Very Good
636          Good
637     Excellent
638     Very Good
639     Very Good
640     Very Good
641     Very Good
642          Good
643          Good
644     Excellent
          ...    
9237      Average
9238         Good
9239      Average
9240         Good
9241      Average
9242      Average
9243         Good
9244      Average
9245         Good
9246         Good
9247    Excellent
9248    Very Good
9249    Very Good
9250    Very Good
9251    Very Good
9252         Good
9253    Excellent
9254         Good
9255         Good
9256         Good
9257         Good
9258         Good
9259         Good
9260    Very Good
9261    Ve

In [52]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [53]:
data['Votes']

615      140
616       71
617       94
618       87
619      177
620       45
621      133
622       41
623       59
624       46
625      103
626      121
627       70
628       77
629       57
630       98
631      175
632      134
633      166
634      168
635     1582
636     1315
637      217
638      769
639      731
640      113
641      944
642       63
643      375
644      166
        ... 
9237      85
9238     172
9239      96
9240      63
9241      34
9242      67
9243      83
9244      26
9245     109
9246      57
9247     289
9248     230
9249     270
9250      73
9251     125
9252     240
9253     345
9254      27
9255     175
9256     154
9257     124
9258      84
9259      57
9260      75
9261     169
9262     193
9263      85
9264     172
9265      74
9266     316
Name: Votes, Length: 8652, dtype: int64

## Processing cuisines
### Changing to number of cuisines available

In [54]:
print(data['Cuisines'].nunique())

1392


In [55]:
print(data['Cuisines'].value_counts())

North Indian                                                         936
North Indian, Chinese                                                511
Fast Food                                                            348
Chinese                                                              340
North Indian, Mughlai                                                334
Cafe                                                                 279
Bakery                                                               216
North Indian, Mughlai, Chinese                                       197
Bakery, Desserts                                                     170
Street Food                                                          149
Pizza, Fast Food                                                     130
Chinese, Fast Food                                                   118
Mithai, Street Food                                                  116
South Indian                                       

In [56]:
data.reset_index(drop=True, inplace=True)

for i in range(len(data['Cuisines'])):
    curr = data['Cuisines'][i]
    curr = curr.split()
    curr = len(curr)
    data['Cuisines'][i] = curr  

In [57]:
print(data['Cuisines'].value_counts())

3     2397
2     2338
1     1395
4     1302
5      649
6      355
7      121
8       39
11      18
9       18
10      12
12       8
Name: Cuisines, dtype: int64


# Taking care of rating 0 values

In [58]:
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,3400025,Jahanpanah,1,Agra,"E 23, Shopping Arcade, Sadar Bazaar, Agra Cant...",Agra Cantt,"Agra Cantt, Agra",78.011544,27.161661,3,...,Indian Rupees(Rs.),No,No,No,No,3,3.9,Yellow,Good,140
1,3400341,Rangrezz Restaurant,1,Agra,"E-20, Shopping Arcade, Sadar Bazaar, Agra Cant...",Agra Cantt,"Agra Cantt, Agra",0.0,0.0,3,...,Indian Rupees(Rs.),No,No,No,No,2,3.5,Yellow,Good,71
2,3400005,Time2Eat - Mama Chicken,1,Agra,"Main Market, Sadar Bazaar, Agra Cantt, Agra",Agra Cantt,"Agra Cantt, Agra",78.011608,27.160832,2,...,Indian Rupees(Rs.),No,No,No,No,2,3.6,Yellow,Good,94
3,3400021,Chokho Jeeman Marwari Jain Bhojanalya,1,Agra,"1/48, Delhi Gate, Station Road, Raja Mandi, Ci...",Civil Lines,"Civil Lines, Agra",77.998092,27.195928,1,...,Indian Rupees(Rs.),No,No,No,No,2,4.0,Green,Very Good,87
4,3400017,Pinch Of Spice,1,Agra,"23/453, Opposite Sanjay Cinema, Wazipura Road,...",Civil Lines,"Civil Lines, Agra",78.007553,27.201725,4,...,Indian Rupees(Rs.),No,No,No,No,3,4.2,Green,Very Good,177


In [59]:
data['Aggregate rating'].value_counts()

0.0    2139
3.1     511
3.2     510
3.4     477
3.3     472
3.0     465
3.5     454
3.6     411
2.9     380
3.7     363
3.8     336
2.8     314
3.9     283
2.7     250
4.0     193
2.6     190
4.1     185
4.2     140
2.5     109
4.3     100
2.4      83
4.4      74
2.3      46
4.5      37
4.6      35
2.2      26
4.9      19
4.7      16
2.1      15
4.8       9
2.0       7
1.9       2
1.8       1
Name: Aggregate rating, dtype: int64

In [60]:
data.reset_index(drop=True, inplace=True)
toDrop = []
for i in range(len(data['Aggregate rating'])):
    curr = data['Aggregate rating'][i]
    if (curr == 0.0):
        toDrop.append(i)
        
data.drop(toDrop, inplace=True)
data.reset_index(drop=True, inplace=True)

In [61]:
cleanedData = data[want]

In [62]:
cleanedData.head()

Unnamed: 0,Cuisines,Average Cost for two,Has Table booking,Has Online delivery,Price range,Aggregate rating
0,3,12.17765,No,No,3,3.9
1,3,10.028653,No,No,2,3.5
2,2,7.163324,No,No,2,3.6
3,1,5.730659,No,No,2,4.0
4,4,14.326648,No,No,3,4.2


In [63]:
type(cleanedData)

pandas.core.frame.DataFrame

In [64]:
cleanedData.to_csv('CleanedData.csv', index=False)