<a href="https://colab.research.google.com/github/MattJonDS/IndiaRestaurants/blob/main/Zomato.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Uploading Libraries and bringing in Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = '/content/drive/MyDrive/Geo-project/zomato.csv'

In [3]:
df = pd.read_csv(data)

In [4]:
df = pd.DataFrame(df)

In [5]:
type(df)

In [6]:
df.head(3)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari


In [7]:
print(df.columns)
print(df.shape)

Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')
(51717, 17)


###  Removing duplicate and missing values in rows in data

In [8]:
df.duplicated().sum()

0

In [9]:
df.isnull().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [10]:
df['location'].unique()

array(['Banashankari', 'Basavanagudi', 'Mysore Road', 'Jayanagar',
       'Kumaraswamy Layout', 'Rajarajeshwari Nagar', 'Vijay Nagar',
       'Uttarahalli', 'JP Nagar', 'South Bangalore', 'City Market',
       'Nagarbhavi', 'Bannerghatta Road', 'BTM', 'Kanakapura Road',
       'Bommanahalli', nan, 'CV Raman Nagar', 'Electronic City', 'HSR',
       'Marathahalli', 'Sarjapur Road', 'Wilson Garden', 'Shanti Nagar',
       'Koramangala 5th Block', 'Koramangala 8th Block', 'Richmond Road',
       'Koramangala 7th Block', 'Jalahalli', 'Koramangala 4th Block',
       'Bellandur', 'Whitefield', 'East Bangalore', 'Old Airport Road',
       'Indiranagar', 'Koramangala 1st Block', 'Frazer Town', 'RT Nagar',
       'MG Road', 'Brigade Road', 'Lavelle Road', 'Church Street',
       'Ulsoor', 'Residency Road', 'Shivajinagar', 'Infantry Road',
       'St. Marks Road', 'Cunningham Road', 'Race Course Road',
       'Commercial Street', 'Vasanth Nagar', 'HBR Layout', 'Domlur',
       'Ejipura', 'Jeevan 

In [11]:
df.dropna(subset=['location'], inplace = True)

In [12]:
df.isnull().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7754
votes                              0
phone                           1187
location                           0
rest_type                        206
dish_liked                     28057
cuisines                          24
approx_cost(for two people)      325
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

###  As location is the most important column lets make it more readable

In [13]:
df_1 = df.copy()

In [14]:
df_1['location'] = df_1['location'] + ' , Bangalore , Karnataka , India'

In [15]:
df_1['location']

0             Banashankari , Bangalore , Karnataka , India
1             Banashankari , Bangalore , Karnataka , India
2             Banashankari , Bangalore , Karnataka , India
3             Banashankari , Bangalore , Karnataka , India
4             Basavanagudi , Bangalore , Karnataka , India
                               ...                        
51712           Whitefield , Bangalore , Karnataka , India
51713           Whitefield , Bangalore , Karnataka , India
51714           Whitefield , Bangalore , Karnataka , India
51715    ITPL Main Road, Whitefield , Bangalore , Karna...
51716    ITPL Main Road, Whitefield , Bangalore , Karna...
Name: location, Length: 51696, dtype: object

###  Checking column data types

In [16]:
df_1.dtypes

url                            object
address                        object
name                           object
online_order                   object
book_table                     object
rate                           object
votes                           int64
phone                          object
location                       object
rest_type                      object
dish_liked                     object
cuisines                       object
approx_cost(for two people)    object
reviews_list                   object
menu_item                      object
listed_in(type)                object
listed_in(city)                object
dtype: object

###  Subsetting data to extract Lat and Long

In [17]:
rest_loc = pd.DataFrame()

In [18]:
df_1['location'].unique()

array(['Banashankari , Bangalore , Karnataka , India',
       'Basavanagudi , Bangalore , Karnataka , India',
       'Mysore Road , Bangalore , Karnataka , India',
       'Jayanagar , Bangalore , Karnataka , India',
       'Kumaraswamy Layout , Bangalore , Karnataka , India',
       'Rajarajeshwari Nagar , Bangalore , Karnataka , India',
       'Vijay Nagar , Bangalore , Karnataka , India',
       'Uttarahalli , Bangalore , Karnataka , India',
       'JP Nagar , Bangalore , Karnataka , India',
       'South Bangalore , Bangalore , Karnataka , India',
       'City Market , Bangalore , Karnataka , India',
       'Nagarbhavi , Bangalore , Karnataka , India',
       'Bannerghatta Road , Bangalore , Karnataka , India',
       'BTM , Bangalore , Karnataka , India',
       'Kanakapura Road , Bangalore , Karnataka , India',
       'Bommanahalli , Bangalore , Karnataka , India',
       'CV Raman Nagar , Bangalore , Karnataka , India',
       'Electronic City , Bangalore , Karnataka , India',
  

In [19]:
rest_loc['Name'] = df_1['location'].unique()

### Get the Long and Lat from the geopy package of each place

In [20]:
!pip install geopy



In [21]:
from geopy.geocoders import Nominatim

In [22]:
geolocator = Nominatim(user_agent="app", timeout=None)

In [23]:
lat = []
lon = []

for name in rest_loc['Name']:
  location = geolocator.geocode(name)

  if location is None:
    lat.append(np.nan)
    lon.append(np.nan)

  else:
    lat.append(location.latitude)
    lon.append(location.longitude)





In [24]:
print(lat)
print(lon)

[12.9393328, 12.9417261, 12.9604945, 12.9418488, 12.9067683, 12.9274413, 12.9660722, 12.9039566, 12.9096941, 12.864107149999999, 12.965717999999999, 12.9812956, 12.9366292, 12.911275849999999, 12.9368751, 12.9089453, 12.9855509, 12.848759900000001, 12.9116225, 12.9552572, 12.9237639, 12.9489339, 12.9575547, 12.9348429, 12.9408685, 12.9700474, 12.9364846, 13.0394104, 12.9327778, 12.93103185, 12.9696365, 13.001147, 12.9604498, 12.9732913, 12.9277245, 12.9986827, 13.0227204, 12.9755264, 12.9672765, 12.9683054, 12.9749032, 12.9778793, 12.9735743, 12.986391, 12.9821293, 12.9744255, 12.9910375, 12.9843978, 12.9822324, 12.9934283, 13.0358698, 12.9624669, 12.945245, 12.9678074, 12.9824848, 13.0027353, 12.9931876, 13.0093455, 12.9390255, 12.9783547, 12.957998, 12.97339325, 12.9578658, 12.96381425, 12.9874878, 12.9621761, 13.007516, 12.9243692, 12.9282918, 12.9340114, 12.9585353, 12.9882338, 13.0141618, 12.9817233, 13.0422794, 13.0258087, 13.0221416, 13.0268145, 13.0784743, nan, 12.973936, 12.98

In [25]:
rest_loc['lat'] = lat
rest_loc['lon'] = lon

In [26]:
rest_loc.head(5)

Unnamed: 0,Name,lat,lon
0,"Banashankari , Bangalore , Karnataka , India",12.939333,77.553982
1,"Basavanagudi , Bangalore , Karnataka , India",12.941726,77.575502
2,"Mysore Road , Bangalore , Karnataka , India",12.960494,77.556961
3,"Jayanagar , Bangalore , Karnataka , India",12.941849,77.586898
4,"Kumaraswamy Layout , Bangalore , Karnataka , I...",12.906768,77.559502


In [27]:
rest_loc.isnull().sum()

Name    0
lat     2
lon     2
dtype: int64

In [28]:
rest_loc['lat'].isnull()

0     False
1     False
2     False
3     False
4     False
      ...  
88    False
89    False
90    False
91    False
92    False
Name: lat, Length: 93, dtype: bool

In [29]:
rest_loc[rest_loc['lat'].isnull()]

Unnamed: 0,Name,lat,lon
79,"Rammurthy Nagar , Bangalore , Karnataka , India",,
85,"Sadashiv Nagar , Bangalore , Karnataka , India",,


In [30]:
rest_loc['lat'][79] = 13.0156
rest_loc['lon'][79] = 77.6773


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_loc['lat'][79] = 13.0156
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_loc['lon'][79] = 77.6773


In [31]:
rest_loc['lat'][85] = 13.010316
rest_loc['lon'][85] = 77.580569

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_loc['lat'][85] = 13.010316
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_loc['lon'][85] = 77.580569


In [32]:
rest_loc.head(10)

Unnamed: 0,Name,lat,lon
0,"Banashankari , Bangalore , Karnataka , India",12.939333,77.553982
1,"Basavanagudi , Bangalore , Karnataka , India",12.941726,77.575502
2,"Mysore Road , Bangalore , Karnataka , India",12.960494,77.556961
3,"Jayanagar , Bangalore , Karnataka , India",12.941849,77.586898
4,"Kumaraswamy Layout , Bangalore , Karnataka , I...",12.906768,77.559502
5,"Rajarajeshwari Nagar , Bangalore , Karnataka ,...",12.927441,77.515522
6,"Vijay Nagar , Bangalore , Karnataka , India",12.966072,77.612594
7,"Uttarahalli , Bangalore , Karnataka , India",12.903957,77.509624
8,"JP Nagar , Bangalore , Karnataka , India",12.909694,77.586607
9,"South Bangalore , Bangalore , Karnataka , India",12.864107,77.554416


In [33]:
import warnings
from warnings import filterwarnings
filterwarnings('ignore')

In [34]:
rest_loc.isnull().sum()

Name    0
lat     0
lon     0
dtype: int64

### How to write Structured Queries to extract Latitudes & Longitedes.

In [35]:
df.head(2)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari


In [36]:
geolocator = Nominatim(user_agent="app", timeout=None)

In [37]:
df['address'][0]

'942, 21st Main Road, 2nd Stage, Banashankari, Bangalore'

In [38]:
geolocator.geocode(df['address'][0])

Location(21st Main Road, Seven Hills Layout, Padmanabha Nagar, Bangalore South, Bengaluru Urban, Karnataka, 560061, India, (12.9186965, 77.5541702, 0.0))

In [39]:
address = {'street':'21st Main Road', 'city':'Bangalore', 'country':'India'}

In [40]:
address_geocode = geolocator.geocode(address)

In [41]:
hasattr(address_geocode, 'latitude')

True

In [42]:
df_1['location'].value_counts()

location
BTM , Bangalore , Karnataka , India                      5124
HSR , Bangalore , Karnataka , India                      2523
Koramangala 5th Block , Bangalore , Karnataka , India    2504
JP Nagar , Bangalore , Karnataka , India                 2235
Whitefield , Bangalore , Karnataka , India               2144
                                                         ... 
West Bangalore , Bangalore , Karnataka , India              6
Yelahanka , Bangalore , Karnataka , India                   6
Jakkur , Bangalore , Karnataka , India                      3
Rajarajeshwari Nagar , Bangalore , Karnataka , India        2
Peenya , Bangalore , Karnataka , India                      1
Name: count, Length: 93, dtype: int64

In [43]:
Rest_locations = df_1['location'].value_counts().reset_index()

In [44]:
Rest_locations.columns = ['Name', 'count']

In [45]:
Rest_locations

Unnamed: 0,Name,count
0,"BTM , Bangalore , Karnataka , India",5124
1,"HSR , Bangalore , Karnataka , India",2523
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504
3,"JP Nagar , Bangalore , Karnataka , India",2235
4,"Whitefield , Bangalore , Karnataka , India",2144
...,...,...
88,"West Bangalore , Bangalore , Karnataka , India",6
89,"Yelahanka , Bangalore , Karnataka , India",6
90,"Jakkur , Bangalore , Karnataka , India",3
91,"Rajarajeshwari Nagar , Bangalore , Karnataka ,...",2


In [46]:
rest_loc

Unnamed: 0,Name,lat,lon
0,"Banashankari , Bangalore , Karnataka , India",12.939333,77.553982
1,"Basavanagudi , Bangalore , Karnataka , India",12.941726,77.575502
2,"Mysore Road , Bangalore , Karnataka , India",12.960494,77.556961
3,"Jayanagar , Bangalore , Karnataka , India",12.941849,77.586898
4,"Kumaraswamy Layout , Bangalore , Karnataka , I...",12.906768,77.559502
...,...,...,...
88,"West Bangalore , Bangalore , Karnataka , India",13.009476,77.553089
89,"Magadi Road , Bangalore , Karnataka , India",12.975608,77.555356
90,"Yelahanka , Bangalore , Karnataka , India",13.100698,77.596345
91,"Sahakara Nagar , Bangalore , Karnataka , India",13.062147,77.580061


### Merging the 2 datasets together.

In [47]:
Beng_rest_locations = Rest_locations.merge(rest_loc, on='Name')

In [48]:
type(Beng_rest_locations)

In [49]:
Beng_rest_locations.head(5)

Unnamed: 0,Name,count,lat,lon
0,"BTM , Bangalore , Karnataka , India",5124,12.911276,77.604565
1,"HSR , Bangalore , Karnataka , India",2523,12.911623,77.638862
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504,12.934843,77.618977
3,"JP Nagar , Bangalore , Karnataka , India",2235,12.909694,77.586607
4,"Whitefield , Bangalore , Karnataka , India",2144,12.969637,77.749745


### mapping a plot with folium

In [50]:
!pip install folium



In [51]:
import folium

In [52]:
folium.Map()

In [53]:
def Generate_basemap():
  basemap = folium.Map(location=[12.97, 77.59])
  return basemap

In [54]:
basemap = Generate_basemap()

In [55]:
from folium.plugins import HeatMap

In [56]:
HeatMap(Beng_rest_locations[['lat', 'lon', 'count']]).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x79fb170e7af0>

In [57]:
basemap

In [58]:
# performing a master cluster map
from folium.plugins import FastMarkerCluster

In [59]:
basemap = Generate_basemap()

In [60]:
basemap

In [63]:
FastMarkerCluster(Beng_rest_locations[['lat', 'lon', 'count']]).add_to(basemap)

<folium.plugins.fast_marker_cluster.FastMarkerCluster at 0x79fb111bb9d0>

In [64]:
basemap

In [65]:
Beng_rest_locations

Unnamed: 0,Name,count,lat,lon
0,"BTM , Bangalore , Karnataka , India",5124,12.911276,77.604565
1,"HSR , Bangalore , Karnataka , India",2523,12.911623,77.638862
2,"Koramangala 5th Block , Bangalore , Karnataka ...",2504,12.934843,77.618977
3,"JP Nagar , Bangalore , Karnataka , India",2235,12.909694,77.586607
4,"Whitefield , Bangalore , Karnataka , India",2144,12.969637,77.749745
...,...,...,...,...
88,"West Bangalore , Bangalore , Karnataka , India",6,13.009476,77.553089
89,"Yelahanka , Bangalore , Karnataka , India",6,13.100698,77.596345
90,"Jakkur , Bangalore , Karnataka , India",3,13.078474,77.606894
91,"Rajarajeshwari Nagar , Bangalore , Karnataka ,...",2,12.927441,77.515522


In [66]:
basemap = Generate_basemap()

In [70]:
for idx, row in Beng_rest_locations.iterrows():
  folium.Marker(location=[row['lat'], row['lon']], popup=row['count']).add_to(basemap)

In [71]:
basemap