# Import packages

In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Read in data (Extract)

In [29]:
# Path to the raw Zomato CSV export.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
zomato_csv_path = r'C:\Users\dlaminis\Desktop\practice\Geospatial Projects\Zomato\zomato.csv'
data = pd.read_csv(zomato_csv_path)

In [30]:
# Only the last expression of a cell is rendered automatically, so the earlier
# inspections go through display() (provided by IPython) to make them visible too.
display(type(data))    # DataFrame (2D, labelled columns) as opposed to Series (1D)
display(data.head(5))  # first 5 records
display(data.columns)  # column labels
data.shape  # (rows, columns)

(51717, 17)

## Data Transformation (Transform)

### 1. Check for duplicates

In [32]:
# Boolean mask: True for each row that exactly repeats an earlier row
duplicate_mask = data.duplicated()
# True counts as 1, so the sum is the total number of duplicated rows
duplicate_mask.sum()

0

### 2. Check for missing values

In [36]:
# Null count per column before cleaning. display() is used because only a cell's
# last expression is rendered automatically.
display(data.isnull().sum())

# Rule of thumb: drop a column when most of its values are missing; drop the affected
# rows when only a few values are missing.
# 'location' is missing in only a few rows, so those ROWS are dropped (the column is kept).
# Reassigning instead of using inplace=True keeps the step explicit and chainable.
data = data.dropna(subset=['location'])

# Null count per column after the rows without a location were removed
data.isnull().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7754
votes                              0
phone                           1187
location                           0
rest_type                        206
dish_liked                     28057
cuisines                          24
approx_cost(for two people)      325
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [45]:
# Work on an independent (deep) copy so that `data` stays as loaded and cleaned
df = data.copy(deep=True)

In [46]:
# 'location' only holds the area name; append city, state and country so the
# string is more readable and carries enough context for geocoding.
# The value is derived from `data` (assumed not to be modified after the copy) rather
# than from `df` itself, so re-running this cell does not append the suffix again.
location_suffix = ", Bangalore, Karnataka, India"
df['location'] = data['location'] + location_suffix

In [47]:
df['location']

0                Banashankari, Bangalore, Karnataka, India
1                Banashankari, Bangalore, Karnataka, India
2                Banashankari, Bangalore, Karnataka, India
3                Banashankari, Bangalore, Karnataka, India
4                Basavanagudi, Bangalore, Karnataka, India
                               ...                        
51712              Whitefield, Bangalore, Karnataka, India
51713              Whitefield, Bangalore, Karnataka, India
51714              Whitefield, Bangalore, Karnataka, India
51715    ITPL Main Road, Whitefield, Bangalore, Karnata...
51716    ITPL Main Road, Whitefield, Bangalore, Karnata...
Name: location, Length: 51696, dtype: object

### 3. Check various data types

In [48]:
df.dtypes

url                            object
address                        object
name                           object
online_order                   object
book_table                     object
rate                           object
votes                           int64
phone                          object
location                       object
rest_type                      object
dish_liked                     object
cuisines                       object
approx_cost(for two people)    object
reviews_list                   object
menu_item                      object
listed_in(type)                object
listed_in(city)                object
dtype: object

# Extract Latitude and Longitude from data

In [49]:
df.head(3)
# the data does not have lat and long, only address and name

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,"Banashankari, Bangalore, Karnataka, India",Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,"Banashankari, Bangalore, Karnataka, India",Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,"Banashankari, Bangalore, Karnataka, India","Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari


In [52]:
# Create a new DataFrame that will hold every unique location, so that latitude and longitude can be looked up once per location
rest_loc = pd.DataFrame() # start from an empty DataFrame

In [53]:
# Each distinct location string becomes one row of rest_loc (column 'Name')
unique_locations = df['location'].unique()
rest_loc['Name'] = unique_locations

In [55]:
rest_loc

Unnamed: 0,Name
0,"Banashankari, Bangalore, Karnataka, India"
1,"Basavanagudi, Bangalore, Karnataka, India"
2,"Mysore Road, Bangalore, Karnataka, India"
3,"Jayanagar, Bangalore, Karnataka, India"
4,"Kumaraswamy Layout, Bangalore, Karnataka, India"
...,...
88,"West Bangalore, Bangalore, Karnataka, India"
89,"Magadi Road, Bangalore, Karnataka, India"
90,"Yelahanka, Bangalore, Karnataka, India"
91,"Sahakara Nagar, Bangalore, Karnataka, India"


In [56]:
# using nomanitam tool under geopy to extract coords from openstreet map (geocoding)
!pip install geopy

Collecting geopy
  Obtaining dependency information for geopy from https://files.pythonhosted.org/packages/e5/15/cf2a69ade4b194aa524ac75112d5caac37414b20a3a03e6865dfe0bd1539/geopy-2.4.1-py3-none-any.whl.metadata
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
     ---------------------------------------- 0.0/40.3 kB ? eta -:--:--
     ---------------------------------------- 0.0/40.3 kB ? eta -:--:--
     ---------------------------------------- 0.0/40.3 kB ? eta -:--:--
     ---------- ----------------------------- 10.2/40.3 kB ? eta -:--:--
     ---------- ----------------------------- 10.2/40.3 kB ? eta -:--:--
     ------------------- ------------------ 20.5/40.3 kB 131.3 kB/s eta 0:00:01
     ------------------- ------------------ 20.5/40.3 kB 131.3 kB/s eta 0:00:01
     ---------------------------- --------- 30.7/40.3 kB 131.3 kB/s eta 0:00:01
     ----------------

In [57]:
from geopy.geocoders import Nominatim

In [58]:
# Nominatim asks clients to send a User-Agent that identifies the application, and a
# finite timeout (seconds) keeps a stalled request from blocking the notebook forever
# (timeout=None disables the timeout altogether).
geolocator = Nominatim(user_agent="zomato-bangalore-restaurant-analysis", timeout=10)

In [62]:
# Imported here because geopy is installed by an earlier cell of this notebook.
from geopy.extra.rate_limiter import RateLimiter

# The public Nominatim service expects at most about one request per second, so every
# lookup goes through a rate limiter. With swallow_exceptions=True a lookup that still
# fails with a geocoder service error after the retries returns None instead of raising.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
    max_retries=2,
    swallow_exceptions=True,
)

lat = []
long = []

for name in rest_loc['Name']:
    location = geocode(name)

    if location is None:
        # No result for this location: keep the row aligned and mark it as missing
        lat.append(np.nan)
        long.append(np.nan)
    else:
        lat.append(location.latitude)
        long.append(location.longitude)

In [63]:
print(lat)

[12.9152208, 12.9417261, 12.9466619, 12.9292731, 12.9081487, 12.9274413, 12.9660722, 12.9055682, 12.9096941, 12.93060265, 12.965717999999999, 12.9841958, 12.8551932, 12.911275849999999, 12.8901688, 12.9089453, 12.985098650000001, 12.848759900000001, 12.9116225, 12.9552572, 12.9244812, 12.9489339, 12.9575547, 12.9348429, 12.9408685, 12.9662372, 12.9364846, 13.0464531, 12.9327778, 12.93103185, 12.9696365, 13.001147, 12.9572041, 12.9732913, 12.9277245, 12.9986827, 13.0227204, 12.9755264, 12.9750849, 12.9749487, 12.9756281, 12.9778793, 12.974103, 12.986391, 12.9821293, 12.9744255, 12.9844498, 12.9843978, 12.9822324, 12.9934283, 13.0358698, 12.9624669, 12.945245, 12.9678074, 13.0262267, 13.0027353, 12.9931876, 13.0093455, 12.9390255, 12.978129800000001, 12.957998, 12.97339325, 12.9578658, 12.957434549999999, 12.987693, 12.944569, 13.007516, 12.9243692, 12.9282918, 12.9340114, 12.9344425, 12.9882338, 13.0141618, 13.022234699999998, 13.0431413, 13.0258087, 13.0221416, 13.0268145, 13.0784743, 

In [64]:
# Attach the geocoded coordinates; both lists are in the same order as rest_loc['Name']
rest_loc['lat'], rest_loc['long'] = lat, long

In [65]:
rest_loc

Unnamed: 0,Name,lat,long
0,"Banashankari, Bangalore, Karnataka, India",12.915221,77.573598
1,"Basavanagudi, Bangalore, Karnataka, India",12.941726,77.575502
2,"Mysore Road, Bangalore, Karnataka, India",12.946662,77.530090
3,"Jayanagar, Bangalore, Karnataka, India",12.929273,77.582423
4,"Kumaraswamy Layout, Bangalore, Karnataka, India",12.908149,77.555318
...,...,...,...
88,"West Bangalore, Bangalore, Karnataka, India",13.009476,77.553089
89,"Magadi Road, Bangalore, Karnataka, India",12.975608,77.555356
90,"Yelahanka, Bangalore, Karnataka, India",13.100698,77.596345
91,"Sahakara Nagar, Bangalore, Karnataka, India",13.062147,77.580061


In [66]:
# check for missing values
rest_loc.isnull().sum()

Name    0
lat     2
long    2
dtype: int64

In [68]:
# Rows for which the geocoder returned no latitude
missing_coords = rest_loc['lat'].isna()
rest_loc[missing_coords]
# the coordinates of these locations can be looked up manually (e.g. via a web search)

Unnamed: 0,Name,lat,long
79,"Rammurthy Nagar, Bangalore, Karnataka, India",,
85,"Sadashiv Nagar, Bangalore, Karnataka, India",,


In [72]:
# Silence warnings for the rest of the session.
# NOTE(review): this is a blanket filter — it hides every warning category, including
# any pandas warnings emitted by later cells. Consider scoping it more narrowly.
import warnings
from warnings import filterwarnings
warnings.filterwarnings(action='ignore')

In [73]:
# Manually researched coordinates for the locations the geocoder could not resolve.
manual_coords = {
    "Rammurthy Nagar, Bangalore, Karnataka, India": (13.0120218, 77.6777817),
    "Sadashiv Nagar, Bangalore, Karnataka, India": (13.0080052, 77.5796762),
}

# Write through a single .loc call per location. Chained indexing such as
# rest_loc['lat'][79] = ... assigns to an intermediate object, triggers
# SettingWithCopyWarning and has no effect under pandas copy-on-write.
# Matching on the name avoids depending on hard-coded row positions.
for place, (place_lat, place_long) in manual_coords.items():
    rest_loc.loc[rest_loc['Name'] == place, ['lat', 'long']] = [place_lat, place_long]


In [74]:
rest_loc.isnull().sum()

Name    0
lat     0
long    0
dtype: int64

## How to write Structured Queries to extract Lat and Long

In [76]:
df.head(2)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,"Banashankari, Bangalore, Karnataka, India",Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,"Banashankari, Bangalore, Karnataka, India",Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari


In [78]:
# Geocoder for the structured-query examples below. An identifying User-Agent is sent,
# and a finite timeout (seconds) is used because timeout=None would wait indefinitely.
geolocator = Nominatim(user_agent="zomato-bangalore-restaurant-analysis", timeout=10)

In [77]:
# Street address of the row labelled 0
df.loc[0, 'address']

'942, 21st Main Road, 2nd Stage, Banashankari, Bangalore'

In [81]:
# Try to geocode the free-form physical address of the row labelled 0
first_address = df.loc[0, 'address']
loc = geolocator.geocode(first_address)

In [82]:
hasattr(loc, 'latitude') # checking if the above obj has the "latitude" attribute

False

In [83]:
# The free-form address above could not be geocoded, so pass a structured query
# instead: a dictionary that tells Nominatim which part of the address each value is.
addr = {
    'street': '1st Main Road',
    'city': 'Bangalore',
    'country': 'India',
    'state': 'Karnataka',
}
addr_geocode = geolocator.geocode(addr)

In [85]:
hasattr(addr_geocode, 'latitude')

True

In [86]:
addr_geocode.latitude

12.9935097

# Exploratory Data Analysis (EDA)

## Where are most of the restaurants located in Bangalore?

In [96]:
# compute count for each location
# lat and long used to make heatmap (used to identify where something occurs/showing density of locations)

df['location'].value_counts() # shows frequency of each location

BTM, Bangalore, Karnataka, India                      5124
HSR, Bangalore, Karnataka, India                      2523
Koramangala 5th Block, Bangalore, Karnataka, India    2504
JP Nagar, Bangalore, Karnataka, India                 2235
Whitefield, Bangalore, Karnataka, India               2144
                                                      ... 
West Bangalore, Bangalore, Karnataka, India              6
Yelahanka, Bangalore, Karnataka, India                   6
Jakkur, Bangalore, Karnataka, India                      3
Rajarajeshwari Nagar, Bangalore, Karnataka, India        2
Peenya, Bangalore, Karnataka, India                      1
Name: location, Length: 93, dtype: int64

In [99]:
# value_counts() returns a 1-D Series indexed by location; reset_index() moves that
# index into a regular column, giving the DataFrame that is needed here.
location_counts = df['location'].value_counts()
rest_locations = location_counts.reset_index()

In [100]:
# Name the two columns explicitly: the location string and its number of restaurants
count_table_columns = ["Name", "count"]
rest_locations.columns = count_table_columns

In [103]:
rest_locations

Unnamed: 0,Name,count
0,"BTM, Bangalore, Karnataka, India",5124
1,"HSR, Bangalore, Karnataka, India",2523
2,"Koramangala 5th Block, Bangalore, Karnataka, I...",2504
3,"JP Nagar, Bangalore, Karnataka, India",2235
4,"Whitefield, Bangalore, Karnataka, India",2144
...,...,...
88,"West Bangalore, Bangalore, Karnataka, India",6
89,"Yelahanka, Bangalore, Karnataka, India",6
90,"Jakkur, Bangalore, Karnataka, India",3
91,"Rajarajeshwari Nagar, Bangalore, Karnataka, India",2


In [105]:
# rest_loc already holds lat/long per location, so join it onto the counts using the
# shared 'Name' column (left: rest_locations, right: rest_loc).
# validate='one_to_one' makes pandas raise if a name is duplicated on either side, and
# the assertion catches locations silently dropped by the inner join (e.g. when the
# location strings of the two frames no longer match).
bang_rest_locations = rest_locations.merge(
    rest_loc, how='inner', on='Name', validate='one_to_one'
)
assert len(bang_rest_locations) == len(rest_locations), (
    "some locations have no matching row in rest_loc"
)

In [107]:
bang_rest_locations

Unnamed: 0,Name,count,lat,long
0,"BTM, Bangalore, Karnataka, India",5124,12.911276,77.604565
1,"HSR, Bangalore, Karnataka, India",2523,12.911623,77.638862
2,"Koramangala 5th Block, Bangalore, Karnataka, I...",2504,12.934843,77.618977
3,"JP Nagar, Bangalore, Karnataka, India",2235,12.909694,77.586607
4,"Whitefield, Bangalore, Karnataka, India",2144,12.969637,77.749745
...,...,...,...,...
88,"West Bangalore, Bangalore, Karnataka, India",6,13.009476,77.553089
89,"Yelahanka, Bangalore, Karnataka, India",6,13.100698,77.596345
90,"Jakkur, Bangalore, Karnataka, India",3,13.078474,77.606894
91,"Rajarajeshwari Nagar, Bangalore, Karnataka, India",2,12.927441,77.515522


### Create Heatmap

In [None]:
# Two simple steps: create a base map, then add a HeatMap layer to it

In [110]:
!pip install folium

Collecting folium
  Obtaining dependency information for folium from https://files.pythonhosted.org/packages/18/09/8569904c8ce5679cc02826d98de633c07abcd2443a23181e5f71ff9dacbc/folium-0.15.1-py2.py3-none-any.whl.metadata
  Downloading folium-0.15.1-py2.py3-none-any.whl.metadata (3.4 kB)
Collecting branca>=0.6.0 (from folium)
  Obtaining dependency information for branca>=0.6.0 from https://files.pythonhosted.org/packages/17/ce/14166d0e273d12065516625fb02426350298e7b4ba59198b5fe454b46202/branca-0.7.1-py3-none-any.whl.metadata
  Downloading branca-0.7.1-py3-none-any.whl.metadata (1.5 kB)
Downloading folium-0.15.1-py2.py3-none-any.whl (97 kB)
   ---------------------------------------- 0.0/97.0 kB ? eta -:--:--
   ------------ --------------------------- 30.7/97.0 kB 640.0 kB/s eta 0:00:01
   ---------------- ----------------------- 41.0/97.0 kB 487.6 kB/s eta 0:00:01
   ------------------------- -------------- 61.4/97.0 kB 465.5 kB/s eta 0:00:01
   ------------------------------------- --

In [117]:
import folium

In [118]:
# Factory for the blank map that layers (e.g. a heat map) are added to
def generate_basemap(location=(12.97, 77.59), **map_kwargs):
    """Create an empty folium base map.

    Args:
        location: (latitude, longitude) the map is centred on. Defaults to the
            point this notebook uses for Bangalore.
        **map_kwargs: Extra keyword arguments forwarded unchanged to ``folium.Map``
            (for example ``zoom_start``).

    Returns:
        The newly created ``folium.Map``.
    """
    basemap = folium.Map(location=list(location), **map_kwargs)
    return basemap

In [121]:
basemap = generate_basemap()

In [122]:
from folium.plugins import HeatMap

In [126]:
bang_rest_locations.columns  # not rendered: only a cell's final expression is shown
# The three columns the heat map needs: position plus the count used as weight
heat_columns = ['lat', 'long', 'count']
bang_rest_locations[heat_columns]

Unnamed: 0,lat,long,count
0,12.911276,77.604565,5124
1,12.911623,77.638862,2523
2,12.934843,77.618977,2504
3,12.909694,77.586607,2235
4,12.969637,77.749745,2144
...,...,...,...
88,13.009476,77.553089,6
89,13.100698,77.596345,6
90,13.078474,77.606894,3
91,12.927441,77.515522,2


In [127]:
# Each row is passed as [lat, long, weight]; the restaurant count is the weight
heat_points = bang_rest_locations.loc[:, ['lat', 'long', 'count']]
# A HeatMap on its own is just an object — it only shows once attached to the base map
HeatMap(heat_points).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x14f695fe310>

In [128]:
basemap