## 7. Geo-Spatial Features

In [1]:
import pandas as pd
from tqdm import tqdm
tqdm.pandas()
import pandas_profiling as pf 
import os
import numpy as np
import seaborn as sns
import matplotlib as plt
pd.options.mode.chained_assignment = None
%matplotlib inline

  from pandas import Panel


In [2]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent='my_app',timeout=None)

from geopy.extra.rate_limiter import RateLimiter

### Extraction for a geo-coordinates

In [3]:
location = geolocator.reverse("-33.902, 18.675")
location.raw['address']

{'suburb': 'Cape Town Ward 3',
 'town': 'Bellville',
 'county': 'City of Cape Town',
 'state': 'Western Cape',
 'country': 'South Africa',
 'country_code': 'za'}

### GeoData Creation

In [7]:
GeoData = pd.read_csv('Data/MergedData.csv')
GeoData = GeoData[['segment_id','EventId','latitude', 'longitude']]
display(GeoData.head(2))
print(GeoData.shape)

Unnamed: 0,segment_id,EventId,latitude,longitude
0,S0B3CGQ,60558,-33.888275,18.540896
1,RYJYAPI,60559,-34.140857,18.930756


(53845, 4)


### GeoCoordinates Rounding

In [8]:
# Rounding off to 2 decimal places
GeoData['latitudeX'] = GeoData.latitude.round(3)
GeoData['longitudeX'] = GeoData.longitude.round(3)
display(GeoData.head(2))
print('GeoData size = ',len(GeoData))

Unnamed: 0,segment_id,EventId,latitude,longitude,latitudeX,longitudeX
0,S0B3CGQ,60558,-33.888275,18.540896,-33.888,18.541
1,RYJYAPI,60559,-34.140857,18.930756,-34.141,18.931


GeoData size =  53845


In [9]:
GeoData.to_csv('Data/Geo_Rounding.csv', index=None)

### Dropping duplicates in latitudeX & latitudeX

In [10]:
#Combining latitudeX & latitudeX
GeoData['LatLong'] = GeoData.latitudeX.astype('str')+'+'+GeoData.longitudeX.astype('str')
GeoData[:1]

Unnamed: 0,segment_id,EventId,latitude,longitude,latitudeX,longitudeX,LatLong
0,S0B3CGQ,60558,-33.888275,18.540896,-33.888,18.541,-33.888+18.541


In [11]:
# GeoData slim
GeoDataX = GeoData.drop_duplicates(['LatLong'], keep='last')
# Reset index
GeoDataX.reset_index(drop=True, inplace=True)
display(GeoDataX.head(2))
print('GeoDataX size = ',len(GeoDataX))

Unnamed: 0,segment_id,EventId,latitude,longitude,latitudeX,longitudeX,LatLong
0,X4UA382,60564,-33.885498,18.638471,-33.885,18.638,-33.885+18.638
1,SPPGKO2,60578,-33.855022,18.531405,-33.855,18.531,-33.855+18.531


GeoDataX size =  2215


In [8]:
#Dropping LatLong
del GeoDataX['LatLong']

### Saving the data

In [9]:
GeoDataX.to_csv('Data/geodata/geolocation.csv', index=None)

### Extracting the geo info for the data frmae

In [10]:
def get_suburb(row):
    pos = str(row['latitudeX']) + ', ' + str(row['longitudeX'])
    location = geolocator.reverse(pos)
    return location.raw['address']

In [11]:
GeoDataX['address'] = GeoDataX[['latitudeX','longitudeX']].progress_apply(get_suburb,axis=1)
#Saving the results
GeoDataX.to_csv('Data/GeoDataX_New.csv',index=None)
GeoDataX[:2]

100%|██████████████████████████████████████████████████████████████████████████████| 2215/2215 [29:00<00:00,  1.27it/s]


Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,address
0,60564,-33.885498,18.638471,-33.885,18.638,"{'road': 'Springfield Road', 'suburb': 'Cape T..."
1,60578,-33.855022,18.531405,-33.855,18.531,"{'road': 'N7', 'suburb': 'Milnerton', 'city': ..."


In [36]:
GeoDataX =pd.read_csv('Data/GeoDataX_New.csv')
GeoDataX[:2]

Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,address
0,60564,-33.885498,18.638471,-33.885,18.638,"{'road': 'Springfield Road', 'suburb': 'Cape T..."
1,60578,-33.855022,18.531405,-33.855,18.531,"{'road': 'N7', 'suburb': 'Milnerton', 'city': ..."


In [9]:
GeoDataMerged =pd.read_csv('Data/GeoDataRetrieved_PythonVersion.csv')
GeoDataMerged.reset_index(drop=True, inplace=True)
print(GeoDataMerged.shape)
display(GeoDataMerged.head(2))

(2215, 8)


Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,SN,suburb,address
0,60564,-33.885498,18.638471,-33.885,18.638,1,"{'suburb': 'Cape Town Ward 3', 'town': 'Bellvi...","{'road': 'Springfield Road', 'suburb': 'Cape T..."
1,60578,-33.855022,18.531405,-33.855,18.531,2,"{'suburb': 'Cape Town Ward 3', 'town': 'Bellvi...","{'road': 'N7', 'suburb': 'Milnerton', 'city': ..."


In [21]:
#Dropping LatLong
del GeoDataMerged['suburb']

### Extracting Suburb, Town, County, 

In [54]:
GeoDataMerged['address'][0]

"{'road': 'Springfield Road', 'suburb': 'Cape Town Ward 21', 'town': 'Bellville', 'county': 'City of Cape Town', 'state': 'Western Cape', 'postcode': '7530', 'country': 'South Africa', 'country_code': 'za'}"

In [23]:
# Checking the data type
type({'suburb': 'Cape Town Ward 3',
 'town': 'Bellville',
 'county': 'City of Cape Town',
 'state': 'Western Cape',
 'country': 'South Africa',
 'country_code': 'za'})

dict

### Splitting  the dictionary column to pandas coulumn

In [11]:
import ast 

In [12]:
GeoDataMergedx=GeoDataMerged['address'].apply(ast.literal_eval).apply(pd.Series)
GeoDataMergedx.reset_index(drop=True, inplace=True)
display(GeoDataMergedx.head(2))
print('GeoDataMergedx = ', GeoDataMergedx.shape)

Unnamed: 0,road,suburb,town,county,state,postcode,country,country_code,city,neighbourhood,...,restaurant,pitch,hotel,doityourself,parking,bus_stop,footway,station,toilets,police
0,Springfield Road,Cape Town Ward 21,Bellville,City of Cape Town,Western Cape,7530,South Africa,za,,,...,,,,,,,,,,
1,N7,Milnerton,,City of Cape Town,Western Cape,7441,South Africa,za,Cape Town,,...,,,,,,,,,,


GeoDataMergedx =  (2215, 49)


### Selecting the columns of interest

In [14]:
GeoDataMergedx = GeoDataMergedx[['road','suburb','town','state','city']]
GeoDataMergedx.head(2)

Unnamed: 0,road,suburb,town,state,city
0,Springfield Road,Cape Town Ward 21,Bellville,Western Cape,
1,N7,Milnerton,,Western Cape,Cape Town


### Checking the Completeness

In [15]:
GeoDataMergedx.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2215 entries, 0 to 2214
Data columns (total 5 columns):
road      2134 non-null object
suburb    2215 non-null object
town      825 non-null object
state     2215 non-null object
city      1195 non-null object
dtypes: object(5)
memory usage: 86.6+ KB


### Merging the GeoDataMergedx with GeoDataMerged Dataset with 

In [17]:
GeoDataMergedY = GeoDataX.join(GeoDataMergedx, how='outer')
print(GeoDataMergedY.shape)
GeoDataMergedY[:4]

(2215, 10)


Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,road,suburb,town,state,city
0,60564,-33.885498,18.638471,-33.885,18.638,Springfield Road,Cape Town Ward 21,Bellville,Western Cape,
1,60578,-33.855022,18.531405,-33.855,18.531,N7,Milnerton,,Western Cape,Cape Town
2,60684,-33.7965,18.882241,-33.796,18.882,N1,Stellenbosch Ward 18,,Western Cape,Stellenbosch Local Municipality
3,60701,-34.068404,18.810695,-34.068,18.811,Old Main Road,Cape Town Ward 15,,Western Cape,


In [18]:
GeoDataMergedY.to_csv('Data/GeoDataMergedY.csv', index = None)

### Extracting the Suburb Bounding Box and Geo-Coordinates

### Dropping the duplicates in Suburb column

In [19]:
GeoDataMergedY = GeoDataMergedY.drop_duplicates(['suburb'], keep='last')
GeoDataMergedY.reset_index(drop=True, inplace=True)
display(GeoDataMergedY.head(2))
print('GeoDataMergedY size = ',len(GeoDataMergedY))

Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,road,suburb,town,state,city
0,63216,-33.952298,18.465721,-33.952,18.466,Rhodes Drive,Rosebank,,Western Cape,Cape Town
1,138883,-33.88828,18.56437,-33.888,18.564,Giel Basson Drive,Kaapzicht,Parow,Western Cape,


GeoDataMergedY size =  102


In [20]:
GeoDataMergedY.to_csv('Data/GeoDataMergedY.csv', index = None)

### Adding the suffix "Cape Town" 
This is because rosebamk, exist in the other part of SA and world

In [21]:
GeoDataMergedY['suburb_long'] = GeoDataMergedY.suburb + '' + ' Cape Town'
GeoDataMergedY['suburb_long'].head(2)

0     Rosebank Cape Town
1    Kaapzicht Cape Town
Name: suburb_long, dtype: object

In [22]:
GeoDataMergedY.head(2)

Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,road,suburb,town,state,city,suburb_long
0,63216,-33.952298,18.465721,-33.952,18.466,Rhodes Drive,Rosebank,,Western Cape,Cape Town,Rosebank Cape Town
1,138883,-33.88828,18.56437,-33.888,18.564,Giel Basson Drive,Kaapzicht,Parow,Western Cape,,Kaapzicht Cape Town


### Geo-coding the suburb to get the bouding box and geo-coordinates

In [55]:
location = geolocator.geocode("Plattekloof, Cape Town")
location.raw

{'place_id': 198287243,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'relation',
 'osm_id': 2051480,
 'boundingbox': ['-33.8886906', '-33.8615971', '18.5715808', '18.6003196'],
 'lat': '-33.8775',
 'lon': '18.5855556',
 'display_name': 'Plattekloof, Parow, City of Cape Town, Western Cape, South Africa',
 'class': 'place',
 'type': 'suburb',
 'importance': 0.56,
 'icon': 'https://nominatim.openstreetmap.org/images/mapicons/poi_place_village.p.20.png'}

In [None]:
GeoDataMergedY.shape

In [51]:
boundary = [[-33, 20], [-35, 18]]
def get_geocodinate_bounding_box(row):
    pos = str(row['suburb'])
    locations =  RateLimiter(geolocator.geocode(pos, country_codes = 'za', viewbox=boundary, bounded=True), min_delay_seconds=1)          
    return locations

In [56]:
## query the server(geopy) and return the location as a function
GeoDataMergedY['func'] = GeoDataMergedY[['suburb']].progress_apply(get_geocodinate_bounding_box,axis=1)
GeoDataMergedY[:2]





  0%|                                                                                          | 0/102 [00:00<?, ?it/s][A[A[A[A



  2%|█▌                                                                                | 2/102 [00:00<00:38,  2.60it/s][A[A[A[A



  3%|██▍                                                                               | 3/102 [00:01<00:49,  1.99it/s][A[A[A[A



  4%|███▏                                                                              | 4/102 [00:02<00:56,  1.73it/s][A[A[A[A



  5%|████                                                                              | 5/102 [00:03<01:01,  1.57it/s][A[A[A[A



  6%|████▊                                                                             | 6/102 [00:03<01:06,  1.45it/s][A[A[A[A



  7%|█████▋                                                                            | 7/102 [00:04<01:07,  1.40it/s][A[A[A[A



  8%|██████▍                                       

Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,address,road,suburb,town,state,city,suburb_long,func
0,63216,-33.952298,18.465721,-33.952,18.466,"{'address29': ""Mostert's Mill"", 'road': 'Rhode...",Rhodes Drive,Rosebank,,Western Cape,Cape Town,Rosebank Cape Town,<geopy.extra.rate_limiter.RateLimiter object a...
1,138883,-33.88828,18.56437,-33.888,18.564,"{'road': 'Giel Basson Drive', 'suburb': 'Kaapz...",Giel Basson Drive,Kaapzicht,Parow,Western Cape,,Kaapzicht Cape Town,<geopy.extra.rate_limiter.RateLimiter object a...


In [None]:
GeoDataMergedY['address_dict'] = GeoDataMergedY['func'].apply(lambda loc: loc.func.raw if loc else None)
#GeoDataMergedY = pd.concat([GeoDataMergedY,GeoDataY_unique_Suburb['address_dict'].apply(pd.Series)] ,axis=1)

In [59]:
GeoDataMergedY['address_dict'] = GeoDataMergedY['func'].apply(lambda loc: loc.func.raw if loc else None)

Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,address,road,suburb,town,state,city,suburb_long,func,address_dict
0,63216,-33.952298,18.465721,-33.952,18.466,"{'address29': ""Mostert's Mill"", 'road': 'Rhode...",Rhodes Drive,Rosebank,,Western Cape,Cape Town,Rosebank Cape Town,<geopy.extra.rate_limiter.RateLimiter object a...,"{'place_id': 261097441, 'licence': 'Data © Ope..."
1,138883,-33.88828,18.56437,-33.888,18.564,"{'road': 'Giel Basson Drive', 'suburb': 'Kaapz...",Giel Basson Drive,Kaapzicht,Parow,Western Cape,,Kaapzicht Cape Town,<geopy.extra.rate_limiter.RateLimiter object a...,"{'place_id': 198464063, 'licence': 'Data © Ope..."
2,140729,-33.906656,18.503528,-33.907,18.504,"{'road': 'Acre Road', 'suburb': 'Kensington', ...",Acre Road,Kensington,,Western Cape,Cape Town,Kensington Cape Town,<geopy.extra.rate_limiter.RateLimiter object a...,"{'place_id': 198565038, 'licence': 'Data © Ope..."


In [61]:
GeoDataMergedY = pd.concat([GeoDataMergedY,GeoDataMergedY['address_dict'].apply(pd.Series)] ,axis=1)
GeoDataMergedY.head(3)

Unnamed: 0,EventId,latitude,longitude,latitudeX,longitudeX,address,road,suburb,town,state,...,osm_type,osm_id,boundingbox,lat,lon,display_name,class,type,importance,icon
0,63216,-33.952298,18.465721,-33.952,18.466,"{'address29': ""Mostert's Mill"", 'road': 'Rhode...",Rhodes Drive,Rosebank,,Western Cape,...,relation,2034104,"[-33.9609345, -33.9499991, 18.4645707, 18.4834...",-33.9519444,18.4738889,"Rosebank, Cape Town, City of Cape Town, Wester...",place,suburb,0.36,https://nominatim.openstreetmap.org/images/map...
1,138883,-33.88828,18.56437,-33.888,18.564,"{'road': 'Giel Basson Drive', 'suburb': 'Kaapz...",Giel Basson Drive,Kaapzicht,Parow,Western Cape,...,relation,2051464,"[-33.8887422, -33.8792025, 18.5622216, 18.567914]",-33.8829439,18.5657386,"Kaapzicht, Parow, City of Cape Town, Western C...",place,suburb,0.36,https://nominatim.openstreetmap.org/images/map...
2,140729,-33.906656,18.503528,-33.907,18.504,"{'road': 'Acre Road', 'suburb': 'Kensington', ...",Acre Road,Kensington,,Western Cape,...,relation,2034993,"[-33.9189134, -33.9032607, 18.4958044, 18.5152...",-33.9144444,18.5069444,"Kensington, Cape Town, City of Cape Town, West...",place,suburb,0.36,https://nominatim.openstreetmap.org/images/map...


In [62]:
## Check if the returned adddress contains only SA's location
GeoDataMergedY.display_name.str.split(",").str[-1].value_counts()

 South Africa    102
Name: display_name, dtype: int64

### Get Geocordinate and bounding box of suburb 

In [64]:
GeoDataMergedY = GeoDataMergedY[['suburb','boundingbox','lat','lon']]
GeoDataMergedY.head(3)

Unnamed: 0,suburb,boundingbox,lat,lon
0,Rosebank,"[-33.9609345, -33.9499991, 18.4645707, 18.4834...",-33.9519444,18.4738889
1,Kaapzicht,"[-33.8887422, -33.8792025, 18.5622216, 18.567914]",-33.8829439,18.5657386
2,Kensington,"[-33.9189134, -33.9032607, 18.4958044, 18.5152...",-33.9144444,18.5069444


In [65]:
GeoDataMergedY.rename(columns={"lat": "latitude_suburb",
                                "lon": "longitude_suburb"
                                       },inplace=True)

In [66]:
GeoDataMergedY.head(3)

Unnamed: 0,suburb,boundingbox,latitude_suburb,longitude_suburb
0,Rosebank,"[-33.9609345, -33.9499991, 18.4645707, 18.4834...",-33.9519444,18.4738889
1,Kaapzicht,"[-33.8887422, -33.8792025, 18.5622216, 18.567914]",-33.8829439,18.5657386
2,Kensington,"[-33.9189134, -33.9032607, 18.4958044, 18.5152...",-33.9144444,18.5069444


In [69]:
GeoDataMergedY.to_csv("Data/Suburb_coordinate.csv", index=None)