# Geospatial Visualization

## Geojson Data
  
Import Libraries

In [19]:
# Import Libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import folium as flm
import calendar
# Show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)

# Silence all warnings, including deprecation warnings.
# NOTE(review): this filter is global, so it also hides any pandas/geopandas/folium
# warnings raised by later cells; consider narrowing it to specific categories.
import warnings
warnings.filterwarnings("ignore")

### Create a Dataframe containing geometry of the Police Station Areas
  
Read in the data and create a DataFrame.

In [20]:
# Read the three GeoJSON boundary layers with geopandas, keeping only the
# identifying columns and the geometry of each layer.

# Police station areas: station name ('COMPNT_NM') + geometry
geo_stat = gpd.read_file(
    r'../../../data/geodata/South_African_police_boundaries.geojson'
)[["COMPNT_NM", "geometry"]]

# Provinces: province code ('ADM1_ID'), province name ('ADM1_EN') + geometry
geo_prov = gpd.read_file(
    r'../../../data/geodata/sa_provinces.geojson'
)[["ADM1_ID", "ADM1_EN", "geometry"]]

# Districts: district code ('ADM2_ID'), district name ('ADM2_EN') + geometry
geo_dist = gpd.read_file(
    r'../../../data/geodata/sa_districts.geojson'
)[["ADM2_ID", "ADM2_EN", "geometry"]]

In [21]:
# Preview the police-station layer: station name and boundary geometry per row.
geo_stat

Unnamed: 0,COMPNT_NM,geometry
0,BOTSHABELO,"POLYGON ((26.77137 -29.21403, 26.77330 -29.221..."
1,KHUBUSIDRIFT,"POLYGON ((27.72830 -32.53050, 27.72842 -32.531..."
2,STUTTERHEIM,"POLYGON ((27.50201 -32.44217, 27.49884 -32.465..."
3,MOTHERWELL,"POLYGON ((25.61061 -33.81772, 25.60713 -33.822..."
4,KWADWESI,"POLYGON ((25.45586 -33.83107, 25.46660 -33.833..."
...,...,...
1147,PABALELLO,"POLYGON ((21.19508 -28.41724, 21.22427 -28.438..."
1148,PAARL,"POLYGON ((18.89305 -33.57090, 18.89717 -33.573..."
1149,DARLING,"POLYGON ((18.15596 -33.32066, 18.15653 -33.320..."
1150,EENDEKUIL,"POLYGON ((19.01670 -32.75663, 18.98556 -32.770..."


In [22]:
# Preview the province layer: province code, province name and boundary geometry.
geo_prov

Unnamed: 0,ADM1_ID,ADM1_EN,geometry
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
1,FS,Free State,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
2,GT,Gauteng,"POLYGON ((28.24428 -26.88478, 28.24696 -26.884..."
3,KZN,KwaZulu-Natal,"POLYGON ((30.19386 -31.08126, 30.19382 -31.081..."
4,LIM,Limpopo,"POLYGON ((31.88383 -23.98459, 31.88092 -23.967..."
5,MP,Mpumalanga,"POLYGON ((31.88383 -23.98459, 31.85268 -23.986..."
6,NW,North West,"POLYGON ((28.29816 -25.31037, 28.29835 -25.294..."
7,NC,Nothern Cape,"POLYGON ((22.63217 -26.12128, 22.63196 -26.121..."
8,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -..."


Fix spelling mistake - 'Nothern Cape' to 'Northern Cape'.

In [23]:
# Correct the misspelled province name that ships in the province GeoJSON.
province_name_fixes = {'Nothern Cape': 'Northern Cape'}
geo_prov['ADM1_EN'] = geo_prov['ADM1_EN'].replace(province_name_fixes)
geo_prov

Unnamed: 0,ADM1_ID,ADM1_EN,geometry
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
1,FS,Free State,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
2,GT,Gauteng,"POLYGON ((28.24428 -26.88478, 28.24696 -26.884..."
3,KZN,KwaZulu-Natal,"POLYGON ((30.19386 -31.08126, 30.19382 -31.081..."
4,LIM,Limpopo,"POLYGON ((31.88383 -23.98459, 31.88092 -23.967..."
5,MP,Mpumalanga,"POLYGON ((31.88383 -23.98459, 31.85268 -23.986..."
6,NW,North West,"POLYGON ((28.29816 -25.31037, 28.29835 -25.294..."
7,NC,Northern Cape,"POLYGON ((22.63217 -26.12128, 22.63196 -26.121..."
8,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -..."


In [24]:
# Preview the district layer: district code, district name and boundary geometry.
geo_dist

Unnamed: 0,ADM2_ID,ADM2_EN,geometry
0,DC44,Alfred Nzo,"POLYGON ((29.57220 -30.65515, 29.57219 -30.655..."
1,DC25,Amajuba,"POLYGON ((30.44827 -27.32774, 30.44836 -27.327..."
2,DC12,Amathole,"POLYGON ((28.35197 -31.82865, 28.35177 -31.828..."
3,DC37,Bojanala,"POLYGON ((28.29816 -25.31037, 28.29835 -25.294..."
4,BUF,Buffalo City,"POLYGON ((28.07553 -32.90779, 28.07532 -32.907..."
5,DC10,Cacadu,"POLYGON ((24.50627 -31.70683, 24.50615 -31.706..."
6,DC2,Cape Winelands,"POLYGON ((20.43860 -32.94006, 20.43684 -32.938..."
7,DC35,Capricorn,"POLYGON ((28.92328 -22.45704, 28.92253 -22.457..."
8,DC5,Central Karoo,"POLYGON ((24.15246 -31.78861, 24.14286 -31.789..."
9,DC13,Chris Hani,"POLYGON ((28.38127 -31.48000, 28.38104 -31.479..."


In [25]:
# List the distinct district names present in the district boundary layer.
geo_dist['ADM2_EN'].unique()

array(['Alfred Nzo', 'Amajuba', 'Amathole', 'Bojanala', 'Buffalo City',
       'Cacadu', 'Cape Winelands', 'Capricorn', 'Central Karoo',
       'Chris Hani', 'City of Cape Town', 'City of Johannesburg',
       'City of Tshwane', 'Dr Kenneth Kaunda',
       'Dr Ruth Segomotsi Mompati', 'Eden', 'Ehlanzeni', 'Ekurhuleni',
       'eThekwini', 'Fezile Dabi', 'Frances Baard', 'Gert Sibande',
       'iLembe', 'Joe Gqabi', 'John Taolo Gaetsewe', 'Lejweleputswa',
       'Mangaung', 'Mopani', 'Namakwa', 'Nelson Mandela Bay',
       'Ngaka Modiri Molema', 'Nkangala', 'O.R.Tambo', 'Overberg',
       'Pixley ka Seme', 'Sedibeng', 'Sekhukhune', 'Sisonke',
       'Thabo Mofutsanyane', 'Ugu', 'Umgungundlovu', 'Umkhanyakude',
       'Umzinyathi', 'Uthukela', 'Uthungulu', 'Vhembe', 'Waterberg',
       'West Coast', 'West Rand', 'Xhariep', 'Z F Mgcawu', 'Zululand'],
      dtype=object)

In [26]:
# Check column dtypes and non-null counts of the province layer.
geo_prov.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   ADM1_ID   9 non-null      object  
 1   ADM1_EN   9 non-null      object  
 2   geometry  9 non-null      geometry
dtypes: geometry(1), object(2)
memory usage: 344.0+ bytes


### Create a Dataframe containing the Police Station Crime Data
  
Read in the data and create a DataFrame.

In [57]:
# Load the crime records from parquet. Per the preview below, each row holds the
# number of crimes for one station / crime category / month ('date' is a 'Mon-YY' string).
df = pd.read_parquet('../../../data/crime_data_2016_21.parquet')

In [58]:
# Preview the raw crime records (pandas truncates the display to the first/last rows).
df

Unnamed: 0,station,province,district,crime_category,date,number_of_crimes,latitude,longitude
0,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-16,470,-33.02058,27.90288
1,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-16,411,-33.02058,27.90288
2,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Mar-16,477,-33.02058,27.90288
3,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-17,476,-33.02058,27.90288
4,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-17,427,-33.02058,27.90288
...,...,...,...,...,...,...,...,...
3665371,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Nov-20,0,-26.27697,27.83896
3665372,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Dec-20,0,-26.27697,27.83896
3665373,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Oct-21,0,-26.27697,27.83896
3665374,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Nov-21,0,-26.27697,27.83896


In [59]:
# Lookup table mapping each police cluster name ('district' in the crime data)
# to its parent district ('main_district'). Only these two columns are kept.
# NOTE(review): this CSV is read from the notebook's own directory, unlike the
# other inputs which live under ../../../data — confirm that is intended.
lookup_columns = ['district', 'main_district']
df5 = pd.DataFrame(pd.read_csv('district_municipals.csv')[lookup_columns])
df5

Unnamed: 0,district,main_district
0,Mount Ayliff Cc,Alfred Nzo
1,Butterworth Cc,Amathole
2,Alice Cc,Amathole
3,Rustenburg Cc,Bojanala
4,Brits Cc,Bojanala
...,...,...
117,Zf Mgcawu Cc,Z F Mgcawu
118,King Cetshwayo Cc,Uthungulu
119,Ilembe Cc,iLembe
120,Amajuba Cc,Zululand


In [60]:
# Attach the parent district ('main_district') to every crime record.
# how='left' keeps every crime row even when its 'district' has no lookup entry
# (those rows get NaN in 'main_district').
# validate='many_to_one' makes pandas raise a MergeError if the lookup ever
# contains a duplicated 'district' key; without it such a duplicate would
# silently multiply crime rows and inflate every later total.
df = df.merge(df5, on='district', how='left', validate='many_to_one')
df

Unnamed: 0,station,province,district,crime_category,date,number_of_crimes,latitude,longitude,main_district
0,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-16,470,-33.02058,27.90288,Buffalo City
1,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-16,411,-33.02058,27.90288,Buffalo City
2,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Mar-16,477,-33.02058,27.90288,Buffalo City
3,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-17,476,-33.02058,27.90288,Buffalo City
4,East London,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-17,427,-33.02058,27.90288,Buffalo City
...,...,...,...,...,...,...,...,...,...
3665371,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Nov-20,0,-26.27697,27.83896,City of Johannesburg
3665372,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Dec-20,0,-26.27697,27.83896,City of Johannesburg
3665373,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Oct-21,0,-26.27697,27.83896,City of Johannesburg
3665374,Protea Glen,Gauteng,Soweto West Cc,Truck hijacking,Nov-21,0,-26.27697,27.83896,City of Johannesburg


In [61]:
# List the columns after the merge; 'main_district' should now be present.
df.columns

Index(['station', 'province', 'district', 'crime_category', 'date',
       'number_of_crimes', 'latitude', 'longitude', 'main_district'],
      dtype='object')

In [62]:
# Put the location hierarchy first (station -> district -> main_district -> province),
# followed by the date, the crime fields and the coordinates.
column_order = [
    'station', 'district', 'main_district', 'province',
    'date', 'crime_category', 'number_of_crimes',
    'latitude', 'longitude',
]
df = df[column_order]
df.head(1)

Unnamed: 0,station,district,main_district,province,date,crime_category,number_of_crimes,latitude,longitude
0,East London,East London Cc,Buffalo City,Eastern Cape,Jan-16,17 Community Reported Serious Crime,470,-33.02058,27.90288


In [63]:
# Rename the original police-cluster column to 'municipality' and promote
# 'main_district' to 'district'.
# The guard makes the cell safe to re-run: once 'main_district' is gone, a second
# rename would turn the new 'district' into a second 'municipality' column.
# Rebinding (instead of inplace=True) also avoids mutating a frame that was
# produced by column selection in the previous cell.
if 'main_district' in df.columns:
    df = df.rename(columns={'district': 'municipality', 'main_district': 'district'})
df.head(1)

Unnamed: 0,station,municipality,district,province,date,crime_category,number_of_crimes,latitude,longitude
0,East London,East London Cc,Buffalo City,Eastern Cape,Jan-16,17 Community Reported Serious Crime,470,-33.02058,27.90288


Change the format of 'Kwazulu/Natal' to 'KwaZulu-Natal' so that it matches the province name used in the province GeoJSON.

In [64]:
# Use the same spelling as the province boundary layer so the two can be joined on name.
df['province'] = df['province'].replace('Kwazulu/Natal', 'KwaZulu-Natal')
# Fixed seed so the spot-check shows the same rows on every run.
df.sample(10, random_state=42)

Unnamed: 0,station,municipality,district,province,date,crime_category,number_of_crimes,latitude,longitude
1360057,Wonderboompoort,Tshwane West Cc,City of Tshwane,Gauteng,Aug-20,Carjacking,0,-25.70428,28.18587
1759958,Bulwer,Harry Gwala Cc,Sisonke,KwaZulu-Natal,Dec-18,Bank Robbery,0,-29.8118,29.76945
884423,Tweespruit,Selosesha Cc,Mangaung,Free State,Mar-19,Rape,1,-29.18634,27.02513
2196703,Bandelierkop,Makhado Cc,Vhembe,Limpopo,Aug-16,Other serious crime,0,-23.32103,29.79739
832571,Marquard,Ficksburg Cc,Thabo Mofutsanyane,Free State,Dec-21,Burglary at residential premises,6,-28.66536,27.4259
1787696,Umbumbulu,Ethekwini Outer/S Cc,Ekurhuleni,KwaZulu-Natal,Sep-18,Burglary at non-residential premises,6,-29.99128,30.70896
3047,East London,East London Cc,Buffalo City,Eastern Cape,Dec-17,Sexual offences detected as a result of police...,0,-33.02058,27.90288
1240740,Dawn Park,Ekurhuleni Centr Cc,Ekurhuleni,Gauteng,Oct-16,Neglect and ill-treatment of children,0,-26.315,28.2477
407708,Hofmeyr,Cradock Cc,Chris Hani,Eastern Cape,Sep-18,Robbery with aggravating circumstances,0,-31.65593,25.80221
1647085,Umzimkhulu,Harry Gwala Cc,Sisonke,KwaZulu-Natal,Feb-20,Property-related crime,24,-30.26423,29.93638


Let's check the shape of the DataFrame and the number of unique provinces, stations and districts.

In [65]:
# Shape of the crime data next to the number of distinct provinces, stations and districts.
df_dims = df.shape
(df_dims, len(df['province'].unique()),
 df_dims, len(df['station'].unique()),
 df_dims, len(df['district'].unique()))

((3665376, 9), 9, (3665376, 9), 1158, (3665376, 9), 51)

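Comparing these counts with the shapes of the boundary layers below reveals mismatches: `df` has 1158 stations and 51 districts, while the GeoJSON layers have 1152 station polygons and 52 district polygons.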

In [66]:
# (rows, columns) of the station, province and district boundary layers, in that order.
tuple(layer.shape for layer in (geo_stat, geo_prov, geo_dist))

((1152, 2), (9, 3), (52, 3))

Lets check the column names.

In [67]:
# Column names of the station, province and district boundary layers, in that order.
tuple(layer.columns for layer in (geo_stat, geo_prov, geo_dist))

(Index(['COMPNT_NM', 'geometry'], dtype='object'),
 Index(['ADM1_ID', 'ADM1_EN', 'geometry'], dtype='object'),
 Index(['ADM2_ID', 'ADM2_EN', 'geometry'], dtype='object'))

We will rename the column 'COMPNT_NM' to 'station', and change the case to 'title' to match the 'df' DataFrame.  
  
We will rename the column 'ADM1_EN' to 'province', and 'geometry' to 'geometry_prov'.  
  
We will rename the column 'ADM2_EN' to 'district', and 'geometry' to 'geometry_dist'.

In [68]:
# Align the station key with `df`: same column name ('station') and Title Case values.
geo_stat = geo_stat.rename(columns={'COMPNT_NM': 'station'})
geo_stat['station'] = geo_stat['station'].str.title()
geo_stat

Unnamed: 0,station,geometry
0,Botshabelo,"POLYGON ((26.77137 -29.21403, 26.77330 -29.221..."
1,Khubusidrift,"POLYGON ((27.72830 -32.53050, 27.72842 -32.531..."
2,Stutterheim,"POLYGON ((27.50201 -32.44217, 27.49884 -32.465..."
3,Motherwell,"POLYGON ((25.61061 -33.81772, 25.60713 -33.822..."
4,Kwadwesi,"POLYGON ((25.45586 -33.83107, 25.46660 -33.833..."
...,...,...
1147,Pabalello,"POLYGON ((21.19508 -28.41724, 21.22427 -28.438..."
1148,Paarl,"POLYGON ((18.89305 -33.57090, 18.89717 -33.573..."
1149,Darling,"POLYGON ((18.15596 -33.32066, 18.15653 -33.320..."
1150,Eendekuil,"POLYGON ((19.01670 -32.75663, 18.98556 -32.770..."


In [69]:
# Rename the province name column to match `df`, and give the geometry a
# layer-specific name so it cannot collide with other layers after merging.
# A GeoDataFrame tracks its active geometry column by name, so renaming that
# column must be followed by set_geometry(); otherwise the frame still points
# at a 'geometry' column that no longer exists and geospatial methods fail.
# Both steps are no-ops on an already-renamed frame, so the cell can be re-run.
geo_prov = (
    geo_prov
    .rename(columns={'ADM1_EN': 'province', 'geometry': 'geometry_prov'})
    .set_geometry('geometry_prov')
)
geo_prov

Unnamed: 0,ADM1_ID,province,geometry_prov
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
1,FS,Free State,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
2,GT,Gauteng,"POLYGON ((28.24428 -26.88478, 28.24696 -26.884..."
3,KZN,KwaZulu-Natal,"POLYGON ((30.19386 -31.08126, 30.19382 -31.081..."
4,LIM,Limpopo,"POLYGON ((31.88383 -23.98459, 31.88092 -23.967..."
5,MP,Mpumalanga,"POLYGON ((31.88383 -23.98459, 31.85268 -23.986..."
6,NW,North West,"POLYGON ((28.29816 -25.31037, 28.29835 -25.294..."
7,NC,Northern Cape,"POLYGON ((22.63217 -26.12128, 22.63196 -26.121..."
8,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -..."


In [70]:
# Rename the district name column to match `df`, and give the geometry a
# layer-specific name so it cannot collide with other layers after merging.
# A GeoDataFrame tracks its active geometry column by name, so renaming that
# column must be followed by set_geometry(); otherwise the frame still points
# at a 'geometry' column that no longer exists and geospatial methods fail.
# Both steps are no-ops on an already-renamed frame, so the cell can be re-run.
geo_dist = (
    geo_dist
    .rename(columns={'ADM2_EN': 'district', 'geometry': 'geometry_dist'})
    .set_geometry('geometry_dist')
)
geo_dist

Unnamed: 0,ADM2_ID,district,geometry_dist
0,DC44,Alfred Nzo,"POLYGON ((29.57220 -30.65515, 29.57219 -30.655..."
1,DC25,Amajuba,"POLYGON ((30.44827 -27.32774, 30.44836 -27.327..."
2,DC12,Amathole,"POLYGON ((28.35197 -31.82865, 28.35177 -31.828..."
3,DC37,Bojanala,"POLYGON ((28.29816 -25.31037, 28.29835 -25.294..."
4,BUF,Buffalo City,"POLYGON ((28.07553 -32.90779, 28.07532 -32.907..."
5,DC10,Cacadu,"POLYGON ((24.50627 -31.70683, 24.50615 -31.706..."
6,DC2,Cape Winelands,"POLYGON ((20.43860 -32.94006, 20.43684 -32.938..."
7,DC35,Capricorn,"POLYGON ((28.92328 -22.45704, 28.92253 -22.457..."
8,DC5,Central Karoo,"POLYGON ((24.15246 -31.78861, 24.14286 -31.789..."
9,DC13,Chris Hani,"POLYGON ((28.38127 -31.48000, 28.38104 -31.479..."


The 'geo_prov' data matches the 'df' 'DataFrame'

Let's check which station names are not shared between `df['station']` and `geo_stat['station']`.

In [18]:
# Station names that appear in only one of the two sources (symmetric difference),
# i.e. either crime data without a polygon or a polygon without crime data.
df_in = df['station'].unique()
ge_in = geo_stat['station'].unique()
set(df_in).symmetric_difference(ge_in)

{'Bohlokong',
 'Int Airport C Town',
 'Pholile',
 'Protea',
 'Qhasa',
 'Samora Machel'}

Let's fix the missing geodata by giving each missing station a copy of the boundary polygon of its nearest Police Station.

In [19]:
# Look up the Bethlehem row; the index shown on the left is the row number
# that the next cell duplicates.
is_bethlehem = geo_stat['station'] == 'Bethlehem'
geo_stat.loc[is_bethlehem]

Unnamed: 0,station,geometry
274,Bethlehem,"POLYGON ((28.63275 -28.04763, 28.63127 -28.057..."


Copy row 274 ('Bethlehem') and rename to 'Bohlokong'.

In [20]:
# 'Bohlokong' has no polygon in the boundary data, so it reuses the polygon of
# positional row 274 ('Bethlehem', as shown by the lookup cell above).
# NOTE(review): .iloc is position-based; if the row order of the GeoJSON changes,
# this silently copies a different station.
station = geo_stat.iloc[[274]].copy()  # list indexer keeps a one-row frame
station['station'] = 'Bohlokong'
# Append only once, so re-running this cell does not add a duplicate row.
if 'Bohlokong' not in geo_stat['station'].values:
    geo_stat = pd.concat([geo_stat, station])

Copy row 509 ('Cape Town Central') and rename to 'Int Airport C Town'.

In [21]:
# 'Int Airport C Town' has no polygon in the boundary data, so it reuses the polygon
# of positional row 509 (per the notebook text this is 'Cape Town Central' — TODO confirm).
# NOTE(review): .iloc is position-based; if the row order of the GeoJSON changes,
# this silently copies a different station.
station = geo_stat.iloc[[509]].copy()  # list indexer keeps a one-row frame
station['station'] = 'Int Airport C Town'
# Append only once, so re-running this cell does not add a duplicate row.
if 'Int Airport C Town' not in geo_stat['station'].values:
    geo_stat = pd.concat([geo_stat, station])

Copy row 884 ('Matatiele') and rename to 'Pholile'.

In [22]:
# 'Pholile' has no polygon in the boundary data, so it reuses the polygon of
# positional row 884 (per the notebook text this is 'Matatiele' — TODO confirm).
# NOTE(review): .iloc is position-based; if the row order of the GeoJSON changes,
# this silently copies a different station.
station = geo_stat.iloc[[884]].copy()  # list indexer keeps a one-row frame
station['station'] = 'Pholile'
# Append only once, so re-running this cell does not add a duplicate row.
if 'Pholile' not in geo_stat['station'].values:
    geo_stat = pd.concat([geo_stat, station])

Copy row 428 ('Protea Glen') and rename to 'Protea'.

In [23]:
# 'Protea' has no polygon in the boundary data, so it reuses the polygon of
# positional row 428 (per the notebook text this is 'Protea Glen' — TODO confirm).
# NOTE(review): .iloc is position-based; if the row order of the GeoJSON changes,
# this silently copies a different station.
station = geo_stat.iloc[[428]].copy()  # list indexer keeps a one-row frame
station['station'] = 'Protea'
# Append only once, so re-running this cell does not add a duplicate row.
if 'Protea' not in geo_stat['station'].values:
    geo_stat = pd.concat([geo_stat, station])

Copy row 175 ('Flagstaff') and rename to 'Qhasa'.

In [24]:
# 'Qhasa' has no polygon in the boundary data, so it reuses the polygon of
# positional row 175 (per the notebook text this is 'Flagstaff' — TODO confirm).
# NOTE(review): .iloc is position-based; if the row order of the GeoJSON changes,
# this silently copies a different station.
station = geo_stat.iloc[[175]].copy()  # list indexer keeps a one-row frame
station['station'] = 'Qhasa'
# Append only once, so re-running this cell does not add a duplicate row.
if 'Qhasa' not in geo_stat['station'].values:
    geo_stat = pd.concat([geo_stat, station])

Copy row 719 ('Philippi East') and rename to 'Samora Machel'.

In [25]:
# 'Samora Machel' has no polygon in the boundary data, so it reuses the polygon of
# positional row 719 (per the notebook text this is 'Philippi East' — TODO confirm).
# NOTE(review): .iloc is position-based; if the row order of the GeoJSON changes,
# this silently copies a different station.
station = geo_stat.iloc[[719]].copy()  # list indexer keeps a one-row frame
station['station'] = 'Samora Machel'
# Append only once, so re-running this cell does not add a duplicate row.
if 'Samora Machel' not in geo_stat['station'].values:
    geo_stat = pd.concat([geo_stat, station])

Quick check of the corrections.

In [26]:
# Distinct stations in the crime data vs. rows in the station layer;
# the two numbers are expected to match once the six stations have been added.
len(df['station'].unique()), len(geo_stat['station'])

(1158, 1158)

Now we can check the data types.

In [27]:
# Check column dtypes and non-null counts of the station layer after the additions.
geo_stat.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1158 entries, 0 to 719
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   station   1158 non-null   object  
 1   geometry  1158 non-null   geometry
dtypes: geometry(1), object(1)
memory usage: 27.1+ KB


In [28]:
# Check column dtypes and non-null counts of the province layer after the renames.
geo_prov.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   ADM1_ID        9 non-null      object  
 1   province       9 non-null      object  
 2   geometry_prov  9 non-null      geometry
dtypes: geometry(1), object(2)
memory usage: 344.0+ bytes


In [29]:
# Check column dtypes of the crime data.
# NOTE(review): the recorded output below lists 8 columns and no 'municipality'
# column, although the cells above leave `df` with 9 columns — it appears to come
# from an earlier run. Re-run the notebook top to bottom to refresh it.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3665376 entries, 0 to 3665375
Data columns (total 8 columns):
 #   Column            Dtype  
---  ------            -----  
 0   station           object 
 1   province          object 
 2   district          object 
 3   crime_category    object 
 4   date              object 
 5   number_of_crimes  int32  
 6   latitude          float64
 7   longitude         float64
dtypes: float64(2), int32(1), object(5)
memory usage: 237.7+ MB


## Merge the DataFrames

Lets create a new DataFrame of the merged DataFrames.

In [30]:
# Left-join the crime records onto the province layer by province name.
# Every crime row ends up carrying its province code and province polygon.
# NOTE(review): this repeats the province geometry on each of the ~3.6M crime rows;
# the maps below only need it after aggregation — consider joining geometry later.
geospatial_main = geo_prov.merge(df, on=['province'], how='left')

In [31]:
# Preview the merged table (pandas truncates the display to the first/last rows).
geospatial_main

Unnamed: 0,ADM1_ID,province,geometry_prov,station,district,crime_category,date,number_of_crimes,latitude,longitude
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,Jan-16,470,-33.02058,27.90288
1,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,Feb-16,411,-33.02058,27.90288
2,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,Mar-16,477,-33.02058,27.90288
3,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,Jan-17,476,-33.02058,27.90288
4,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,Feb-17,427,-33.02058,27.90288
...,...,...,...,...,...,...,...,...,...,...
3665371,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -...",Int Airport C Town,East Metropol,Truck hijacking,Nov-20,0,-33.97146,18.59990
3665372,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -...",Int Airport C Town,East Metropol,Truck hijacking,Dec-20,0,-33.97146,18.59990
3665373,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -...",Int Airport C Town,East Metropol,Truck hijacking,Oct-21,0,-33.97146,18.59990
3665374,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -...",Int Airport C Town,East Metropol,Truck hijacking,Nov-21,0,-33.97146,18.59990


Convert to a GeoPandas DataFrame.

In [32]:
# Wrap the merged table as a GeoDataFrame, explicitly naming 'geometry_prov'
# as the active geometry column.
geospatial_main = gpd.GeoDataFrame(geospatial_main, geometry='geometry_prov')

In [33]:
# Confirm the conversion produced a GeoDataFrame.
type(geospatial_main)

geopandas.geodataframe.GeoDataFrame

Lets create a matching 'prov_id' from 'ADM1_ID'.

In [34]:
# 'prov_id' is a plain copy of the province code 'ADM1_ID'; it is the key the
# choropleth later matches against 'feature.properties.ADM1_ID'.
geospatial_main['prov_id'] = geospatial_main['ADM1_ID']

In [35]:
# Spot-check two rows; fixed seed so the same rows are shown on every run.
geospatial_main.sample(2, random_state=42)

Unnamed: 0,ADM1_ID,province,geometry_prov,station,district,crime_category,date,number_of_crimes,latitude,longitude,prov_id
3420334,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -...",Bredasdorp,Overberg Cc,Neglect and ill-treatment of children,Aug-19,0,-34.53644,20.04727,WC
1911691,KZN,KwaZulu-Natal,"POLYGON ((30.19386 -31.08126, 30.19382 -31.081...",Ndumo,Umkhanyakude Cc,Robbery of cash in transit,May-16,0,-26.9201,32.26491,KZN


Lets change the date to 'datetime'.

In [36]:
# Parse 'Mon-YY' strings such as 'Jan-16' (format '%b-%y') into timestamps.
# The source has no day component, so each value becomes the first of its month.
geospatial_main['date'] = pd.to_datetime(geospatial_main['date'], format='%b-%y')

Quick check.

In [37]:
# Confirm that 'date' now holds parsed timestamps.
geospatial_main.head(1)

Unnamed: 0,ADM1_ID,province,geometry_prov,station,district,crime_category,date,number_of_crimes,latitude,longitude,prov_id
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,2016-01-01,470,-33.02058,27.90288,EC


### Create Month and Year columns

In [38]:
# Derive 'month' (abbreviated name, e.g. 'Jan') and 'year' from the parsed date.
# Uses the vectorized .dt accessor instead of row-by-row apply(lambda ...),
# which matters on a table of several million rows.
month_names = {number: calendar.month_abbr[number] for number in range(1, 13)}
geospatial_main['month'] = geospatial_main['date'].dt.month.map(month_names)
# Cast keeps the int64 dtype the apply-based version produced, whatever the
# integer width .dt.year returns in the installed pandas version.
geospatial_main['year'] = geospatial_main['date'].dt.year.astype('int64')
geospatial_main.head(1)

Unnamed: 0,ADM1_ID,province,geometry_prov,station,district,crime_category,date,number_of_crimes,latitude,longitude,prov_id,month,year
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081...",East London,East London Cc,17 Community Reported Serious Crime,2016-01-01,470,-33.02058,27.90288,EC,Jan,2016


Reorder columns

In [39]:

# Order the columns as: location, crime/date fields, counts, coordinates,
# and finally the province code and geometry.
column_order = [
    'station', 'district', 'province', 'prov_id',
    'crime_category', 'date', 'month', 'year',
    'number_of_crimes', 'latitude', 'longitude',
    'ADM1_ID', 'geometry_prov',
]
geospatial_main = geospatial_main[column_order]
geospatial_main.head(1)

Unnamed: 0,station,district,province,prov_id,crime_category,date,month,year,number_of_crimes,latitude,longitude,ADM1_ID,geometry_prov
0,East London,East London Cc,Eastern Cape,EC,17 Community Reported Serious Crime,2016-01-01,Jan,2016,470,-33.02058,27.90288,EC,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."


In [40]:
# Inspect the dtype of 'year' before converting it to a string.
geospatial_main['year'].info()

<class 'pandas.core.series.Series'>
Int64Index: 3665376 entries, 0 to 3665375
Series name: year
Non-Null Count    Dtype
--------------    -----
3665376 non-null  int64
dtypes: int64(1)
memory usage: 55.9 MB


Lets change the year to a string.

In [41]:
# Store the year as text; the map cell filters on string values such as '2016'.
geospatial_main['year'] = geospatial_main['year'].astype(str)

In [42]:
# Confirm that 'year' is now stored as object (string) values.
geospatial_main['year'].info()

<class 'pandas.core.series.Series'>
Int64Index: 3665376 entries, 0 to 3665375
Series name: year
Non-Null Count    Dtype 
--------------    ----- 
3665376 non-null  object
dtypes: object(1)
memory usage: 55.9+ MB


## Mapping
Lets create a dataframe for the Folium map.

### Yearly Data

In [43]:
# Total number of crimes per province and year. 'prov_id', 'province' and
# 'ADM1_ID' all identify the province, so together with 'year' they form one
# group per province-year.
group_keys = ['prov_id', 'province', 'year', 'ADM1_ID']
map1 = (
    geospatial_main
    .groupby(group_keys, as_index=False)
    .agg({'number_of_crimes': 'sum'})
)
map1.head()

Unnamed: 0,prov_id,province,year,ADM1_ID,number_of_crimes
0,EC,Eastern Cape,2016,EC,587452
1,EC,Eastern Cape,2017,EC,570766
2,EC,Eastern Cape,2018,EC,578181
3,EC,Eastern Cape,2019,EC,579301
4,EC,Eastern Cape,2020,EC,500245


In [44]:
# Check the size and dtypes of the yearly aggregate.
map1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   prov_id           54 non-null     object
 1   province          54 non-null     object
 2   year              54 non-null     object
 3   ADM1_ID           54 non-null     object
 4   number_of_crimes  54 non-null     int32 
dtypes: int32(1), object(4)
memory usage: 2.0+ KB


In [45]:
# Show the province layer whose geometry is merged into the aggregate next.
geo_prov

Unnamed: 0,ADM1_ID,province,geometry_prov
0,EC,Eastern Cape,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
1,FS,Free State,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
2,GT,Gauteng,"POLYGON ((28.24428 -26.88478, 28.24696 -26.884..."
3,KZN,KwaZulu-Natal,"POLYGON ((30.19386 -31.08126, 30.19382 -31.081..."
4,LIM,Limpopo,"POLYGON ((31.88383 -23.98459, 31.88092 -23.967..."
5,MP,Mpumalanga,"POLYGON ((31.88383 -23.98459, 31.85268 -23.986..."
6,NW,North West,"POLYGON ((28.29816 -25.31037, 28.29835 -25.294..."
7,NC,Northern Cape,"POLYGON ((22.63217 -26.12128, 22.63196 -26.121..."
8,WC,Western Cape,"MULTIPOLYGON (((19.41807 -34.68668, 19.41806 -..."


Merge Geometry into DataFrame.

In [46]:
# Attach each province's polygon to the yearly totals, keyed on the province code.
# Only the key and the geometry are taken from geo_prov, so the province name
# already present in map1 is kept as the single 'province' column.
province_shapes = geo_prov[['ADM1_ID', 'geometry_prov']]
mapped_prov = pd.merge(map1, province_shapes, on=['ADM1_ID'], how='left')
mapped_prov

Unnamed: 0,prov_id,province,year,ADM1_ID,number_of_crimes,geometry_prov
0,EC,Eastern Cape,2016,EC,587452,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
1,EC,Eastern Cape,2017,EC,570766,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
2,EC,Eastern Cape,2018,EC,578181,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
3,EC,Eastern Cape,2019,EC,579301,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
4,EC,Eastern Cape,2020,EC,500245,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
5,EC,Eastern Cape,2021,EC,516424,"POLYGON ((30.19386 -31.08126, 30.19386 -31.081..."
6,FS,Free State,2016,FS,340704,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
7,FS,Free State,2017,FS,320926,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
8,FS,Free State,2018,FS,318520,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."
9,FS,Free State,2019,FS,305610,"POLYGON ((28.24428 -26.88478, 28.24348 -26.884..."


Convert to a GeoPandas DataFrame.

In [47]:
# pd.merge with a plain DataFrame on the left yields a plain DataFrame, so wrap the
# result as a GeoDataFrame with 'geometry_prov' as the active geometry column.
mapped_prov = gpd.GeoDataFrame(mapped_prov, geometry='geometry_prov')
# Confirm the resulting type.
type(mapped_prov)

geopandas.geodataframe.GeoDataFrame

In [None]:
# Base map at the given centre and zoom. tiles=None suppresses the default tile
# layer so the OpenStreetMap layer can be added explicitly with control=False
# (i.e. it is not listed in the layer control).
sa_map = flm.Map(location=[-28.343, 25.862], zoom_start=6, scrollWheelZoom=False, overlay=False, tiles=None)

flm.TileLayer('openstreetmap',name="Light Map",control=False).add_to(sa_map)

# Years to map; 'year' is stored as a string in mapped_prov.
map_years = ['2016', '2017', '2018', '2019', '2020', '2021']

# One slice of the aggregate per year. The individual ft_* names are kept for
# any later cell that refers to them.
year_data = [mapped_prov[mapped_prov['year'] == map_year] for map_year in map_years]
ft_2016, ft_2017, ft_2018, ft_2019, ft_2020, ft_2021 = year_data

# One feature group per year, added to the map in year order. overlay=False
# registers each group as a base layer rather than an optional overlay.
# The individual fg* names are kept for any later cell that refers to them.
fs = [flm.FeatureGroup(name=f'ft_{map_year}', overlay=False).add_to(sa_map) for map_year in map_years]
fg0, fg1, fg2, fg3, fg4, fg5 = fs

# Bin edges are computed once over all years, so every year's layer is coloured
# on the same scale and the layers are directly comparable.
custom_scale = (mapped_prov['number_of_crimes'].quantile((0,0.2,0.4,0.6,0.7,0.8,0.9,1))).tolist()

for yearly_totals, year_group in zip(year_data, fs):
    # Only the choropleth's GeoJson layer is attached to the year's feature group;
    # the Choropleth object itself is never added to the map.
    # `bins` is the current name of the deprecated `threshold_scale` argument.
    crimes_per_year = flm.Choropleth(
                geo_data=r'../../../data/geodata/sa_provinces.geojson',
                data=yearly_totals,
                columns=['prov_id', 'number_of_crimes'],
                key_on='feature.properties.ADM1_ID',
                bins=custom_scale,
                fill_color='YlGnBu',
                nan_fill_color="blue",
                fill_opacity=0.5,
                line_opacity=0.2,
                legend_name='Number of Crimes ',
                highlight=True,
                line_color='black').geojson.add_to(year_group)

    # Transparent outline layer on top of the choropleth that carries the tooltip
    # (province, year, number of crimes) and a hover highlight.
    flm.features.GeoJson(
                        data = yearly_totals,
                        name='Crimes Per Year',
                        smooth_factor=2,
                        style_function=lambda x: {'color':'black','fillColor':'transparent','weight':0.5},
                        tooltip=flm.features.GeoJsonTooltip(
                            fields=['province',
                                    'year',
                                    'number_of_crimes',
                                ],
                            aliases=["Province:",
                                    "Year",
                                    "Number of Crimes:",
                                    ],
                            localize=True,
                            sticky=False,
                            labels=True,
                            style="""
                                background-color: #F0EFEF;
                                border: 2px solid black;
                                border-radius: 3px;
                                box-shadow: 3px;
                            """,
                            max_width=800,),
                                highlight_function=lambda x: {'weight':3,'fillColor':'grey'},
                            ).add_to(crimes_per_year)

# Second OpenStreetMap layer, this time registered as an overlay so that it
# shows up in the layer control.
flm.TileLayer('openstreetmap', overlay=True, name="light mode").add_to(sa_map)
flm.LayerControl(collapsed=False).add_to(sa_map)
# Write a standalone HTML copy next to the notebook, then render the map inline.
sa_map.save('SA_Yearly_Crime_by_Province.html')
sa_map