# Geospatial Visualization

## Geojson Data
  
Import Libraries

In [81]:
# Import Libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import folium as flm
import calendar

### Create a DataFrame containing the geometry of the Police Station Areas
  
Read in the data and create a DataFrame.

In [3]:
# Load the police-station boundaries with geopandas and keep only the
# station name ('COMPNT_NM') and the 'geometry' column.
geojson = gpd.read_file(
    r'../../../data/geodata/South_African_police_boundaries.geojson'
)[["COMPNT_NM", "geometry"]]

In [4]:
geojson

Unnamed: 0,COMPNT_NM,geometry
0,BOTSHABELO,"POLYGON ((26.77137 -29.21403, 26.77330 -29.221..."
1,KHUBUSIDRIFT,"POLYGON ((27.72830 -32.53050, 27.72842 -32.531..."
2,STUTTERHEIM,"POLYGON ((27.50201 -32.44217, 27.49884 -32.465..."
3,MOTHERWELL,"POLYGON ((25.61061 -33.81772, 25.60713 -33.822..."
4,KWADWESI,"POLYGON ((25.45586 -33.83107, 25.46660 -33.833..."
...,...,...
1147,PABALELLO,"POLYGON ((21.19508 -28.41724, 21.22427 -28.438..."
1148,PAARL,"POLYGON ((18.89305 -33.57090, 18.89717 -33.573..."
1149,DARLING,"POLYGON ((18.15596 -33.32066, 18.15653 -33.320..."
1150,EENDEKUIL,"POLYGON ((19.01670 -32.75663, 18.98556 -32.770..."


### Create a DataFrame containing the Police Station Data
  
Read in the data and create a DataFrame.

In [5]:
df = pd.read_parquet('../../../data/crime_data_2016_21.parquet')

In [6]:
df.head()

Unnamed: 0,station,province,district,crime_category,date,number_of_crimes,latitude,longitude
0,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-16,470,27.90288,-33.02058
1,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-16,411,27.90288,-33.02058
2,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Mar-16,477,27.90288,-33.02058
3,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-17,476,27.90288,-33.02058
4,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-17,427,27.90288,-33.02058


Let's check the shape of the DataFrame and the number of unique values in 'station'.

In [7]:
df.shape, len(df['station'].unique())

((3665376, 8), 1157)

A quick check of the shape reveals that some data points may be missing: the GeoJSON has fewer rows than there are unique stations in the crime data.

In [8]:
geojson.shape

(1152, 2)

In [9]:
# Give the boundaries the same key as the crime data: name the column
# 'station' and lower-case its values.
geojson = (
    geojson
    .rename(columns={'COMPNT_NM': 'station'})
    .assign(station=lambda frame: frame['station'].str.lower())
)
geojson.head()

Unnamed: 0,station,geometry
0,botshabelo,"POLYGON ((26.77137 -29.21403, 26.77330 -29.221..."
1,khubusidrift,"POLYGON ((27.72830 -32.53050, 27.72842 -32.531..."
2,stutterheim,"POLYGON ((27.50201 -32.44217, 27.49884 -32.465..."
3,motherwell,"POLYGON ((25.61061 -33.81772, 25.60713 -33.822..."
4,kwadwesi,"POLYGON ((25.45586 -33.83107, 25.46660 -33.833..."


Let's fix the missing geodata by giving each station without a boundary the geometry of the nearest Police Station.

In [10]:
geojson[geojson['station'] == 'bholothwa']

Unnamed: 0,station,geometry
997,bholothwa,"POLYGON ((27.40636 -31.92052, 27.40636 -31.920..."


It will be easier to remove any Police Stations that are not in the main DataFrame as we do not have time to remap any areas.

In [11]:
# Remove the 'bholothwa' boundary (label 997 in the check above). The row is
# selected by station name on `geojson` itself; taking the label from
# `df.index[997]` only worked because both frames happen to carry default
# integer labels.
geojson = geojson[geojson['station'] != 'bholothwa']

Copy row 274 ('bethlehem') and rename to 'bohlokong'.

In [12]:
# Reuse the boundary at position 274 for 'bohlokong': copy that row under
# the new station name, then append it to the boundaries.
station = geojson.iloc[[274]].assign(station='bohlokong')
geojson = pd.concat([geojson, station])

Copy row 509 ('cape town central') and rename to 'int airport c town'.

In [13]:
# Reuse the boundary at position 509 for 'int airport c town': copy that row
# under the new station name, then append it to the boundaries.
station = geojson.iloc[[509]].assign(station='int airport c town')
geojson = pd.concat([geojson, station])

Copy row 884 ('matatiele') and rename to 'pholile'.

In [14]:
# Reuse the boundary at position 884 for 'pholile': copy that row under the
# new station name, then append it to the boundaries.
station = geojson.iloc[[884]].assign(station='pholile')
geojson = pd.concat([geojson, station])

Copy row 428 ('protea glen') and rename to 'protea'.

In [15]:
# Reuse the boundary at position 428 for 'protea': copy that row under the
# new station name, then append it to the boundaries.
station = geojson.iloc[[428]].assign(station='protea')
geojson = pd.concat([geojson, station])

Copy row 175 ('flagstaff') and rename to 'qhasa'.

In [16]:
# Reuse the boundary at position 175 for 'qhasa': copy that row under the
# new station name, then append it to the boundaries.
station = geojson.iloc[[175]].assign(station='qhasa')
geojson = pd.concat([geojson, station])

Copy row 719 ('philippi east') and rename to 'samora machel'.

In [17]:
# Reuse the boundary at position 719 for 'samora machel': copy that row
# under the new station name, then append it to the boundaries.
station = geojson.iloc[[719]].assign(station='samora machel')
geojson = pd.concat([geojson, station])

Quick check of the corrections.

In [18]:
len(df['station'].unique()), len(geojson['station'])

(1157, 1157)

Now we can check the data types.

In [19]:
geojson.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1157 entries, 0 to 719
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   station   1157 non-null   object  
 1   geometry  1157 non-null   geometry
dtypes: geometry(1), object(1)
memory usage: 27.1+ KB


In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3665376 entries, 0 to 3665375
Data columns (total 8 columns):
 #   Column            Dtype  
---  ------            -----  
 0   station           object 
 1   province          object 
 2   district          object 
 3   crime_category    object 
 4   date              object 
 5   number_of_crimes  int32  
 6   latitude          float64
 7   longitude         float64
dtypes: float64(2), int32(1), object(5)
memory usage: 237.7+ MB


## Merge the DataFrames

Let's create a new DataFrame by merging the two DataFrames.

In [21]:
geospatial_districts = pd.merge(df, geojson, on=['station'], how='inner')

In [22]:
geospatial_districts

Unnamed: 0,station,province,district,crime_category,date,number_of_crimes,latitude,longitude,geometry
0,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-16,470,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
1,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-16,411,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
2,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Mar-16,477,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
3,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Jan-17,476,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
4,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,Feb-17,427,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
...,...,...,...,...,...,...,...,...,...
3665371,protea glen,Gauteng,Soweto West Cc,Truck hijacking,Nov-20,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."
3665372,protea glen,Gauteng,Soweto West Cc,Truck hijacking,Dec-20,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."
3665373,protea glen,Gauteng,Soweto West Cc,Truck hijacking,Oct-21,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."
3665374,protea glen,Gauteng,Soweto West Cc,Truck hijacking,Nov-21,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."


Let's convert the 'date' column to 'datetime'.

In [23]:
geospatial_districts['date'] = pd.to_datetime(geospatial_districts['date'], format='%b-%y')

Quick check.

In [24]:
geospatial_districts

Unnamed: 0,station,province,district,crime_category,date,number_of_crimes,latitude,longitude,geometry
0,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2016-01-01,470,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
1,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2016-02-01,411,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
2,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2016-03-01,477,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
3,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2017-01-01,476,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
4,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2017-02-01,427,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993..."
...,...,...,...,...,...,...,...,...,...
3665371,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2020-11-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."
3665372,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2020-12-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."
3665373,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2021-10-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."
3665374,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2021-11-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259..."


### Create Month and Year columns

In [25]:
# Derive the calendar parts from the parsed 'date' column with the
# vectorised `.dt` accessor instead of calling a Python lambda once per row.
# '%b' is the abbreviated month name ('Jan', 'Feb', ...).
geospatial_districts['month'] = geospatial_districts['date'].dt.strftime('%b')
# Cast to int64 so the column dtype does not depend on the pandas version.
geospatial_districts['year'] = geospatial_districts['date'].dt.year.astype('int64')
geospatial_districts

Unnamed: 0,station,province,district,crime_category,date,number_of_crimes,latitude,longitude,geometry,month,year
0,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2016-01-01,470,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993...",Jan,2016
1,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2016-02-01,411,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993...",Feb,2016
2,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2016-03-01,477,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993...",Mar,2016
3,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2017-01-01,476,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993...",Jan,2017
4,east london,Eastern Cape,East London Cc,17 Community Reported Serious Crime,2017-02-01,427,27.90288,-33.02058,"POLYGON ((27.78726 -32.99353, 27.78751 -32.993...",Feb,2017
...,...,...,...,...,...,...,...,...,...,...,...
3665371,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2020-11-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259...",Nov,2020
3665372,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2020-12-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259...",Dec,2020
3665373,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2021-10-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259...",Oct,2021
3665374,protea glen,Gauteng,Soweto West Cc,Truck hijacking,2021-11-01,0,27.83896,-26.27697,"POLYGON ((27.76024 -26.26161, 27.76137 -26.259...",Nov,2021


In [26]:
geospatial_districts.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3665376 entries, 0 to 3665375
Data columns (total 11 columns):
 #   Column            Dtype         
---  ------            -----         
 0   station           object        
 1   province          object        
 2   district          object        
 3   crime_category    object        
 4   date              datetime64[ns]
 5   number_of_crimes  int32         
 6   latitude          float64       
 7   longitude         float64       
 8   geometry          geometry      
 9   month             object        
 10  year              int64         
dtypes: datetime64[ns](1), float64(2), geometry(1), int32(1), int64(1), object(5)
memory usage: 321.6+ MB


In [58]:
def total_yearly_crimes(df, year, area, name):
    '''
    Sums 'number_of_crimes' for one named area in one year

    Args:
        df (obj): DataFrame with 'year' and 'number_of_crimes' columns and
            the column named by `area`
        year (int): Year to select
        area (str): Column to filter on; station, province or district
        name (str): Value of the `area` column to select

    Returns:
        Sum of 'number_of_crimes' over the matching rows (0 if none match)
    '''
    in_year = df['year'] == year
    in_area = df[area] == name
    selected = df[in_year & in_area]
    return selected['number_of_crimes'].sum()

In [64]:
print(total_yearly_crimes(geospatial_districts, 2016, 'province', 'Eastern Cape'))

587452


In [137]:
len(geospatial_districts.loc[(geospatial_districts['year'] == 2016) & (geospatial_districts['district'] == 'East London Cc'), 'station'].unique())

8

In [132]:
bg = geospatial_districts.loc[(geospatial_districts['year'] == 2016) & (geospatial_districts['district'] == 'East London Cc')]['latitude'].tolist()
len(bg)

4224

In [75]:
def total_yearly_area(df, year, area):
    '''
    Counts the rows of a given year that have a value in the given area column

    This is a row count, not a sum of 'number_of_crimes'.

    Args:
        df (obj): DataFrame with a 'year' column and the column named by `area`
        year (int): Year
        area (str): Column to check, e.g. 'province' or 'district'

    Returns:
        int: Number of rows whose 'year' equals `year` and whose `area`
            value is neither missing nor an empty string
    '''
    in_year = df['year'] == year
    area_values = df[area]
    # Spell out the condition instead of AND-ing the raw text column into the
    # mask, which relied on pandas coercing strings to booleans.
    has_area = area_values.notna() & (area_values != '')
    return int((in_year & has_area).sum())

In [77]:
total_yearly_area(geospatial_districts, 2016, 'district')

610896

In [144]:
def map_rtc(df, year, area):
    '''
    Builds a folium map with one circle marker per station of a province
    for a given year

    Args:
        df (obj): DataFrame with 'year', 'province', 'station', 'latitude'
            and 'longitude' columns
        year (int): Year to select
        area (str): Name of the province to select

    Returns:
        folium Map centred on the first selected station, with every marker
        in a single feature group

    Raises:
        ValueError: If no row matches the given year and province
    '''
    cond = (df['year'] == year) & (df['province'] == area)

    # The rows repeat the same station and coordinates across crime
    # categories and dates, so keep one row per distinct combination;
    # drawing every row would only stack identical markers.
    points = df.loc[cond, ['station', 'latitude', 'longitude', 'year']].drop_duplicates()
    if points.empty:
        raise ValueError(
            'No rows found for year %r and province %r' % (year, area))

    # NOTE(review): the coordinate columns are deliberately read crosswise.
    # In the rows shown in this notebook 'latitude' holds values around 27
    # and 'longitude' values around -33, which suggests the column labels are
    # swapped in the data -- TODO confirm and fix at the source.
    lon = points['latitude'].tolist()
    lat = points['longitude'].tolist()
    are = points['station'].tolist()
    yea = points['year'].tolist()

    html = '''<h4>Information</h4>
    <b>Lat: </b> %s <br />
    <b>Lon: </b> %s <br />
    <b>Station: </b> %s <br />
    <b>Year: </b> %s
    <script>
            L_PREFER_CANVAS = true;
            L_NO_TOUCH = false;
            L_DISABLE_3D = false;
    </script>
    '''
    # Centre on the first selected point, so a single match also works.
    crime_map = flm.Map(
        location=[lat[0], lon[0]],
        zoom_start=10, scrollWheelZoom=False)

    fg = flm.FeatureGroup(name='My V Map')

    for lt, ln, ar, ye in zip(lat, lon, are, yea):
        iframe = flm.IFrame(html=html % (lt, ln, ar, ye), height=165)
        popup = flm.Popup(iframe, min_width=200, max_width=500)
        fg.add_child(flm.CircleMarker(location=[lt, ln], popup=popup, fill_color='blue', color='None', radius=6, fill_opacity=0.5))

    # Attach the feature group once, after all markers have been added.
    crime_map.add_child(fg)
    return crime_map

In [None]:
map_rtc(geospatial_districts, 2016, 'Eastern Cape')