#### Import necessary dependencies


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
import folium
import boto3

#### Read the clipped Ookla data for South Africa

In [None]:
# The worldwide Ookla speed-test dataset is a very large file, and only the
# South African records are of interest here. It was therefore overlaid with
# the SA boundary and clipped in QGIS beforehand; this cell loads the file
# produced by that clipping step.
ookla_clipped_url = 'https://2207-17-fibre-competitive-intensity-model-b.s3.eu-west-1.amazonaws.com/SA+ookla+clipped.geojson'
gdf = gpd.read_file(ookla_clipped_url)

#### Display the first few rows of the DataFrame

In [None]:
# Preview the first five rows of the clipped Ookla data (five is head()'s default).
gdf.head(n=5)

#### Plot clipped Ookla data

In [None]:
# Draw the clipped Ookla geometries on a single 5x5-inch axes.
fig = plt.figure(figsize=(5, 5))
ax = fig.subplots()
gdf.plot(ax=ax)
plt.show()

#### Read the administrative municipal boundary data for South Africa

In [None]:
# Load the 2018 local municipal boundary layer from the project bucket.
municipal_boundary_url = 'https://2207-17-fibre-competitive-intensity-model-b.s3.eu-west-1.amazonaws.com/MDB_Local_Municipal_Boundary_2018.geojson'
gdf1 = gpd.read_file(municipal_boundary_url)

#### Display the first few rows of the DataFrame

In [None]:
# Preview the first five rows of the municipal boundary data (five is head()'s default).
gdf1.head(n=5)

#### Plot South Africa administrative boundary

In [None]:
# Draw the municipal boundaries on a single 5x5-inch axes.
fig = plt.figure(figsize=(5, 5))
ax = fig.subplots()
gdf1.plot(ax=ax)
plt.show()

#### Convert the DATE column to a string (Timestamp objects are not JSON serializable, and Folium needs JSON-serializable properties to run)

In [None]:
# Cast DATE to str so the column holds plain strings before the frame is
# turned into GeoJSON for the map cells further down.
date_as_text = gdf1['DATE'].astype(str)
gdf1['DATE'] = date_as_text

#### Perform a spatial join to find the municipality each Ookla entry belongs to

In [None]:
# Attach the municipality attributes from gdf1 to every Ookla record in gdf
# whose geometry lies within a municipal boundary. With sjoin's default inner
# join, records that match no municipality are left out of the result.
# `predicate` replaces the old `op` keyword, which geopandas deprecated in
# 0.10 and removed in 1.0.
ookla_municipality = gpd.sjoin(gdf, gdf1, predicate='within')

#### Display the first few rows of the joined DataFrame

In [None]:
# Preview the first five rows of the joined frame (five is head()'s default).
ookla_municipality.head(n=5)

#### Number of rows in the overlaid dataset

In [None]:
# The first entry of .shape is the number of rows in the joined frame.
row_count1 = ookla_municipality.shape[0]
print(row_count1)

#### Visualizing the result with folium

In [None]:
# Reproject once to WGS84 (EPSG:4326). Both the map centre and the GeoJSON
# layer are derived from this frame, so they share one coordinate system and
# the centre is expressed in latitude/longitude degrees whatever CRS the
# joined frame was stored in.
ookla_wgs84 = ookla_municipality.to_crs(epsg=4326)

# Get bounding box for ookla_municipality data; total_bounds is unpacked
# below as (min x, min y, max x, max y), i.e. longitude before latitude.
bounds = ookla_wgs84.total_bounds
min_lon, min_lat, max_lon, max_lat = bounds

# Create folium map object centred on the middle of the bounding box.
# folium expects the location as [latitude, longitude].
m = folium.Map(location=[(min_lat + max_lat) / 2, (min_lon + max_lon) / 2], zoom_start=5)

# Convert data to GeoJSON format
ookla_municipality_geojson = ookla_wgs84.__geo_interface__

#### Visualisation via folium


In [None]:
# Build the choropleth layer: each GeoJSON feature is coloured by the 'tests'
# value of the data row that shares its 'quadkey' property.
tests_choropleth = folium.Choropleth(
    geo_data=ookla_municipality_geojson,
    data=ookla_municipality,
    columns=['quadkey', 'tests'],
    key_on='feature.properties.quadkey',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of tests',
)
tests_choropleth.add_to(m)


In [None]:
# Add a second GeoJson layer that carries a tooltip with the municipality
# name, quadkey and test count of each feature.
hover_tooltip = folium.features.GeoJsonTooltip(
    fields=['MUNICNAME', 'quadkey', 'tests'],
    aliases=['Municipality:', 'Quadkey:', 'Number of tests per municipality:'],
    style='background-color: grey; color: white;',
)
tooltip_layer = folium.GeoJson(
    ookla_municipality_geojson,
    name='tooltip',
    tooltip=hover_tooltip,
)
tooltip_layer.add_to(m)

#### Show map

In [None]:
# Leaving the map object as the cell's last expression displays it in the
# notebook output.
m

#### Write the dataframe to a new CSV file and save the output in an S3 bucket

In [None]:
# Create the S3 client without embedding credentials in the notebook. boto3
# resolves them from its default chain: environment variables
# (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY), the shared ~/.aws/credentials
# file, or an attached IAM role.
# NOTE(review): an access key pair used to be hard-coded in this call. Treat
# that key as exposed and rotate it in IAM.
client = boto3.client('s3')

# Provide the name of the s3 bucket
bucket = "2207-17-fibre-competitive-intensity-model-b"

# One name serves as both the local CSV path and the S3 object key.
output_csv = "joined-ookla_data_municipality_final.csv"

# Convert df to CSV (row index omitted)
ookla_municipality.to_csv(output_csv, index=False)

# Save CSV file to S3 bucket
client.upload_file(output_csv, Bucket=bucket, Key=output_csv)