#### Import necessary dependencies

In [None]:
import geopandas as gpd
import pandas as pd
import folium
import boto3
from folium.plugins import MarkerCluster

#### Read the input geospatial files (Ookla GeoJSON and ward boundary shapefile)

In [None]:
# Ookla speed-test tiles, already clipped to South Africa.
# The source Ookla dataset covers the whole world and is very large, so it was
# overlaid with the SA boundary in QGIS and clipped there; only the SA tiles
# are of interest. The file read below is the output of that clipping step.
OOKLA_SA_URL = 'https://2207-17-fibre-competitive-intensity-model-b.s3.eu-west-1.amazonaws.com/SA+ookla+clipped.geojson'

# 2011 administrative ward boundaries for South Africa, read from a local copy.
# To read from the S3 bucket instead, point WARDS_SHP_PATH at:
#   https://2207-17-fibre-competitive-intensity-model-b.s3.eu-west-1.amazonaws.com/2011+Wards+Administrative+boundaries/Wards2011.shp
WARDS_SHP_PATH = "C:/Users/nmwem/OneDrive/Desktop/SA-Maps/Wards/Wards2011.shp"

gdf = gpd.read_file(OOKLA_SA_URL)
gdf1 = gpd.read_file(WARDS_SHP_PATH)

#### Calculate the centroid of each Ookla tile polygon. Reducing every tile to a single central point simplifies the overlay on ward boundaries: a point falls in at most one ward, whereas a tile polygon can straddle an administrative boundary

In [None]:
# Replace each tile polygon with its centroid; all other columns are kept.
tile_centroids = gdf['geometry'].centroid
points = gdf.copy()
points['geometry'] = tile_centroids

#### Spatial join to assign each point to a respective ward

In [None]:
# Attach the attributes of the containing ward to every tile centroid.
# how='left' keeps centroids that fall in no ward (their ward columns are NaN).
# `predicate` replaces the `op` keyword, which was deprecated in geopandas 0.10
# and removed in 1.0.
# NOTE(review): sjoin assumes both frames share a CRS - confirm that the ward
# shapefile uses the same CRS as the Ookla tiles.
joined = gpd.sjoin(points, gdf1, how='left', predicate='within')

#### Write the dataframe to a new CSV file and save the output in an S3 bucket

In [None]:
# Create the S3 client using boto3's default credential chain (environment
# variables AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY, the shared
# ~/.aws/credentials file, or an attached IAM role).
# SECURITY: credentials must never be hard-coded in the notebook. The key pair
# that was previously embedded here should be treated as compromised and
# rotated in IAM.
client = boto3.client('s3')

# Name of the destination S3 bucket
bucket = "2207-17-fibre-competitive-intensity-model-b"

# The same name is used for the local CSV file and for the S3 object key
output_csv = 'joined-ookla_data_ward_final.csv'

# Write the joined dataframe to a local CSV file (without the index column)
joined.to_csv(output_csv, index=False)

# Upload the CSV file to the S3 bucket
client.upload_file(output_csv, Bucket=bucket, Key=output_csv)

#### Count the number of points in each ward

In [None]:
# Number of tile centroids per ward, as a two-column frame (WARD_ID, count).
tiles_per_ward = joined.groupby('WARD_ID').size()
counts = tiles_per_ward.reset_index(name='count')

#### Merge the counts with the ward boundary data

In [None]:
# Attach the per-ward counts to the ward polygons. The join is an inner join
# (the default), so wards without a row in `counts` are not present in `merged`.
merged = gdf1.merge(counts, how='inner', on='WARD_ID')

In [None]:
# Preview the first rows of the merged ward/count table
merged.head()

#### Plot the map with color-coded ward boundaries and point markers for the centroids of each tile

In [None]:
# Wards shaded by their tile count, with a colour-bar legend
ax = merged.plot(
    column='count',
    cmap='Greens',
    legend=True,
    figsize=(12, 8),
)
# Tile centroids drawn on the same axes as small red dots
points.plot(ax=ax, markersize=1, color='red')
ax.set_title('Number of Ookla Tiles per Ward')
ax.set_axis_off()

#### Visualizing the result with folium

In [None]:
# Reproject to WGS84 (EPSG:4326) first: folium positions the map in
# latitude/longitude, so both the bounding box and the GeoJSON must come from
# the reprojected frame rather than from the ward data's native CRS.
merged_wgs84 = merged.to_crs(epsg=4326)

# Bounding box of the merged ward data, in degrees
bounds = merged_wgs84.total_bounds
min_lon, min_lat, max_lon, max_lat = bounds

# Create the folium map object, centred on the middle of the bounding box
m = folium.Map(location=[(min_lat + max_lat)/2, (min_lon + max_lon)/2], zoom_start=5)

# Convert the data to a GeoJSON-like mapping for the folium layers
merged_geojson = merged_wgs84.__geo_interface__

In [None]:
# Print a summary of the merged table (columns, dtypes, non-null counts)
merged.info()

#### Visualisation via folium

In [None]:
# Create choropleth map
folium.Choropleth(
    geo_data=merged_geojson,
    data=merged,
    columns=['WARD_ID', 'count'],
    key_on='feature.properties.WARD_ID',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of tests per ward',

).add_to(m)


#### Add GeoJsonTooltip to show the municipality, ward ID and exact number of tests for each ward

In [None]:
# Hover tooltip listing municipality name, ward ID and count for each ward
ward_tooltip = folium.features.GeoJsonTooltip(
    fields=['MUNICNAME', 'WARD_ID', 'count'],
    aliases=['Municipality:', 'Ward_ID:', 'Number of tests:'],
    style='background-color: grey; color: white;',
)

# Separate GeoJson layer that carries the tooltip, added to the same map
tooltip_layer = folium.GeoJson(merged_geojson, name='tooltip', tooltip=ward_tooltip)
tooltip_layer.add_to(m)

#### Show map

In [None]:
# Leaving the map object as the cell's last expression displays it in the notebook
m