In [8]:
%matplotlib inline

from datetime import datetime
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from shapely.geometry import Point
from adjustText import adjust_text

In [2]:
def quarter_start(year: int, q: int) -> datetime:
    """Return midnight on the first day of quarter ``q`` in ``year``.

    Args:
        year: Calendar year.
        q: Quarter number, 1 through 4.

    Returns:
        A ``datetime`` for the first day of the quarter's first month.

    Raises:
        ValueError: If ``q`` is outside the range 1..4.
    """
    # First month of each quarter: Jan, Apr, Jul, Oct.
    first_months = (1, 4, 7, 10)

    if 1 <= q <= 4:
        return datetime(year, first_months[q - 1], 1)

    raise ValueError("Quarter must be within [1, 2, 3, 4]")


In [3]:
def get_tile_url(service_type: str, year: int, q: int) -> str:
    """Build the URL of a quarterly performance-tiles shapefile archive.

    Args:
        service_type: Service type segment inserted into the path and the
            file name (the notebook calls this with "fixed").
        year: Calendar year of the dataset.
        q: Quarter number, 1 through 4 (validated by ``quarter_start``).

    Returns:
        The full URL of the ``*_performance_<service_type>_tiles.zip`` file
        in the ookla-open-data S3 bucket.

    Raises:
        ValueError: Propagated from ``quarter_start`` when ``q`` is invalid.
    """
    dt = quarter_start(year, q)
    year_str = dt.strftime("%Y")
    date_str = dt.strftime("%Y-%m-%d")

    base_url = "https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance"
    # "%3D" is a URL-encoded "=", so the segments read type=..., year=..., quarter=...
    segments = [
        base_url,
        f"type%3D{service_type}",
        f"year%3D{year_str}",
        f"quarter%3D{q}",
        f"{date_str}_performance_{service_type}_tiles.zip",
    ]
    return "/".join(segments)

In [4]:
# Download data
# Build the URL for the Q2 2023 "fixed" tiles archive and load it into a GeoDataFrame.
tile_url = get_tile_url("fixed", 2023, 2)
print(f"Fetching data from: {tile_url}")
# geopandas is imported as `gpd` in the imports cell; the previous `gp` alias is
# not defined on a fresh kernel and raised NameError under Restart & Run All.
tiles = gpd.read_file(tile_url)
tiles.head()

Fetching data from: https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance/type%3Dfixed/year%3D2023/quarter%3D2/2023-04-01_performance_fixed_tiles.zip


Unnamed: 0,quadkey,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,geometry
0,22133222312322,94200,7623,251,3,2,"POLYGON ((-160.02686 70.64359, -160.02136 70.6..."
1,22133222330023,113863,14980,132,6,3,"POLYGON ((-160.04333 70.63631, -160.03784 70.6..."
2,22133222330032,80397,19455,126,5,3,"POLYGON ((-160.03784 70.63631, -160.03235 70.6..."
3,22133222330100,230050,16208,110,1,1,"POLYGON ((-160.02686 70.64177, -160.02136 70.6..."
4,22133222330210,121560,7163,169,8,4,"POLYGON ((-160.03784 70.63448, -160.03235 70.6..."


In [11]:
import os

# Relative path of the folder holding the country boundary files.
# NOTE(review): the folder name is spelled "boundaires"; it is kept as-is because
# it presumably matches the directory on disk — confirm before renaming.
boundaries_folder = "./boundaires/"

# Map each country name to the path of its ADM0 boundary GeoJSON file.
geojson_files = {
    country_name: os.path.join(boundaries_folder, filename)
    for country_name, filename in [
        ("Philippines", "geoBoundaries-PHL-ADM0.geojson"),
        ("Japan", "geoBoundaries-JPN-ADM0.geojson"),
        ("Taiwan", "geoBoundaries-TWN-ADM0.geojson"),
    ]
}

# Per-country join results, collected for concatenation below.
all_tiles = []

for country, geojson_path in geojson_files.items():
    # Read the boundary and reproject it to EPSG:4326
    # (assumed to be the CRS of `tiles` — TODO confirm).
    boundary = gpd.read_file(geojson_path).to_crs(4326)
    print(f"{country} boundary loaded:")
    print(boundary.head())

    # Inner spatial join: keep only tiles that intersect this country's boundary.
    country_tiles = gpd.sjoin(tiles, boundary, how="inner", predicate='intersects')

    # Derive Mbps columns from the kbps ones and tag every row with its country.
    country_tiles = country_tiles.assign(
        avg_d_mbps=country_tiles['avg_d_kbps'] / 1000,
        avg_u_mbps=country_tiles['avg_u_kbps'] / 1000,
        country=country,
    )

    all_tiles.append(country_tiles)

# Stack the per-country frames into one GeoDataFrame with a fresh 0..n-1 index.
combined_tiles = gpd.GeoDataFrame(pd.concat(all_tiles, ignore_index=True))
print("Combined tiles data:")
print(combined_tiles.head())

# Persist the combined table as CSV, without the index column.
combined_tiles.to_csv("combined_tiles.csv", index=False)


Philippines boundary loaded:
         shapeName shapeISO                  shapeID shapeGroup shapeType  \
0  The Philippines           24100683B85265433280220        PHL      ADM0   

                                            geometry  
0  MULTIPOLYGON (((119.46876 4.5936, 119.46807 4....  
Japan boundary loaded:
  shapeName shapeISO                  shapeID shapeGroup shapeType  \
0     Japan      JPN  22093344B50624940221487        JPN      ADM0   

                                            geometry  
0  MULTIPOLYGON (((123.76601 24.06841, 123.76603 ...  
Taiwan boundary loaded:
           shapeName shapeISO                 shapeID shapeGroup shapeType  \
0  Republic Of China      TWN  7377636B13659935760300        TWN      ADM0   

                                            geometry  
0  MULTIPOLYGON (((121.60839 22.00133, 121.60884 ...  
Combined tiles data:
            quadkey  avg_d_kbps  avg_u_kbps  avg_lat_ms  tests  devices  \
0  1323011030310113        4316        4823  