# Extracting datasets

## Relevant Imports

In [1]:
%matplotlib inline

from datetime import datetime

import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio

from adjustText import adjust_text
from shapely.geometry import Point, box
from rasterio.mask import mask

## VIIRS Data

### Japan Data

In [2]:
# Filepaths (forward slashes so the notebook also runs outside Windows)
raster_file = r'SVDNB_npp_20230101-20230131_75N060E_vcmcfg_v10_c202302080600.avg_rade9h.tif'
boundary_file = 'boundaries/geoBoundaries-JPN-ADM0.geojson'
output_csv = r'Japan_light_intensity.csv'

# Load Japan's Boundary and express it in lon/lat (EPSG:4326)
# NOTE(review): the overlap test and the clip below assume the raster is in
# EPSG:4326 as well -- its printed bounds look like degrees; confirm via src.crs.
Japan = gpd.read_file(boundary_file)
Japan = Japan.to_crs(epsg=4326)

print(Japan.total_bounds)
# Open the Raster and Check Overlap
with rasterio.open(raster_file) as src:
    raster_bounds = box(*src.bounds)
    print("Raster Bounds:", src.bounds)
    print("Japan Bounds:", Japan.total_bounds)

    # union_all() is the replacement for the deprecated `unary_union` attribute.
    if not raster_bounds.intersects(Japan.union_all()):
        raise ValueError("Japan's boundary does not overlap with the raster extent.")

    # Clip the raster.
    # By default mask() overwrites pixels outside the shapes with the raster's
    # nodata value (or 0 when none is set), never with NaN, so a NaN test alone
    # would keep every pixel of the bounding rectangle. filled=False returns a
    # masked array whose mask marks the pixels that are outside the boundary.
    Japan_geom_list = [feature["geometry"] for feature in Japan.__geo_interface__["features"]]
    clipped_raster, clipped_transform = mask(src, Japan_geom_list, crop=True, filled=False)

# Extract Raster Values (first band only)
light_intensity = clipped_raster[0]
# Keep pixels that are inside the boundary and hold a real (non-NaN) value.
inside_boundary = ~np.ma.getmaskarray(light_intensity)
has_value = ~np.isnan(light_intensity.data)
rows, cols = np.where(inside_boundary & has_value)
values = light_intensity.data[rows, cols]
# Convert array indices to the coordinates of the matching pixel centres.
x_coords, y_coords = rasterio.transform.xy(clipped_transform, rows, cols)

data = pd.DataFrame({
    'longitude': x_coords,
    'latitude': y_coords,
    'light_intensity': values
})
data.to_csv(output_csv, index=False)
print(f"Extracted data saved to {output_csv}")


[122.93391306  24.04561583 153.98667512  45.55723905]
Raster Bounds: BoundingBox(left=59.99791666665, bottom=0.0020827333499937595, right=179.99791762665, top=75.00208333335)
Japan Bounds: [122.93391306  24.04561583 153.98667512  45.55723905]


  if not raster_bounds.intersects(Japan.unary_union):


Extracted data saved to Japan_light_intensity.csv


### Philippines Data

In [3]:
# Filepaths (forward slashes so the notebook also runs outside Windows)
raster_file = r'SVDNB_npp_20230101-20230131_75N060E_vcmcfg_v10_c202302080600.avg_rade9h.tif'
boundary_file = 'boundaries/geoBoundaries-PHL-ADM0.geojson'
output_csv = r'Philippines_light_intensity.csv'

# Load the Philippines' Boundary and express it in lon/lat (EPSG:4326)
# NOTE(review): the overlap test and the clip below assume the raster is in
# EPSG:4326 as well -- its printed bounds look like degrees; confirm via src.crs.
Philippines = gpd.read_file(boundary_file)
Philippines = Philippines.to_crs(epsg=4326)

print(Philippines.total_bounds)
# Open the Raster and Check Overlap
with rasterio.open(raster_file) as src:
    raster_bounds = box(*src.bounds)
    print("Raster Bounds:", src.bounds)
    print("Philippines Bounds:", Philippines.total_bounds)

    # union_all() is the replacement for the deprecated `unary_union` attribute.
    if not raster_bounds.intersects(Philippines.union_all()):
        raise ValueError("Philippines's boundary does not overlap with the raster extent.")

    # Clip the raster.
    # By default mask() overwrites pixels outside the shapes with the raster's
    # nodata value (or 0 when none is set), never with NaN, so a NaN test alone
    # would keep every pixel of the bounding rectangle. filled=False returns a
    # masked array whose mask marks the pixels that are outside the boundary.
    Philippines_geom_list = [feature["geometry"] for feature in Philippines.__geo_interface__["features"]]
    clipped_raster, clipped_transform = mask(src, Philippines_geom_list, crop=True, filled=False)

# Extract Raster Values (first band only)
light_intensity = clipped_raster[0]
# Keep pixels that are inside the boundary and hold a real (non-NaN) value.
inside_boundary = ~np.ma.getmaskarray(light_intensity)
has_value = ~np.isnan(light_intensity.data)
rows, cols = np.where(inside_boundary & has_value)
values = light_intensity.data[rows, cols]
# Convert array indices to the coordinates of the matching pixel centres.
x_coords, y_coords = rasterio.transform.xy(clipped_transform, rows, cols)

data = pd.DataFrame({
    'longitude': x_coords,
    'latitude': y_coords,
    'light_intensity': values
})
data.to_csv(output_csv, index=False)
print(f"Extracted data saved to {output_csv}")


[114.27790169   4.5872945  126.60495743  21.12178332]
Raster Bounds: BoundingBox(left=59.99791666665, bottom=0.0020827333499937595, right=179.99791762665, top=75.00208333335)
Philippines Bounds: [114.27790169   4.5872945  126.60495743  21.12178332]


  if not raster_bounds.intersects(Philippines.unary_union):


Extracted data saved to Philippines_light_intensity.csv


### Taiwan Data 

In [4]:
# Filepaths (forward slashes so the notebook also runs outside Windows)
raster_file = r'SVDNB_npp_20230101-20230131_75N060E_vcmcfg_v10_c202302080600.avg_rade9h.tif'
boundary_file = 'boundaries/geoBoundaries-TWN-ADM0.geojson'
output_csv = r'Taiwan_light_intensity.csv'

# Load Taiwan's Boundary and express it in lon/lat (EPSG:4326)
# NOTE(review): the overlap test and the clip below assume the raster is in
# EPSG:4326 as well -- its printed bounds look like degrees; confirm via src.crs.
Taiwan = gpd.read_file(boundary_file)
Taiwan = Taiwan.to_crs(epsg=4326)

print(Taiwan.total_bounds)
# Open the Raster and Check Overlap
with rasterio.open(raster_file) as src:
    raster_bounds = box(*src.bounds)
    print("Raster Bounds:", src.bounds)
    print("Taiwan Bounds:", Taiwan.total_bounds)

    # union_all() is the replacement for the deprecated `unary_union` attribute.
    if not raster_bounds.intersects(Taiwan.union_all()):
        raise ValueError("Taiwan's boundary does not overlap with the raster extent.")

    # Clip the raster.
    # By default mask() overwrites pixels outside the shapes with the raster's
    # nodata value (or 0 when none is set), never with NaN, so a NaN test alone
    # would keep every pixel of the bounding rectangle. filled=False returns a
    # masked array whose mask marks the pixels that are outside the boundary.
    Taiwan_geom_list = [feature["geometry"] for feature in Taiwan.__geo_interface__["features"]]
    clipped_raster, clipped_transform = mask(src, Taiwan_geom_list, crop=True, filled=False)

# Extract Raster Values (first band only)
light_intensity = clipped_raster[0]
# Keep pixels that are inside the boundary and hold a real (non-NaN) value.
inside_boundary = ~np.ma.getmaskarray(light_intensity)
has_value = ~np.isnan(light_intensity.data)
rows, cols = np.where(inside_boundary & has_value)
values = light_intensity.data[rows, cols]
# Convert array indices to the coordinates of the matching pixel centres.
x_coords, y_coords = rasterio.transform.xy(clipped_transform, rows, cols)

data = pd.DataFrame({
    'longitude': x_coords,
    'latitude': y_coords,
    'light_intensity': values
})
data.to_csv(output_csv, index=False)
print(f"Extracted data saved to {output_csv}")


[118.20920337  21.89259297 122.03704767  26.25789185]
Raster Bounds: BoundingBox(left=59.99791666665, bottom=0.0020827333499937595, right=179.99791762665, top=75.00208333335)
Taiwan Bounds: [118.20920337  21.89259297 122.03704767  26.25789185]


  if not raster_bounds.intersects(Taiwan.unary_union):


Extracted data saved to Taiwan_light_intensity.csv


## Ookla Speedtest Data

In [2]:
def quarter_start(year: int, q: int) -> datetime:
    """Return the first day of calendar quarter ``q`` in ``year``.

    Args:
        year: Calendar year.
        q: Quarter number, 1 through 4.

    Returns:
        A ``datetime`` for the 1st of January, April, July or October.

    Raises:
        ValueError: If ``q`` is outside 1..4.
    """
    in_range = 1 <= q <= 4
    if not in_range:
        raise ValueError("Quarter must be within [1, 2, 3, 4]")

    # Opening month of Q1..Q4, looked up by zero-based quarter index.
    opening_months = (1, 4, 7, 10)
    first_month = opening_months[q - 1]
    return datetime(year, first_month, 1)


def get_tile_url(service_type: str, year: int, q: int) -> str:
    """Build the URL of the Ookla performance-tiles shapefile archive.

    Args:
        service_type: Value placed in the ``type=`` path segment and in the
            archive file name (the notebook calls this with ``"fixed"``).
        year: Calendar year of the release.
        q: Quarter number, 1 through 4.

    Returns:
        The URL of the ``*_tiles.zip`` archive for that service type and quarter.

    Raises:
        ValueError: Propagated from ``quarter_start`` when ``q`` is not in 1..4.
    """
    start = quarter_start(year, q)

    # "%3D" is a percent-encoded "=", giving key=value style path segments.
    segments = (
        "https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance",
        f"type%3D{service_type}",
        f"year%3D{start:%Y}",
        f"quarter%3D{q}",
        f"{start:%Y-%m-%d}_performance_{service_type}_tiles.zip",
    )
    return "/".join(segments)

In [3]:
# URL of the "fixed" tiles for Q2 2020; the bare name echoes it as the cell output.
tile_url = get_tile_url(service_type="fixed", year=2020, q=2)
tile_url

'https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance/type%3Dfixed/year%3D2020/quarter%3D2/2020-04-01_performance_fixed_tiles.zip'

In [4]:
# Read the zipped shapefile straight from the URL into a GeoDataFrame.
# geopandas is imported as `gpd` in this notebook; the previous `gp` alias is
# never defined, so the cell raised NameError on a fresh kernel.
tiles = gpd.read_file(tile_url)

In [5]:
# Preview the first five rows of the loaded tiles.
tiles.head()

Unnamed: 0,quadkey,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,geometry
0,22133222313202,8630,3638,44,1,1,"POLYGON ((-160.00488 70.64723, -159.99939 70.6..."
1,22133222330023,597,597,43,1,1,"POLYGON ((-160.04333 70.63631, -160.03784 70.6..."
2,22133222330203,9183,2949,43,1,1,"POLYGON ((-160.04333 70.63266, -160.03784 70.6..."
3,22330200132223,4208,4032,27,1,1,"POLYGON ((-162.85583 68.07536, -162.85034 68.0..."
4,22332201321330,9971,3661,32,1,1,"POLYGON ((-162.52075 66.95158, -162.51526 66.9..."
