# Extracting datasets

## Relevant Imports

In [1]:
%matplotlib inline

from datetime import datetime

import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio

from adjustText import adjust_text
from shapely.geometry import Point, box
from rasterio.mask import mask

In [5]:
# Filepaths
raster_file = r'SVDNB_npp_20230101-20230131_75N060E_vcmcfg_v10_c202302080600.avg_rade9h.tif'
boundary_file = r'boundaries\geoBoundaries-JPN-ADM0.geojson'
output_csv = r'Germany_light_intensity.csv'

# Load Germany's Boundary
Germany = gpd.read_file(boundary_file)
Germany = Germany.to_crs(epsg=4326)

print(Germany.total_bounds)
# Open the Raster and Check Overlap
with rasterio.open(raster_file) as src:
    raster_bounds = box(*src.bounds)
    print("Raster Bounds:", src.bounds)
    print("Germany Bounds:", Germany.total_bounds)

    if not raster_bounds.intersects(Germany.unary_union):
        raise ValueError("Germany's boundary does not overlap with the raster extent.")

    # Clip the raster
    Germany_geom_list = [feature["geometry"] for feature in Germany.__geo_interface__["features"]]
    clipped_raster, clipped_transform = mask(src, Germany_geom_list, crop=True)

# Extract Raster Values
light_intensity = clipped_raster[0]
rows, cols = np.where(~np.isnan(light_intensity))
values = light_intensity[rows, cols]
x_coords, y_coords = rasterio.transform.xy(clipped_transform, rows, cols)

data = pd.DataFrame({
    'longitude': x_coords,
    'latitude': y_coords,
    'light_intensity': values
})
data.to_csv(output_csv, index=False)
print(f"Extracted data saved to {output_csv}")


[122.93391306  24.04561583 153.98667512  45.55723905]
Raster Bounds: BoundingBox(left=59.99791666665, bottom=0.0020827333499937595, right=179.99791762665, top=75.00208333335)
Germany Bounds: [122.93391306  24.04561583 153.98667512  45.55723905]


  if not raster_bounds.intersects(Germany.unary_union):


: 

## Ookla Speedtest Data

In [2]:
def quarter_start(year: int, q: int) -> datetime:
    if not 1 <= q <= 4:
        raise ValueError("Quarter must be within [1, 2, 3, 4]")

    month = [1, 4, 7, 10]
    return datetime(year, month[q - 1], 1)


def get_tile_url(service_type: str, year: int, q: int) -> str:
    dt = quarter_start(year, q)

    base_url = "https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance"
    url = f"{base_url}/type%3D{service_type}/year%3D{dt:%Y}/quarter%3D{q}/{dt:%Y-%m-%d}_performance_{service_type}_tiles.zip"
    return url

In [3]:
tile_url = get_tile_url("fixed", 2020, 2)
tile_url

'https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance/type%3Dfixed/year%3D2020/quarter%3D2/2020-04-01_performance_fixed_tiles.zip'

In [4]:
tiles = gp.read_file(tile_url)

In [5]:
tiles.head()

Unnamed: 0,quadkey,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,geometry
0,22133222313202,8630,3638,44,1,1,"POLYGON ((-160.00488 70.64723, -159.99939 70.6..."
1,22133222330023,597,597,43,1,1,"POLYGON ((-160.04333 70.63631, -160.03784 70.6..."
2,22133222330203,9183,2949,43,1,1,"POLYGON ((-160.04333 70.63266, -160.03784 70.6..."
3,22330200132223,4208,4032,27,1,1,"POLYGON ((-162.85583 68.07536, -162.85034 68.0..."
4,22332201321330,9971,3661,32,1,1,"POLYGON ((-162.52075 66.95158, -162.51526 66.9..."


## VIIRS Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sample data: approval times (in hours) over 30 days
approval_times = [
    22, 23, 25, 24, 26, 27, 22, 20, 25, 24, 28, 23, 22, 21, 18, 
    27, 25, 24, 19, 29, 30, 25, 26, 22, 21, 24, 23, 28, 29, 27
]

# Calculate mean and standard deviation
mean_approval_time = np.mean(approval_times)
std_dev_approval_time = np.std(approval_times)

# Calculate control limits
UCL = mean_approval_time + 3 * std_dev_approval_time
LCL = mean_approval_time - 3 * std_dev_approval_time

# Generate x-axis values (days)
days = np.arange(1, len(approval_times) + 1)

# Plot the control chart
plt.figure(figsize=(10, 6))
plt.plot(days, approval_times, marker='o', label='Approval Times', color='blue')
plt.axhline(mean_approval_time, color='green', linestyle='--', label='Mean')
plt.axhline(UCL, color='red', linestyle='--', label='UCL (Upper Control Limit)')
plt.axhline(LCL, color='red', linestyle='--', label='LCL (Lower Control Limit)')

# Chart formatting
plt.title('Control Chart for Approval Times')
plt.xlabel('Days')
plt.ylabel('Approval Time (hours)')
plt.xticks(days)
plt.legend()
plt.grid(True)

# Show the plot
plt.tight_layout()
plt.show()
