In [None]:
# Uncomment to install the third-party packages imported below
# (`dotenv` is provided by the python-dotenv distribution):
# %pip install pandas geopandas python-dotenv matplotlib

# Import Packages

In [None]:
import os
import pandas as pd
import geopandas as gpd
from dotenv import load_dotenv
import matplotlib.pyplot as plt

# Set-up Environment

In [None]:
# Load variables from the .env file into the process environment.
load_dotenv()

# Directory holding the cached files; supplied through the environment.
CACHE_STORAGE_DIR = os.getenv("CACHE_STORAGE_DIR")
if not CACHE_STORAGE_DIR:
    # Fail fast: otherwise the path below would silently become "None/<file>"
    # and only surface later as a confusing file-not-found error.
    raise EnvironmentError(
        "CACHE_STORAGE_DIR is not set; define it in the environment or in the .env file."
    )

# CSV read in the data-loading cell below.
# NOTE(review): per its name, presumably the list of downloaded hourly ECCC
# data with duplicate locations removed — confirm against the producer.
ECCC_CACHE_HOURLY_DOWNLOADED_DATA_LIST_FILE = os.path.join(
    CACHE_STORAGE_DIR,
    "eccc_hourly_downloaded_data_no_dupe_loc.csv",
)

# Data Loading

In [None]:
# Read the cached CSV into a DataFrame, then show it as the cell output.
eccc_data_exits_df = pd.read_csv(ECCC_CACHE_HOURLY_DOWNLOADED_DATA_LIST_FILE)
eccc_data_exits_df

In [None]:
# Point geometries from the coordinate columns (x = longitude, y = latitude).
station_points = gpd.points_from_xy(
    x=eccc_data_exits_df["Longitude"],
    y=eccc_data_exits_df["Latitude"],
)

# Attach the points to the table and declare them as EPSG:4326 coordinates.
eccc_data_exits_gdf = gpd.GeoDataFrame(
    eccc_data_exits_df,
    geometry=station_points,
    crs="EPSG:4326",
)


In [None]:
# Estimate a single UTM CRS for the whole station set and display it.
# NOTE(review): one UTM zone is only accurate near its central meridian; if the
# stations span many zones, distances computed far from it will be distorted —
# confirm this is acceptable for the nearest-station statistics below.
utm_crs = eccc_data_exits_gdf.estimate_utm_crs()
utm_crs

In [None]:
# Reproject the stations from geographic coordinates to the estimated UTM CRS
# so that the distances computed below are planar rather than in degrees.
eccc_data_exits_gdf = eccc_data_exits_gdf.to_crs(crs=utm_crs)

# Data Extraction

In [None]:
# For every station, find the distance (in CRS units) to its nearest *other* station.
# Work on a positionally indexed copy of the geometry column so that the
# enumerate() position and the index label are guaranteed to be the same thing,
# whatever index the GeoDataFrame carries.
station_geometries = eccc_data_exits_gdf.geometry.reset_index(drop=True)

closest_station_distance = []
for position, geom in enumerate(station_geometries):
    # every station except the current one
    other_stations = station_geometries.drop(index=position)
    # distance from the current station to its nearest neighbour
    distance = other_stations.distance(geom).min()
    # a zero distance means another station shares this exact location;
    # such stations are left out of the statistics
    if distance == 0:
        continue
    closest_station_distance.append(distance)

In [None]:
distance_col = "Distance in km"

# The distances come from the UTM-projected geometries (metres);
# dividing by 1000 stores them in kilometres, matching the column name.
distance_df = pd.DataFrame({distance_col: closest_station_distance})
distance_df[distance_col] = distance_df[distance_col] / 1000

# Summary statistics of the nearest-station distance, in km.
distance_km = distance_df[distance_col]
mean_distance = distance_km.mean()
median_distance = distance_km.median()
# 0.9 quantile, i.e. the 90th percentile (despite the "quartile" in the name)
quartile_90_distance = distance_km.quantile(0.9)



# Visualization

In [None]:
# Histogram of the nearest-station distances (km), annotated with the mean and
# the 90th percentile, saved as a transparent PNG and then displayed.
ax = distance_df[distance_col].plot(
    kind="hist",
    bins=50,
    figsize=(15, 3),
)

ax.set_xlabel(distance_col, fontsize=14)
ax.set_ylabel("Frequency", fontsize=14)
ax.tick_params(axis="both", labelsize=14)

# Draw the reference lines on the histogram's own axes rather than through the
# implicit pyplot "current axes".
ax.axvline(
    mean_distance,
    color="red",
    linestyle="dashed",
    linewidth=2,
    label=f"Mean: {mean_distance:.2f} km",
)

# quartile_90_distance is the 0.9 quantile, so it is labelled as a percentile.
ax.axvline(
    quartile_90_distance,
    color="darkRed",
    linestyle="dashed",
    linewidth=2,
    label=f"90th percentile: {quartile_90_distance:.2f} km",
)

ax.legend(fontsize=16)

# The target directory must already exist; the path is relative to the
# notebook's working directory.
ax.figure.savefig(
    "../../assets/figures/closest_station_distance_hist.png",
    transparent=True,
    bbox_inches="tight",  # crop the saved image to the drawn content
    pad_inches=0.05,  # keep only a small margin around it
)

plt.show()