In [87]:
import pandas as pd
import numpy as np
import os
from scipy.interpolate import interp1d

We label appropriately the columns we need from the CSVs.

We also load the `.csv` files data set from the `/data/` folder of the repository.

Finally we prepare the data set to use timestamp as index of the dataframe.

In [88]:
WATER_LEVEL_COL="Water Level (meters) - Niveau d'eau (metres)"
DATE_COL="Date - Date"
OUTFLOW_COL="m^3/s"

# load csvs
cornwall_water_lvl = pd.read_csv("./data/raw/Cornwall_WaterLevelChanges_2024_CLEAN.csv")
longsault_water_lvl = pd.read_csv("./data/raw/LongSaultDam_WaterLevelChanges_2024_CLEAN.csv")
lakeontario_outflow_changes = pd.read_csv("./data/raw/LakeOntarioOutflowChanges_2024_CLEAN.csv")

# convert date columns to datetime
cornwall_water_lvl[DATE_COL] = pd.to_datetime(cornwall_water_lvl[DATE_COL])
longsault_water_lvl[DATE_COL] = pd.to_datetime(longsault_water_lvl[DATE_COL])
lakeontario_outflow_changes[DATE_COL] = pd.to_datetime(lakeontario_outflow_changes[DATE_COL])

# set date columns as index
cornwall_water_lvl.set_index(DATE_COL, inplace=True)
longsault_water_lvl.set_index(DATE_COL, inplace=True)
lakeontario_outflow_changes.set_index(DATE_COL, inplace=True)

In [89]:
# reindex to hourly frequency and fill missing timestamps with the previous known value
start = lakeontario_outflow_changes.index[0]
# end is set to the last timestamp in 2024 plus one minute since datasets are recorded hourly plus one minute
end = pd.Timestamp("2024-12-31 23:01:00")

hourly_index = pd.date_range(start=start, end=end, freq="h")
lakeontario_outflow_changes = lakeontario_outflow_changes.reindex(hourly_index)
lakeontario_outflow_changes = lakeontario_outflow_changes.ffill()

In [90]:
os.makedirs("./data/processed/", exist_ok=True)
lakeontario_outflow_changes.to_csv("./data/processed/LakeOntarioOutflowChanges_2024_HOURLY.csv")

In [105]:
# Convert datetime index to numeric values representing minutes since start
outflow_minutes = (lakeontario_outflow_changes.index - lakeontario_outflow_changes.index[0]).total_seconds() / 60
cornwall_minutes = (cornwall_water_lvl.index - cornwall_water_lvl.index[0]).total_seconds() / 60
longsault_minutes = (longsault_water_lvl.index - longsault_water_lvl.index[0]).total_seconds() / 60

# Create cubic interpolating functions
outflow_interpolation = interp1d(outflow_minutes, lakeontario_outflow_changes[OUTFLOW_COL], kind='cubic', fill_value="extrapolate")
longsault_water_lvl_interpolation = interp1d(longsault_minutes, longsault_water_lvl[WATER_LEVEL_COL], kind='cubic', fill_value="extrapolate")
cornwall_water_lvl_interpolation = interp1d(cornwall_minutes, cornwall_water_lvl[WATER_LEVEL_COL], kind='cubic', fill_value="extrapolate")

# Query at new timestamps
start = pd.Timestamp("2024-01-01 00:00:00")
end = pd.Timestamp("2025-01-01 23:00:00")
new_times = pd.date_range(start=start, end=end, freq="h")
new_minutes = (new_times - new_times[0]).total_seconds() / 60

# Get interpolated values
outflow_values = outflow_interpolation(new_minutes)
longsault_water_lvl_values = longsault_water_lvl_interpolation(new_minutes)
cornwall_water_lvl_values = cornwall_water_lvl_interpolation(new_minutes)

# Create new DataFrame
df_new = pd.DataFrame({
    "Qdot": outflow_values,
    "upstream": longsault_water_lvl_values,
    "downstream": cornwall_water_lvl_values
}, index=new_times)

Calculate the power for all these timestamps

In [None]:
# calculate the sluice gate
df_new["x"] = np.where(df_new["upstream"] - df_new["downstream"] <= 0, 0, 1)
# calculate the head difference between upstream and downstream
df_new["h"] = df_new["upstream"] - df_new["downstream"] # meters

# set values for power calculation
x = df_new["x"]
ro = 998 # kg/m^3
Qdot = df_new["Qdot"] # m^3/s
g = 9.81 # m/s^2
h = df_new["h"] # meters

df_new["P"] = x * ro * Qdot * g * h
df_new["E"] = df_new["P"] * 3600  # Joules
df_new.loc[df_new.index[0], "E"] = 0 # Set the first value to 0
df_new.to_csv("./data/processed/hydro_data_2024_hourly.csv")


Unnamed: 0,Outflow (m^3/s),Longsault Water Level (meters),Cornwall Water Level (meters)
2024-01-01 00:00:00,7400.0,0.380000,0.410000
2024-01-01 01:00:00,7400.0,0.380000,0.412000
2024-01-01 02:00:00,7400.0,0.370000,0.411000
2024-01-01 03:00:00,7400.0,0.370000,0.414000
2024-01-01 04:00:00,7400.0,0.370000,0.416000
...,...,...,...
2025-01-01 19:00:00,6780.0,-49.279354,21109.955954
2025-01-01 20:00:00,6780.0,-56.548257,24598.009666
2025-01-01 21:00:00,6780.0,-64.489436,28450.569361
2025-01-01 22:00:00,6780.0,-73.132541,32685.749766
