In [7]:
# In order to run this script, ensure you have installed the required dependencies in the requirements.txt file.
# For more information, refer to the README.md file.
import pandas as pd
import numpy as np
import os
from scipy.interpolate import interp1d

In [12]:
# Name of the timestamp column shared by all four raw CSV files.
DATE_COL = "Date - Date"


def _read_dated_csv(csv_path):
    """Read one raw CSV and return it indexed by its parsed DATE_COL column."""
    frame = pd.read_csv(csv_path)
    # Parse the date strings into datetimes before promoting them to the index.
    frame[DATE_COL] = pd.to_datetime(frame[DATE_COL])
    return frame.set_index(DATE_COL)


# Water-level limit datasets.
maximum_water_level = _read_dated_csv("./data/raw/LakeOntario_HistoricalMax_WaterLevel.csv")
minimum_water_level = _read_dated_csv("./data/raw/LakeOntario_HistoricalMin_WaterLevel.csv")

# Outflow limit datasets.
maximum_outflow = _read_dated_csv("./data/raw/LakeOntario_HistoricalMax_WaterOutflow.csv")
minimum_outflow = _read_dated_csv("./data/raw/LakeOntario_HistoricalMin_WaterOutflow.csv")

Make the data hourly: reindex each dataset to an hourly index and forward-fill the gaps.

In [13]:
# Build an hourly timeline and project every dataset onto it, carrying the most
# recent known value forward into timestamps that have no row of their own.
start = pd.Timestamp("2024-01-01 00:00:00")
# pd.date_range includes both endpoints by default, so the timeline runs from
# 2024-01-01 00:00 through 2025-01-01 00:00 inclusive.
# NOTE(review): this yields one row stamped 2025-01-01 00:00 -- confirm that is
# intended for an output that is otherwise limited to 2024.
end = pd.Timestamp("2025-01-01 00:00:00")
hourly_index = pd.date_range(start=start, end=end, freq="h")

# Same two-step treatment for each dataset: align to the hourly timeline, then
# forward-fill the rows that alignment left empty.
(
    maximum_water_level,
    minimum_water_level,
    maximum_outflow,
    minimum_outflow,
) = [
    frame.reindex(hourly_index).ffill()
    for frame in (
        maximum_water_level,
        minimum_water_level,
        maximum_outflow,
        minimum_outflow,
    )
]


In [14]:
# Column headers used in the combined output table.
MAX_WATER_LEVEL_COL = "Maximum Water Level (meters)"
MIN_WATER_LEVEL_COL = "Minimum Water Level (meters)"
MAX_OUTFLOW_COL = "Maximum Outflow (m^3/s)"
MIN_OUTFLOW_COL = "Minimum Outflow (m^3/s)"

# Start from an empty frame on the hourly timeline and attach one column per
# dataset, taking the "m" column from the level frames and the "m^3/s" column
# from the outflow frames. Each Series is aligned on the index as it is added.
df_new = pd.DataFrame(index=hourly_index).assign(
    **{
        MAX_WATER_LEVEL_COL: maximum_water_level["m"],
        MIN_WATER_LEVEL_COL: minimum_water_level["m"],
        MAX_OUTFLOW_COL: maximum_outflow["m^3/s"],
        MIN_OUTFLOW_COL: minimum_outflow["m^3/s"],
    }
)

# Display the combined table as the cell output.
df_new

Unnamed: 0,Maximum Water Level (meters),Minimum Water Level (meters),Maximum Outflow (m^3/s),Minimum Outflow (m^3/s)
2024-01-01 00:00:00,75.19,73.74,10470.0,4590.0
2024-01-01 01:00:00,75.19,73.74,10470.0,4590.0
2024-01-01 02:00:00,75.19,73.74,10470.0,4590.0
2024-01-01 03:00:00,75.19,73.74,10470.0,4590.0
2024-01-01 04:00:00,75.19,73.74,10470.0,4590.0
...,...,...,...,...
2024-12-31 20:00:00,75.18,73.74,9540.0,4790.0
2024-12-31 21:00:00,75.18,73.74,9540.0,4790.0
2024-12-31 22:00:00,75.18,73.74,9540.0,4790.0
2024-12-31 23:00:00,75.18,73.74,9540.0,4790.0


In [15]:
# Write the combined hourly table to the processed-data folder.
# DataFrame.to_csv does not create missing parent directories and raises OSError
# if the folder is absent, so make sure it exists before writing.
OUTPUT_CSV = "./data/processed/Historical_Levels_and_Outflow_Limits_For_2024_HOURLY.csv"
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df_new.to_csv(OUTPUT_CSV)