In [None]:
import pandas as pd
import os
import pytz
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

from datetime import datetime, timedelta
from dateutil import rrule

In [None]:
# Convert each raw measurement export (Ammonium, Nitrate, Oxygen_A, Oxygen_B
# and Phosphate CSVs) found in `folder_path` into a single-column CSV.
#
# Output: one CSV per input file, written to `converted_path`, containing the
# "hstWaarde" values indexed by the parsed "datumBeginMeting" timestamps.
# The output name is the part of the first "historianTagnummer" value before
# the first '.', followed by '_' and the input file name.

# change the relative paths accordingly
folder_path = "../data/important/"
converted_path = "../data/converted/"

# The following cells read their input from `converted_path`, so write there
# (instead of the current working directory) and make sure it exists.
os.makedirs(converted_path, exist_ok=True)

# Only pick up CSV files (skips checkpoints / OS metadata); sorted so the
# processing order is the same on every run.
files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(".csv"))

for file_name in files:
    df = pd.read_csv(os.path.join(folder_path, file_name))

    if df.empty:
        # Without rows there is no tag number to build the output name from.
        print(f"Skipping empty file: {file_name}")
        continue

    # Use the measurement start time as an unnamed datetime index.
    df.index = pd.to_datetime(df['datumBeginMeting'])
    df.index.name = None

    df_to_series = df["hstWaarde"]
    filename = df['historianTagnummer'].iloc[0].split('.')[0] + f'_{file_name}'

    df_to_series.to_csv(os.path.join(converted_path, filename), header=True)

In [None]:
# Resample every converted series onto a regular one-minute grid.
#
# Output: one CSV per input file, written to `resampled_path` under the same
# file name, with one row per minute between the first and last timestamp and
# gaps filled by time-weighted linear interpolation.
folder_path = "../data/converted/"
resampled_path = "../data/resampled/"

os.makedirs(resampled_path, exist_ok=True)

# Only pick up CSV files; sorted so the processing order is reproducible.
files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(".csv"))

for file_name in files:
    df = pd.read_csv(os.path.join(folder_path, file_name), parse_dates=True, index_col=0)

    df.index = pd.to_datetime(df.index)

    if df.empty:
        # An empty frame has no min/max timestamp to build the grid from.
        print(f"Skipping empty file: {file_name}")
        continue

    # Average duplicate timestamps *before* interpolating so each instant has
    # exactly one value; groupby also returns the index sorted.
    df = df.groupby(df.index).mean()

    # 'min' is the non-deprecated spelling of the minutely frequency alias.
    minutely_index = pd.date_range(start=df.index.min(), end=df.index.max(), freq='min')

    # Interpolate over the union of observed timestamps and grid points, then
    # keep only the grid. A plain left join onto the grid would silently drop
    # every observation that does not fall exactly on a grid point.
    merged_df = (
        df.reindex(df.index.union(minutely_index))
        .interpolate(method='time')
        .reindex(minutely_index)
    )

    merged_df.to_csv(os.path.join(resampled_path, file_name))

In [None]:
# Load the converted ammonium series; the index is read as-is from the first
# CSV column (no date parsing), so it holds timestamp strings.
df = pd.read_csv("../data/converted/EDE_B121069913_K600_ammonium.csv", index_col=0)

# Parse every ISO-formatted timestamp and attach the Europe/Amsterdam zone.
amsterdam = pytz.timezone('Europe/Amsterdam')
local_stamps = []
for raw_stamp in df.index:
    local_stamps.append(amsterdam.localize(datetime.fromisoformat(raw_stamp)))
df.index = local_stamps

# Re-express the localized index in UTC.
df.index = df.index.tz_convert('UTC')

In [None]:
# Combine all resampled series into one wide table: one column per input file
# (named after the file), one row per timestamp, aligned with an outer join so
# timestamps missing from a file show up as NaN. The result is written to
# ../data/merged.csv.
folder_path = "../data/resampled/"

# Only pick up CSV files; sorted so the column order is the same on every run
# instead of depending on the directory listing order.
files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(".csv"))

frames = []
for file_name in files:
    df = pd.read_csv(os.path.join(folder_path, file_name), index_col=0)
    df.index = pd.to_datetime(df.index)
    frames.append(df.rename(columns={"hstWaarde": file_name}))

# Align everything in a single outer join rather than re-merging the growing
# table once per file (which is quadratic and starts from an index-less frame).
# NOTE: column-wise concat expects each file's index to be unique, which the
# resampling step provides by averaging per timestamp.
if frames:
    merged = pd.concat(frames, axis=1, join='outer').sort_index()
else:
    # Nothing to merge: keep writing an empty table, as before.
    merged = pd.DataFrame()

merged.to_csv("../data/merged.csv")

In [None]:
# Load the merged table and compute the pairwise correlation of its columns.
df = pd.read_csv("../data/merged.csv", index_col=0)
df_corr = df.corr()

# Draw the correlation matrix as a heatmap, labelled by column name on both axes.
heatmap = go.Heatmap(
    x=df_corr.columns,
    y=df_corr.index,
    z=df_corr.to_numpy(),
)
fig = go.Figure(data=[heatmap])

# Last expression of the cell, so the notebook renders the figure.
fig
