In [None]:
import os.path
import pandas as pd
import numpy as np
from matplotlib import pyplot

from settings.settings import HISTORIC_DATA_FOLDER, GAS_PER_HOUR_FILE

%config IPCompleter.greedy=True
%matplotlib inline

In [None]:
def get_data_as_df(csv_file_name):
    """Load a semicolon-separated utility export into a time-indexed DataFrame.

    The file is read with ';' as field separator and ',' as decimal mark.
    The 'Van Datum'/'Van Tijdstip' and 'Tot Datum'/'Tot Tijdstip' column pairs
    are each merged into one timestamp ('Start Time' and 'End Time'), and the
    difference between the two is stored in 'DT delta'.

    Parameters
    ----------
    csv_file_name : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'End Time'. The identification columns and the raw
        date/time text columns are removed; 'Start Time' and 'DT delta'
        are appended after the remaining original columns.
    """
    timestamp_format = '%d-%m-%Y %H:%M:%S'

    def _parse_timestamp(frame, date_column, time_column):
        # Date and time arrive as two text columns; glue them together with a
        # single space so they match `timestamp_format`.
        return pd.to_datetime(frame[date_column] + " " + frame[time_column], format=timestamp_format)

    readings = pd.read_csv(csv_file_name, sep=';', decimal=',')
    readings['End Time'] = _parse_timestamp(readings, 'Tot Datum', 'Tot Tijdstip')
    readings['Start Time'] = _parse_timestamp(readings, 'Van Datum', 'Van Tijdstip')
    readings['DT delta'] = readings['End Time'] - readings['Start Time']

    # Columns that are not needed once the timestamps have been built.
    obsolete_columns = [
        'EAN', 'Meter', 'Metertype', 'Validatiestatus',
        'Tot Datum', 'Tot Tijdstip', 'Van Datum', 'Van Tijdstip',
    ]
    return readings.drop(columns=obsolete_columns).set_index('End Time')

In [None]:
# Read the hourly gas export, then keep only the rows whose unit ('Eenheid') is m³.
gas_csv_path = os.path.join(HISTORIC_DATA_FOLDER, GAS_PER_HOUR_FILE)
df_gas = get_data_as_df(gas_csv_path)
m3_filter = df_gas['Eenheid'] == 'm³'
df_gas = df_gas.loc[m3_filter]

In [None]:
# Sanity check: list every reading whose start-to-end span is not exactly one hour.
expected_gas_interval = np.timedelta64(1, 'h')
span_differs = df_gas['DT delta'] != expected_gas_interval
unexpected_interval = df_gas[span_differs]
print('!! be aware !!', 'unexpected interval, possibly caused by summer/winter time', sep='\n')
# Leave the frame as the last expression so the notebook renders it.
unexpected_interval

In [None]:
def add_week_number_column(df_in, year):
    """Return the rows of ``df_in`` for ``year`` with a 'Week Number' column added.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame with a DatetimeIndex. It is not modified.
    year : str
        Label passed to ``df_in.loc[...]`` to select the rows (e.g. ``'2022'``).

    Returns
    -------
    pandas.DataFrame
        Independent copy of the selected rows with an extra 'Week Number'
        column holding the ISO week of each index timestamp.

    Notes
    -----
    The values are ISO-8601 week numbers, so the first days of January can
    belong to week 52/53 of the previous ISO year (1-2 January 2022 fall in
    week 52). Grouping on this column alone therefore puts those days in the
    same group as the late-December rows of the selected year.
    """
    # `.loc[year]` returns a slice of the caller's frame. Copy it before adding
    # a column so the assignment is unambiguous (no SettingWithCopyWarning) and
    # the result never shares data with `df_in`.
    df_year = df_in.loc[year].copy()
    df_year['Week Number'] = df_year.index.isocalendar().week
    return df_year

# Narrow the gas readings to 2022 and tag every row with its week number.
df_gas = add_week_number_column(df_in=df_gas, year='2022')

In [None]:
def add_how_column(df_in):
    """Re-index a time-indexed frame by hour-of-week ('HOW').

    HOW is computed from the index as ``dayofweek * 24 + hour``.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame with a DatetimeIndex and the columns 'Register',
        'Calorische Bovenwaarde' and 'DT delta'. It is left untouched.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by 'HOW', without the three columns listed above.
    """
    timestamps = df_in.index
    hour_of_week = timestamps.dayofweek * 24 + timestamps.hour
    return (
        df_in
        .assign(HOW=hour_of_week)
        .set_index('HOW')
        .drop(columns=['Register', 'Calorische Bovenwaarde', 'DT delta'])
    )

In [None]:
# One hour-of-week indexed frame per week number, keyed by that week number.
gas_hours_per_day = {
    week_number: add_how_column(week_frame)
    for week_number, week_frame in df_gas.groupby('Week Number')
}


In [None]:
# Overlay the hourly volume of every week after week 44 on one shared axis.
fig, axes = pyplot.subplots(figsize=(40, 8), sharex=True)
for week_number, how_df in gas_hours_per_day.items():
    if week_number <= 44:
        continue
    # Draw on the axes created above instead of relying on pyplot's current axes.
    how_df['Volume'].plot(ax=axes, grid=True, label=week_number)

axes.set_title('Gas volume per hour of week - week by week overlay')
axes.legend()
pyplot.show()

In [None]:
# Stack all weekly frames, then total the volume for each hour-of-week slot.
how_concat = pd.concat(list(gas_hours_per_day.values()))
sum_how = how_concat['Volume'].groupby(level='HOW').sum()

In [None]:
# Bar chart of the summed gas volume per hour-of-week slot.
fig, axes = pyplot.subplots(figsize=(40, 8), sharex=True)
# Draw on the axes created above instead of relying on pyplot's current axes.
sum_how.plot.bar(ax=axes)

axes.set_title('Sum gas volume per hour of week')
axes.legend()
pyplot.show()