In [None]:
import os
from glob import glob
import re
from pickle import load

import pandas as pd
import numpy as np
from matplotlib import pyplot

from settings.settings import HISTORIC_DATA_FOLDER

%config IPCompleter.greedy=True
%matplotlib inline

# Because root_dir is given, glob returns names relative to HISTORIC_DATA_FOLDER;
# they are joined with that folder again when the files are opened.
# Month pickles: one file per month.
file_list = glob(pathname='climate_month_*.pickle', root_dir=HISTORIC_DATA_FOLDER)
# Day pickles: one file per date-like name (climate_20??-??-??).
day_list = glob(pathname='climate_20*-*-*.pickle', root_dir=HISTORIC_DATA_FOLDER)

In [None]:
def get_year_and_month_from_file_name(file_name):
    """Extract the 'YYYY-MM' part from a climate pickle file name.

    Parameters
    ----------
    file_name : str
        File name (or path) that contains a year-month such as '2021-05'.
        Only years 2020-2029 are recognised by the pattern.

    Returns
    -------
    str
        The first substring of file_name that looks like '202Y-MM'.

    Raises
    ------
    AttributeError
        If file_name contains no such substring (re.search returns None).
    """
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    return re.search(r'202\d-\d\d', file_name).group()

In [None]:
def get_daily_average_temperature(df_day):
    """Return the mean of the 'Temp' column of one day's data.

    Parameters
    ----------
    df_day : pandas.DataFrame or None
        Data of a single day with a 'Temp' column, or None when there is
        no data for that day.

    Returns
    -------
    scalar or None
        df_day['Temp'].mean(), or None when df_day is None.
    """
    # Check for the "no data" marker explicitly. Catching TypeError and
    # asserting afterwards would hide genuine type errors behind an
    # AssertionError (or silently return None when asserts are disabled).
    if df_day is None:
        return None
    return df_day['Temp'].mean()

In [None]:
def add_week_number_column(df_in, year):
    """Select one year of a date-indexed frame and index it by ISO week number.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Frame with a DatetimeIndex. It is not modified.
    year : str or int
        Year to select, e.g. '2021' or 2021.

    Returns
    -------
    pandas.DataFrame
        The rows of df_in that fall in `year`, with the ISO week number of
        each row's date as index (index name 'Week Number').
    """
    if isinstance(year, int):
        # Partial-string indexing on a DatetimeIndex needs a string label.
        year = str(year)

    # Work on an explicit copy: adding a column to a .loc slice of df_in
    # is ambiguous (view vs. copy) and triggers SettingWithCopyWarning.
    df_year = df_in.loc[year].copy()
    df_year['Week Number'] = df_year.index.isocalendar().week

    return df_year.set_index('Week Number')

In [None]:
# Column-wise accumulator: one entry per day found in the month pickles.
all_weather = {
    'date': [],
    'av_temp': []
}

for name in sorted(file_list):
    yr_month = get_year_and_month_from_file_name(name)
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(os.path.join(HISTORIC_DATA_FOLDER, name), 'rb') as fh:
        month_dict = load(fh)
    # Keys are formatted as a two-digit day number; values are passed on as
    # the data of that day (a frame with a 'Temp' column, or None).
    for day, df_day in month_dict.items():
        all_weather['date'].append(f'{yr_month}-{day:02}')
        all_weather['av_temp'].append(get_daily_average_temperature(df_day))

df_temp = pd.DataFrame.from_dict(all_weather)

# Turn the 'YYYY-MM-DD' strings into timestamps and use them as index.
df_temp['date'] = pd.to_datetime(df_temp['date'], format='%Y-%m-%d')
df_temp = df_temp.set_index('date')
print(df_temp.shape)
df_temp

In [None]:
# Line chart of the daily mean temperature over the whole loaded period.
ax = df_temp.plot(title='Daily average temperature.', grid=True)
ax.set_ylabel(ylabel='Temperature (°C)')

In [None]:
# Weekly mean temperature. 'W' is the canonical weekly alias; the lowercase
# spelling 'w' is deprecated in recent pandas versions.
temp_per_week = df_temp.resample('W').mean(numeric_only=True)
av_temp_per_week_2021 = add_week_number_column(temp_per_week, '2021')
av_temp_per_week_2022 = add_week_number_column(temp_per_week, '2022')

# The last week of 2021 is labelled by pandas with a date in 2022, so it ends
# up in the 2022 frame as week 52. Move it to the 2021 frame to prevent a
# stray point at the end of the 2022 line in the weekly plot.
av_temp_per_week_2021.loc[52] = av_temp_per_week_2022.loc[52]
av_temp_per_week_2022 = av_temp_per_week_2022.drop(52)

In [None]:
# One wide figure; both years are drawn on the same axes so they can be
# compared week by week.
fig, axes = pyplot.subplots(figsize=(30, 10))
ax = av_temp_per_week_2021['av_temp'].plot(ax=axes, label='per week 2021', grid=True)
bx = av_temp_per_week_2022['av_temp'].plot(
    ax=axes,
    label='per week 2022',
    grid=True,
    title='Weekly average temperature.',
    style='o-',
)
bx.set_ylabel(ylabel='Temperature (°C)')
axes.legend()
pyplot.show()

In [None]:
# Collect the per-day frames from all pickles and concatenate them once.
per_hour = []

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
for name in sorted(file_list):
    yr_month = get_year_and_month_from_file_name(name)
    with open(os.path.join(HISTORIC_DATA_FOLDER, name), 'rb') as fh:
        month_dict = load(fh)
    for day, value in month_dict.items():
        # list.append never raises, so a missing day has to be detected
        # explicitly; otherwise pd.concat silently drops the None entry.
        if value is None:
            print(f'No dataframe for {yr_month}-{day:02}')
        else:
            per_hour.append(value)

# Append the day pickles (presumably the days of months that have not been
# bundled into a month pickle yet -- TODO confirm).
for name in sorted(day_list):
    with open(os.path.join(HISTORIC_DATA_FOLDER, name), 'rb') as fh:
        df_day_hour = load(fh)

    if df_day_hour is None:
        print(f'No dataframe in file {name}')
    else:
        per_hour.append(df_day_hour)

temperature_per_hour = pd.concat(per_hour)

In [None]:
# Plot the complete 'Temp' series of the concatenated frames.
ax = temperature_per_hour.loc[:, 'Temp'].plot(title='Hourly temperature')
ax.set_ylabel(ylabel='Temperature (°C)')

In [None]:
# Number of trailing rows to show (assumes one row per hour -- TODO confirm).
last_x_hrs = 120
ax = temperature_per_hour['Temp'].iloc[-last_x_hrs:].plot(
    title=f'Hourly temperature, last {last_x_hrs} hours.'
)
ax.set_ylabel(ylabel='Temperature (°C)')