In [1]:
import sys
import os
import pandas as pd
import numpy as np
from pyprojroot import here

sys.path.insert(0, os.path.join(here()))

from src.utils.EDAUtils import WpEDS

import warnings
warnings.filterwarnings('ignore')

## plotting libraries and setting
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc = {'figure.figsize':(24, 12)})
# small_size = 8
# medium_szie = 10
big_size = 18
plt.rc('font', size=big_size)          # controls default text sizes
plt.rc('axes', titlesize=big_size)     # fontsize of the axes title
plt.rc('axes', labelsize=big_size)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=big_size)    # fontsize of the tick labels
plt.rc('ytick', labelsize=big_size)    # fontsize of the tick labels
plt.rc('legend', fontsize=big_size)    # legend fontsize
plt.rc('figure', titlesize=big_size)  # fontsize of the figure title

In [3]:
def extract_seasons_from_df(df):
    """Argument: a dataframe with a timestamp index"""
    tmp_list = []
    for year in df.index.year.unique():
        year = str(year)
        seasons = {
            "spring": pd.date_range(
                start="20/03/" + year, end="21/06/" + year, freq="h"
            ),
            "summer": pd.date_range(
                start="21/06/" + year, end="22/09/" + year, freq="h"
            ),
            "autumn": pd.date_range(
                start="22/09/" + year, end="20/12/" + year, freq="h"
            ),
        }
        tmp_list.append(seasons)
        df.loc[df.index.isin(seasons["spring"]), "season"] = "spring"
        df.loc[df.index.isin(seasons["summer"]), "season"] = "summer"
        df.loc[df.index.isin(seasons["autumn"]), "season"] = "autumn"

    df["season"] = df["season"].fillna("winter")

In [2]:
df_train = pd.read_parquet(here("data/transformed/train/train.parquet"))
print("train:", df_train.shape)

df_test = pd.read_parquet(here("data/transformed/test/test.parquet"))
print("test:", df_test.shape)

df_wf1 = pd.read_parquet(here("data/transformed/windforecasts/windforecasts_wf1.parquet"))
print("wf1:",df_wf1.shape)

train: (18757, 7)
test: (7488, 1)
wf1: (104832, 5)


In [4]:
extract_seasons_from_df(df_train)

In [7]:
df_train_summer = df_train[df_train["season"] == "summer"]
df_train_winter = df_train[df_train["season"] == "winter"]
df_train_spring = df_train[df_train["season"] == "spring"]
df_train_autumn = df_train[df_train["season"] == "autumn"]

#### checking the seasonal info

In [8]:
print("num summer samples:", df_train_summer.shape)
print("num winter samples:", df_train_winter.shape)
print("num spring samples:", df_train_spring.shape)
print("num autumn samples:", df_train_autumn.shape)

num summer samples: (5245, 8)
num winter samples: (4174, 8)
num spring samples: (4140, 8)
num autumn samples: (5198, 8)


In [11]:
print("Total MW summer:\n", df_train_summer.sum())
print("Total MW winter:\n", df_train_winter.sum())
print("Total MW spring:\n", df_train_spring.sum())
print("Total MW autumn:\n", df_train_autumn.sum())

Total MW summer:
 wp1                                                 928.917
wp2                                                 983.232
wp3                                                 981.987
wp4                                                 952.957
wp5                                                 927.971
wp6                                                 908.011
wp7                                                 840.462
season    summersummersummersummersummersummersummersumm...
dtype: object
Total MW winter:
 wp1                                                1220.523
wp2                                                1190.138
wp3                                                1757.074
wp4                                                1485.074
wp5                                                 1289.72
wp6                                                1520.347
wp7                                                1489.308
season    winterwinterwinterwinterwinterwinterwint