# 1. Setup

In [None]:
import json
from datetime import date, time

import colorcet as cc
import holidays
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.transforms as mt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from IPython.display import Image
from matplotlib.dates import DateFormatter, MonthLocator
from matplotlib.lines import Line2D
from matplotlib.ticker import MultipleLocator
from pandas.core.groupby.generic import DataFrameGroupBy
from prophet import Prophet
from scipy.ndimage import binary_dilation
from shapely.geometry import shape
from statsmodels.tsa.seasonal import MSTL, seasonal_decompose

In [None]:
# NOTE(review): presumably the seed for stochastic steps; no use of RAND is
# visible in this section — confirm against the cells that consume it.
RAND = 10
# Path prefix handed to load_all_raw_data when the working frame is built.
RAW_DATA = "../raw_data/"

In [None]:
from utils.load import load_all_raw_data
from utils.process import process_all_dfs
from utils.merge import merge_all_dfs

In [None]:
# Build the working frame in three stages, innermost call first:
# load from RAW_DATA -> process -> merge. The helpers come from utils.*;
# their internals are not shown in this notebook.
df = merge_all_dfs(process_all_dfs(load_all_raw_data(RAW_DATA)))

In [None]:
# Largest value present in the h2_winddirection_10m column.
df["h2_winddirection_10m"].max()

360.0

In [None]:
# Show the dtype of every column in df.
df.dtypes

county                                        category
product_type                                  category
is_business                                   category
is_consumption                                category
datetime                                datetime64[ns]
target                                         float32
data_block_id                                   uint16
eic_count                                      float64
installed_capacity                             float32
lowest_price_per_mwh                           float32
highest_price_per_mwh                          float32
euros_per_mwh                                  float32
f1_temperature                                 float32
f1_dewpoint                                    float32
f1_cloudcover_low                                uint8
f1_cloudcover_mid                                uint8
f1_cloudcover_high                               uint8
f1_cloudcover_total                              uint8
f1_10_metr

In [None]:
# Count missing values per column of df.
df.isna().sum()

county                                     0
product_type                               0
is_business                                0
is_consumption                             0
datetime                                   0
target                                     0
data_block_id                              0
eic_count                               5710
installed_capacity                      5710
lowest_price_per_mwh                       0
highest_price_per_mwh                      0
euros_per_mwh                              0
f1_temperature                             0
f1_dewpoint                                0
f1_cloudcover_low                          0
f1_cloudcover_mid                          0
f1_cloudcover_high                         0
f1_cloudcover_total                        0
f1_10_metre_u_wind_component               0
f1_10_metre_v_wind_component               0
f1_direct_solar_radiation                  0
f1_surface_solar_radiation_downwards       0
f1_snowfal

In [None]:
# Per-column lookup tables from category code to a readable label.
# NOTE(review): county_id_to_name_map is not defined or imported in any cell
# shown above this one; confirm it is created before this cell runs, otherwise
# a fresh-kernel "Run All" raises NameError here.
CATEGORICAL_DICT = {
    "county": county_id_to_name_map,
    "is_business": {0: "not_business", 1: "business"},
    "is_consumption": {0: "production", 1: "consumption"},
    "product_type": {
        0: "combined",
        1: "fixed",
        2: "general_service",
        3: "spot",
    },
}

# Sixteen hex colours: the first 16 glasbey entries with positions 4<->8 and
# 9<->12 exchanged, spelled out as an explicit index order.
PALETTE = sns.color_palette(
    [
        cc.glasbey[idx]
        for idx in (0, 1, 2, 3, 8, 5, 6, 7, 4, 12, 10, 11, 9, 13, 14, 15)
    ]
).as_hex()

# Column groups, each one extending the previous by a single column.
# Unpacking builds a fresh list every time, so the three lists stay independent.
SEGMENT_C = ["county", "product_type", "is_business"]
CATEGORICAL_C = [*SEGMENT_C, "is_consumption"]
TARGET_C = [*CATEGORICAL_C, "datetime"]

In [None]:
def _format_float(value):
    """Render a float for DataFrame display.

    Non-zero values with magnitude below 0.01 use scientific notation with
    two decimals; everything else (including exact zero) uses fixed-point
    notation with two decimals.
    """
    if abs(value) < 0.01 and value != 0:
        return f"{value:.2e}"
    return f"{value:.2f}"


# Install the float formatter and lift the cap on displayed columns.
pd.options.display.float_format = _format_float
pd.options.display.max_columns = None

In [None]:
# Apply seaborn's "whitegrid" style to figures created after this cell.
sns.set_style("whitegrid")