In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are reloaded
# before each cell runs and edits to the project code take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
from ydata_profiling import ProfileReport
import os
import warnings
import sys

# Put the parent directory on the import path so the local `pipeline` package resolves.
# NOTE(review): "../" is relative to the working directory — presumably the notebook is
# always launched from its own folder; confirm.
sys.path.append("../")
from pipeline import data
from pipeline.config import CONF

# Suppress all warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used below;
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# black is a code formatter (see https://github.com/psf/black).
# The jupyter_black extension automatically formats the code written in the cells,
# imposing a consistent Python style.
%load_ext jupyter_black
# Apply the matplotlib style sheet stored one directory above the notebook.
# Template for style file: https://matplotlib.org/stable/tutorials/introductory/customizing.html#customizing-with-style-sheets
plt.style.use("../matplotlib_style.txt")

### Raw data inspection

In [None]:
# Load all raw datasets in a single call; the loader's return value is unpacked
# positionally into the five names below.
(
    installed_capacity,
    prices,
    realised_supply,
    realised_demand,
    weather_data,
) = data.load_raw_data(CONF)

In [None]:
# Build the profile report for the installed-capacity data and show it in the notebook.
installed_capacity_profile = ProfileReport(
    installed_capacity, title="Installed Capacity"
)
display(installed_capacity_profile)

# Persist the report as HTML. The target directory is created first so the write
# does not fail on a fresh checkout where the inspection folder is still missing.
installed_capacity_report_path = os.path.join(
    CONF.data_processing.inspection_dir, "installed_capacity.html"
)
os.makedirs(
    os.path.dirname(os.path.abspath(installed_capacity_report_path)), exist_ok=True
)
installed_capacity_profile.to_file(installed_capacity_report_path)

In [None]:
# Build the profile report for the price data and show it in the notebook.
prices_profile = ProfileReport(prices, title="Prices Europe")
display(prices_profile)

# Persist the report as HTML. The target directory is created first so the write
# does not fail on a fresh checkout where the inspection folder is still missing.
prices_report_path = os.path.join(CONF.data_processing.inspection_dir, "prices.html")
os.makedirs(os.path.dirname(os.path.abspath(prices_report_path)), exist_ok=True)
prices_profile.to_file(prices_report_path)

In [None]:
# Build the profile report for the realised-supply data and show it in the notebook.
realised_supply_profile = ProfileReport(realised_supply, title="Realised Supply")
display(realised_supply_profile)

# Persist the report as HTML. The target directory is created first so the write
# does not fail on a fresh checkout where the inspection folder is still missing.
realised_supply_report_path = os.path.join(
    CONF.data_processing.inspection_dir, "realised_supply.html"
)
os.makedirs(
    os.path.dirname(os.path.abspath(realised_supply_report_path)), exist_ok=True
)
realised_supply_profile.to_file(realised_supply_report_path)

In [None]:
# Build the profile report for the realised-demand data and show it in the notebook.
realised_demand_profile = ProfileReport(realised_demand, title="Realised Demand")
display(realised_demand_profile)

# Persist the report as HTML. The target directory is created first so the write
# does not fail on a fresh checkout where the inspection folder is still missing.
realised_demand_report_path = os.path.join(
    CONF.data_processing.inspection_dir, "realised_demand.html"
)
os.makedirs(
    os.path.dirname(os.path.abspath(realised_demand_report_path)), exist_ok=True
)
realised_demand_profile.to_file(realised_demand_report_path)

In [None]:
# Build the profile report for the weather data (using the profiler's `minimal`
# mode) and show it in the notebook.
weather_data_profile = ProfileReport(weather_data, title="Weather Data", minimal=True)
display(weather_data_profile)

# Persist the report as HTML. The target directory is created first so the write
# does not fail on a fresh checkout where the inspection folder is still missing.
weather_data_report_path = os.path.join(
    CONF.data_processing.inspection_dir, "weather_data.html"
)
os.makedirs(os.path.dirname(os.path.abspath(weather_data_report_path)), exist_ok=True)
weather_data_profile.to_file(weather_data_report_path)

### Raw data processing

In [None]:
# Run the pipeline's NA-value processing on the installed-capacity data and export
# the result as CSV.
installed_capacity_csv_path = os.path.join(
    CONF.data_processing.preprocessed_data_dir, "Installed_Capacity_Germany.csv"
)
# Create the output directory up front: writing the CSV fails if it does not exist yet.
os.makedirs(
    os.path.dirname(os.path.abspath(installed_capacity_csv_path)), exist_ok=True
)
data.process_na_values(installed_capacity, CONF).to_csv(installed_capacity_csv_path)

In [None]:
# Run the pipeline's NA-value processing on the price data and export the result as CSV.
prices_csv_path = os.path.join(
    CONF.data_processing.preprocessed_data_dir, "Prices_Europe.csv"
)
# Create the output directory up front: writing the CSV fails if it does not exist yet.
os.makedirs(os.path.dirname(os.path.abspath(prices_csv_path)), exist_ok=True)
data.process_na_values(prices, CONF).to_csv(prices_csv_path)

In [None]:
# Run the pipeline's NA-value processing on the realised-supply data and export
# the result as CSV.
realised_supply_csv_path = os.path.join(
    CONF.data_processing.preprocessed_data_dir, "Realised_Supply_Germany.csv"
)
# Create the output directory up front: writing the CSV fails if it does not exist yet.
os.makedirs(os.path.dirname(os.path.abspath(realised_supply_csv_path)), exist_ok=True)
data.process_na_values(realised_supply, CONF).to_csv(realised_supply_csv_path)

In [None]:
# Run the pipeline's NA-value processing on the realised-demand data and export
# the result as CSV.
realised_demand_csv_path = os.path.join(
    CONF.data_processing.preprocessed_data_dir, "Realised_Demand_Germany.csv"
)
# Create the output directory up front: writing the CSV fails if it does not exist yet.
os.makedirs(os.path.dirname(os.path.abspath(realised_demand_csv_path)), exist_ok=True)
data.process_na_values(realised_demand, CONF).to_csv(realised_demand_csv_path)

In [None]:
# Run the pipeline's NA-value processing on the weather data and export the result as CSV.
weather_data_csv_path = os.path.join(
    CONF.data_processing.preprocessed_data_dir, "Weather_Data_Germany.csv"
)
# Create the output directory up front: writing the CSV fails if it does not exist yet.
os.makedirs(os.path.dirname(os.path.abspath(weather_data_csv_path)), exist_ok=True)
data.process_na_values(weather_data, CONF).to_csv(weather_data_csv_path)