In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# local project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
from ydata_profiling import ProfileReport
import os
import warnings
import sys

# Put the parent directory on the import path so the local `pipeline` package resolves.
# NOTE(review): "../" is resolved against the kernel's working directory —
# presumably the folder containing this notebook; confirm.
sys.path.append("../")
from pipeline import data
from pipeline.config import CONF

# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# black is a code formatter (see https://github.com/psf/black).
# It will automatically format the code you write in the cells, imposing a consistent Python style.
%load_ext jupyter_black
# Apply the project's matplotlib style file.
# Template for style file: https://matplotlib.org/stable/tutorials/introductory/customizing.html#customizing-with-style-sheets
plt.style.use("../matplotlib_style.txt")

### Raw data inspection

In [None]:
# Read the five raw tables; the loader hands them back in this order.
(
    Installed_Capacity_Germany,
    Prices_Europe,
    Realised_Supply_Germany,
    Realised_Demand_Germany,
    Weather_Data_Germany,
) = data.load_data(CONF, data_type="raw")

# Every raw-data report is written into the same folder.
raw_report_dir = CONF.data.raw_inspection_dir

# --- Installed capacity ---
Installed_Capacity_Germany_Profile = ProfileReport(Installed_Capacity_Germany, title="Raw Installed Capacity Germany")
Installed_Capacity_Germany_Profile.to_file(os.path.join(raw_report_dir, "Installed_Capacity_Germany.html"))

# --- Prices ---
Prices_Europe_Profile = ProfileReport(Prices_Europe, title="Raw Prices Europe")
Prices_Europe_Profile.to_file(os.path.join(raw_report_dir, "Prices_Europe.html"))

# --- Realised supply ---
Realised_Supply_Germany_Profile = ProfileReport(Realised_Supply_Germany, title="Raw Realised Supply Germany")
Realised_Supply_Germany_Profile.to_file(os.path.join(raw_report_dir, "Realised_Supply_Germany.html"))

# --- Realised demand ---
Realised_Demand_Germany_Profile = ProfileReport(Realised_Demand_Germany, title="Raw Realised Demand Germany")
Realised_Demand_Germany_Profile.to_file(os.path.join(raw_report_dir, "Realised_Demand_Germany.html"))

# --- Weather ---
# Only this report is built with minimal=True (ydata-profiling's reduced configuration).
# NOTE(review): presumably because the weather table is much larger than the others — confirm.
Weather_Data_Germany_Profile = ProfileReport(Weather_Data_Germany, title="Raw Weather Data Germany", minimal=True)
Weather_Data_Germany_Profile.to_file(os.path.join(raw_report_dir, "Weather_Data_Germany.html"))


### Raw data processing

In [None]:
# Clean the NA values of each raw table and persist the result as CSV in the
# preprocessed-data directory, which the "Processed data inspection" cell
# reads back afterwards.
#
# The five tables are processed identically, so they are driven from a single
# mapping of output name -> raw frame. The name doubles as the CSV file stem
# and as the prefix of the progress message. Spelling the processing call once
# also fixes the demand step, which previously called `data.process_na_Values`
# (capital "V") — a name that differs from the `process_na_values` function
# used for every other table.
raw_tables = {
    "Installed_Capacity_Germany": Installed_Capacity_Germany,
    "Prices_Europe": Prices_Europe,
    "Realised_Supply_Germany": Realised_Supply_Germany,
    "Realised_Demand_Germany": Realised_Demand_Germany,
    "Weather_Data_Germany": Weather_Data_Germany,
}

for table_name, raw_table in raw_tables.items():
    csv_path = os.path.join(CONF.data.preprocessed_data_dir, f"{table_name}.csv")
    # index=False keeps the frame's index out of the written CSV.
    data.process_na_values(raw_table, CONF).to_csv(csv_path, index=False)
    print(f"{table_name} Preprocessed")


### Processed data inspection

In [None]:
# Reload the five tables, this time from the preprocessed data.
# The same variable names are rebound, so from here on they refer to the
# preprocessed frames rather than the raw ones.
(
    Installed_Capacity_Germany,
    Prices_Europe,
    Realised_Supply_Germany,
    Realised_Demand_Germany,
    Weather_Data_Germany,
) = data.load_data(CONF, data_type="preprocessed")

# Every preprocessed-data report is written into the same folder.
preprocessed_report_dir = CONF.data.preprocessed_data_inspection_dir

# --- Installed capacity ---
Installed_Capacity_Germany_Profile = ProfileReport(
    Installed_Capacity_Germany,
    title="Preprocessed Installed Capacity Germany",
)
Installed_Capacity_Germany_Profile.to_file(os.path.join(preprocessed_report_dir, "Installed_Capacity_Germany.html"))

# --- Prices ---
Prices_Europe_Profile = ProfileReport(
    Prices_Europe,
    title="Preprocessed Prices Europe",
)
Prices_Europe_Profile.to_file(os.path.join(preprocessed_report_dir, "Prices_Europe.html"))

# --- Realised supply ---
Realised_Supply_Germany_Profile = ProfileReport(
    Realised_Supply_Germany,
    title="Preprocessed Realised Supply Germany",
)
Realised_Supply_Germany_Profile.to_file(os.path.join(preprocessed_report_dir, "Realised_Supply_Germany.html"))

# --- Realised demand ---
Realised_Demand_Germany_Profile = ProfileReport(
    Realised_Demand_Germany,
    title="Preprocessed Realised Demand Germany",
)
Realised_Demand_Germany_Profile.to_file(os.path.join(preprocessed_report_dir, "Realised_Demand_Germany.html"))

# --- Weather ---
# Only this report is built with minimal=True (ydata-profiling's reduced configuration).
# NOTE(review): presumably because the weather table is much larger than the others — confirm.
Weather_Data_Germany_Profile = ProfileReport(
    Weather_Data_Germany,
    title="Preprocessed Weather Data Germany",
    minimal=True,
)
Weather_Data_Germany_Profile.to_file(os.path.join(preprocessed_report_dir, "Weather_Data_Germany.html"))
