In [None]:
# Enable IPython's autoreload extension in mode 2: every imported module is
# reloaded before a cell executes, so edits to the local project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
from ydata_profiling import ProfileReport
import os
import warnings
import sys

# Put the parent directory on the import path so the local `pipeline` package
# can be imported below; this must run before the two `pipeline` imports.
# NOTE(review): the relative path is resolved against the kernel's working
# directory — this assumes the notebook is run from its own folder; confirm.
sys.path.append("../")
from pipeline import data
from pipeline.config import CONF

# To suppress all warnings
# NOTE(review): this filter is global for the rest of the session, so it also
# hides deprecation and data-quality warnings raised by the cells below.
warnings.filterwarnings("ignore")

# black is a code formatter (see https://github.com/psf/black).
# The jupyter_black extension automatically formats the code written in the cells, imposing a consistent Python style.
%load_ext jupyter_black
# Apply the matplotlib style file stored one directory above this notebook.
# Template for style file: https://matplotlib.org/stable/tutorials/introductory/customizing.html#customizing-with-style-sheets
# NOTE(review): the relative path depends on the kernel's working directory — confirm.
plt.style.use("../matplotlib_style.txt")

### Raw data inspection

In [None]:
# Load the five raw datasets.
# NOTE(review): the unpacking order is assumed to match the tuple returned by
# data.load_data — confirm against the pipeline module.
installed_capacity, prices, realised_supply, realised_demand, weather_data = (
    data.load_data(CONF, data_type="raw")
)

# Build one profile report per dataset. The names are kept at notebook level so
# that later cells can still reference the individual reports.
installed_capacity_profile = ProfileReport(
    installed_capacity, title="Raw Installed Capacity"
)
prices_profile = ProfileReport(prices, title="Raw Prices Europe")
realised_supply_profile = ProfileReport(realised_supply, title="Raw Realised Supply")
realised_demand_profile = ProfileReport(realised_demand, title="Raw Realised Demand")
# The weather report is the only one generated in ydata-profiling's minimal mode.
weather_data_profile = ProfileReport(
    weather_data, title="Raw Weather Data", minimal=True
)

# Create the report folder up front so that writing the HTML files does not
# fail on a fresh checkout where the folder has not been created yet.
raw_report_dir = CONF.data.raw_inspection_dir
os.makedirs(raw_report_dir, exist_ok=True)

# Write every report to its HTML file; a single loop replaces five copy-pasted
# "join path + to_file" stanzas.
for report, report_filename in (
    (installed_capacity_profile, "Installed_Capacity_Germany.html"),
    (prices_profile, "Prices_Europe.html"),
    (realised_supply_profile, "Realised_Supply_Germany.html"),
    (realised_demand_profile, "Realised_Demand_Germany.html"),
    (weather_data_profile, "Weather_Data_Germany.html"),
):
    report.to_file(os.path.join(raw_report_dir, report_filename))

### Raw data processing

In [None]:
# Handle missing values in each raw dataset and save the result as CSV in the
# preprocessed-data directory.
# NOTE(review): this cell expects the dataset names to hold the RAW data. The
# "Processed data inspection" cell rebinds the same names to the preprocessed
# data, so re-running this cell after that one would process already-processed
# frames — run the notebook top to bottom.

# Create the target folder up front so that saving does not fail on a fresh
# checkout where the folder has not been created yet.
preprocessed_dir = CONF.data_processing.preprocessed_data_dir
os.makedirs(preprocessed_dir, exist_ok=True)

# A single loop replaces five copy-pasted "process + to_csv" stanzas.
# NOTE(review): process_na_values is assumed to return an object with a
# pandas-style to_csv method — confirm against the pipeline module.
for raw_dataset, csv_filename in (
    (installed_capacity, "Installed_Capacity_Germany.csv"),
    (prices, "Prices_Europe.csv"),
    (realised_supply, "Realised_Supply_Germany.csv"),
    (realised_demand, "Realised_Demand_Germany.csv"),
    (weather_data, "Weather_Data_Germany.csv"),
):
    data.process_na_values(raw_dataset, CONF).to_csv(
        os.path.join(preprocessed_dir, csv_filename)
    )

### Processed data inspection

In [None]:
# Load the five preprocessed datasets.
# NOTE: this rebinds the same names that previously held the raw data, so from
# here on they refer to the preprocessed versions.
# NOTE(review): the unpacking order is assumed to match the tuple returned by
# data.load_data — confirm against the pipeline module.
installed_capacity, prices, realised_supply, realised_demand, weather_data = (
    data.load_data(CONF, data_type="preprocessed")
)

# Build one profile report per dataset. The names are kept at notebook level so
# that later cells can still reference the individual reports.
installed_capacity_profile = ProfileReport(
    installed_capacity, title="Preprocessed Installed Capacity"
)
prices_profile = ProfileReport(prices, title="Preprocessed Prices Europe")
realised_supply_profile = ProfileReport(
    realised_supply, title="Preprocessed Realised Supply"
)
realised_demand_profile = ProfileReport(
    realised_demand, title="Preprocessed Realised Demand"
)
# The weather report is the only one generated in ydata-profiling's minimal mode.
weather_data_profile = ProfileReport(
    weather_data, title="Preprocessed Weather Data", minimal=True
)

# Create the report folder up front so that writing the HTML files does not
# fail on a fresh checkout where the folder has not been created yet.
preprocessed_report_dir = CONF.data.preprocessed_data_inspection_dir
os.makedirs(preprocessed_report_dir, exist_ok=True)

# Write every report to its HTML file; a single loop replaces five copy-pasted
# "join path + to_file" stanzas.
for report, report_filename in (
    (installed_capacity_profile, "Installed_Capacity_Germany.html"),
    (prices_profile, "Prices_Europe.html"),
    (realised_supply_profile, "Realised_Supply_Germany.html"),
    (realised_demand_profile, "Realised_Demand_Germany.html"),
    (weather_data_profile, "Weather_Data_Germany.html"),
):
    report.to_file(os.path.join(preprocessed_report_dir, report_filename))