In [1]:
#default_exp pipeline

# Pipeline

<br>

### Imports

In [2]:
#exports
import numpy as np
import pandas as pd

from dagster import execute_pipeline, pipeline, solid, Field

import os
from batopt import clean, discharge

<br>

### End-to-End

We're now going to combine these steps into a pipeline using dagster. First, we'll create the individual components.

In [3]:
@solid()
def load_data(_, raw_data_dir: str):
    """Load the raw pv, demand and weather training datasets.

    Parameters:
        _: dagster execution context (unused)
        raw_data_dir: directory handed to `clean.load_training_dataset`

    Returns:
        dict with the keys 'pv', 'demand' and 'weather', each holding the
        result of `clean.load_training_dataset` for that dataset
    """
    return {
        'pv': clean.load_training_dataset(raw_data_dir, 'pv'),
        'demand': clean.load_training_dataset(raw_data_dir, 'demand'),
        # weather is the only dataset loaded with an explicit index frequency
        'weather': clean.load_training_dataset(raw_data_dir, 'weather', dt_idx_freq='H'),
    }

@solid()
def clean_data(_, loaded_data, intermediate_data_dir: str):
    """Clean the loaded datasets and save them as CSVs.

    Parameters:
        _: dagster execution context (unused)
        loaded_data: mapping with 'pv', 'demand' and 'weather' entries,
            as produced by `load_data`
        intermediate_data_dir: directory the cleaned CSVs are written to;
            it is created (including missing parents) if it does not exist

    Returns:
        `intermediate_data_dir`, so downstream solids can read the saved files
    """
    # Cleaning
    cleaned_data = dict()

    cleaned_data['pv'] = (loaded_data['pv']
                          .pipe(clean.interpolate_missing_panel_temps, loaded_data['weather'])
                          .pipe(clean.interpolate_missing_site_irradiance, loaded_data['weather'])
                          .pipe(clean.interpolate_missing_site_power)
                         )
    # NOTE(review): this uses the loaded (not the cleaned) pv data - confirm that is intended
    cleaned_data['weather'] = clean.interpolate_missing_weather_solar(loaded_data['pv'], loaded_data['weather'])
    # demand is passed through unchanged
    cleaned_data['demand'] = loaded_data['demand']

    # Saving
    # `makedirs(..., exist_ok=True)` also creates missing parent directories and
    # avoids the race between an `exists()` check and a subsequent `mkdir()`
    os.makedirs(intermediate_data_dir, exist_ok=True)

    for dataset_name in ('pv', 'demand', 'weather'):
        cleaned_data[dataset_name].to_csv(f'{intermediate_data_dir}/{dataset_name}_cleaned.csv')

    return intermediate_data_dir

@solid()
def fit_and_save_discharge_model(_, intermediate_data_dir: str, discharge_opt_model_fp: str, model_params: dict):
    """Fit the discharge model on the intermediate data and save it.

    Parameters:
        _: dagster execution context (unused)
        intermediate_data_dir: directory passed to
            `discharge.prepare_training_input_data`
        discharge_opt_model_fp: filepath passed to `discharge.fit_and_save_model`
        model_params: keyword arguments forwarded to `discharge.fit_and_save_model`

    Returns:
        None
    """
    training_inputs, training_target = discharge.prepare_training_input_data(intermediate_data_dir)
    discharge.fit_and_save_model(training_inputs, training_target, discharge_opt_model_fp, **model_params)

@solid()
def construct_battery_profile(_, cleaned_data_dir: str, raw_data_dir: str, discharge_opt_model_fp: str):
    """Build the battery profile for the latest test period.

    Parameters:
        _: dagster execution context (unused)
        cleaned_data_dir: directory containing the cleaned datasets
        raw_data_dir: directory containing the raw datasets
        discharge_opt_model_fp: filepath of the saved discharge model

    Returns:
        The result of `discharge.optimise_latest_test_discharge_profile`
    """
    # The battery profile is currently just the discharge profile, returned as-is
    return discharge.optimise_latest_test_discharge_profile(
        raw_data_dir,
        cleaned_data_dir,
        discharge_opt_model_fp,
    )

<br>

Then we'll combine them into a pipeline.

In [4]:
@pipeline
def end_to_end_pipeline():
    """Wire the solids together: load -> clean -> fit model / construct battery profile."""
    raw_datasets = load_data()
    intermediate_dir = clean_data(raw_datasets)

    # NOTE(review): both solids below depend only on the output of `clean_data`, so
    # the graph has no edge forcing the model to be fitted before the profile is
    # constructed - confirm the execution order is guaranteed some other way
    fit_and_save_discharge_model(intermediate_dir)
    construct_battery_profile(intermediate_dir)
    # Should use `great expectations` to check that the battery profile doesnt break the constraints

<br>

We'll now run a test of the pipeline.

In [None]:
# Values needed by more than one solid are defined once so they cannot drift apart:
# the model written by `fit_and_save_discharge_model` must be the one that
# `construct_battery_profile` reads
raw_data_dir = '../data/raw'
discharge_opt_model_fp = '../models/discharge_opt.sav'

run_config = {
    'solids': {
        'load_data': {
            'inputs': {
                'raw_data_dir': raw_data_dir,
            },
        },
        'clean_data': {
            'inputs': {
                'intermediate_data_dir': '../data/intermediate',
            },
        },
        'fit_and_save_discharge_model': {
            'inputs': {
                'discharge_opt_model_fp': discharge_opt_model_fp,
                # forwarded as keyword arguments to `discharge.fit_and_save_model`
                'model_params': {
                    'criterion': 'mse',
                    'max_depth': 10,
                    'min_samples_leaf': 4,
                    'min_samples_split': 2,
                    'n_estimators': 100
                }
            },
        },
        'construct_battery_profile': {
            'inputs': {
                'raw_data_dir': raw_data_dir,
                'discharge_opt_model_fp': discharge_opt_model_fp,
            },
        },
    }
}

execute_pipeline(end_to_end_pipeline, run_config=run_config)

[32m2021-02-08 17:39:53[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 9305bdf6-61e1-423f-8422-04caa3ea10cc - 20432 - ENGINE_EVENT - Starting initialization of resources [asset_store].
[32m2021-02-08 17:39:53[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 9305bdf6-61e1-423f-8422-04caa3ea10cc - 20432 - ENGINE_EVENT - Finished initialization of resources [asset_store].
[32m2021-02-08 17:39:53[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 9305bdf6-61e1-423f-8422-04caa3ea10cc - 20432 - PIPELINE_START - Started execution of pipeline "end_to_end_pipeline".
[32m2021-02-08 17:39:53[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 9305bdf6-61e1-423f-8422-04caa3ea10cc - 20432 - ENGINE_EVENT - Executing steps in process (pid: 20432)
[32m2021-02-08 17:39:53[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 9305bdf6-61e1-423f-8422-04caa3ea10cc - 20432 - load_data.compute - STEP_START - Started execution of step "load_data.compute".
[32m2021-02-08 17:39:53[

<br>

Finally, we'll export the relevant code to our `batopt` module.

In [None]:
#hide
# Run nbdev's notebook-to-module export; this notebook declares `pipeline`
# as its default export target (`#default_exp pipeline`).
# NOTE(review): in this notebook only the imports cell carries an `#exports`
# flag - confirm whether the solid and pipeline cells should be flagged too
from nbdev.export import notebook2script
    
notebook2script()