In [1]:
#default_exp pipeline

# Pipeline

<br>

### Imports

In [2]:
#exports
import numpy as np
import pandas as pd

from dagster import execute_pipeline, pipeline, solid, Field

import os
from batopt import clean, discharge, charge

<br>

### End-to-End

We're now going to combine these steps into a pipeline using dagster. First, we'll create the individual components.

In [13]:
@solid()
def load_data(_, raw_data_dir: str):
    """Load the pv, demand and weather training datasets from `raw_data_dir`.

    Each dataset is read with `clean.load_training_dataset` and returned in a
    dict keyed by 'pv', 'demand' and 'weather'.
    """
    return {
        'pv': clean.load_training_dataset(raw_data_dir, 'pv'),
        'demand': clean.load_training_dataset(raw_data_dir, 'demand'),
        # Weather is the only dataset loaded with an explicit index frequency
        'weather': clean.load_training_dataset(raw_data_dir, 'weather', dt_idx_freq='H'),
    }

@solid()
def clean_data(_, loaded_data, intermediate_data_dir: str):
    """Clean the loaded datasets and save them as CSVs in `intermediate_data_dir`.

    `loaded_data` is expected to be a dict with 'pv', 'demand' and 'weather'
    entries. The pv data is passed through the `clean.interpolate_missing_*`
    steps, the weather data through `clean.interpolate_missing_weather_solar`,
    and the demand data is passed on unchanged.

    Returns `intermediate_data_dir` so that downstream steps can locate the
    cleaned files.
    """
    # Cleaning
    cleaned_data = dict()

    cleaned_data['pv'] = (loaded_data['pv']
                          .pipe(clean.interpolate_missing_panel_temps, loaded_data['weather'])
                          .pipe(clean.interpolate_missing_site_irradiance, loaded_data['weather'])
                          .pipe(clean.interpolate_missing_site_power)
                         )
    # The weather interpolation is given the loaded (not the cleaned) pv data
    cleaned_data['weather'] = clean.interpolate_missing_weather_solar(loaded_data['pv'], loaded_data['weather'])
    cleaned_data['demand'] = loaded_data['demand']

    # Saving
    # `makedirs` with `exist_ok=True` also creates any missing parent directories
    # and doesn't fail if the directory appears between a check and the create
    os.makedirs(intermediate_data_dir, exist_ok=True)

    cleaned_data['pv'].to_csv(f'{intermediate_data_dir}/pv_cleaned.csv')
    cleaned_data['demand'].to_csv(f'{intermediate_data_dir}/demand_cleaned.csv')
    cleaned_data['weather'].to_csv(f'{intermediate_data_dir}/weather_cleaned.csv')

    return intermediate_data_dir

@solid()
def fit_and_save_charge_model(_, intermediate_data_dir: str, charge_opt_model_fp: str, model_params: dict):
    """Fit the charging model on the intermediate data and save it to `charge_opt_model_fp`.

    `model_params` is unpacked as keyword arguments into
    `charge.fit_and_save_charging_model`. Always returns True once the call
    has completed.
    """
    # NOTE(review): the training data is prepared by the `discharge` module
    # rather than `charge` - confirm this is intended and not a copy-paste slip
    training_inputs = discharge.prepare_training_input_data(intermediate_data_dir)
    features, target = training_inputs

    charge.fit_and_save_charging_model(features, target, charge_opt_model_fp, **model_params)

    return True

@solid()
def fit_and_save_discharge_model(_, intermediate_data_dir: str, discharge_opt_model_fp: str, model_params: dict):
    """Fit the discharge model on the intermediate data and save it to `discharge_opt_model_fp`.

    `model_params` is unpacked as keyword arguments into
    `discharge.fit_and_save_model`. Always returns True once the call has
    completed.
    """
    training_inputs = discharge.prepare_training_input_data(intermediate_data_dir)
    features, target = training_inputs

    discharge.fit_and_save_model(features, target, discharge_opt_model_fp, **model_params)

    return True

@solid()
def construct_battery_profile(_, charge_model_success: bool, discharge_model_success: bool, intermediate_data_dir: str, raw_data_dir: str, output_data_dir: str, discharge_opt_model_fp: str, charge_opt_model_fp: str):
    """Combine the optimised charge and discharge profiles and save the result.

    The discharge and charge profiles are produced by the `discharge` and
    `charge` modules from the saved models, summed, named 'charge_MW' and
    written to `{output_data_dir}/test.csv`.

    Raises an AssertionError if either model-fitting flag is falsy.
    Returns the combined battery profile.
    """
    assert charge_model_success and discharge_model_success, 'Model training was unsuccessful'

    s_discharge_profile = discharge.optimise_latest_test_discharge_profile(raw_data_dir, intermediate_data_dir, discharge_opt_model_fp)
    s_charge_profile = charge.optimise_latest_test_charge_profile(raw_data_dir, intermediate_data_dir, charge_opt_model_fp)

    # Surface the mean of each profile's null mask, as any nulls would carry
    # through the sum below
    from warnings import warn
    warn(str(s_discharge_profile.isnull().mean()))
    warn(str(s_charge_profile.isnull().mean()))

    s_battery_profile = s_charge_profile + s_discharge_profile
    s_battery_profile.name = 'charge_MW'

    # Saving
    # `makedirs` with `exist_ok=True` also creates any missing parent directories
    # and doesn't fail if the directory appears between a check and the create
    os.makedirs(output_data_dir, exist_ok=True)

    s_battery_profile.to_csv(f'{output_data_dir}/test.csv')

    return s_battery_profile

<br>

Then we'll combine them in a pipeline.

In [14]:
@pipeline
def end_to_end_pipeline():
    """Wire the solids together: load -> clean -> fit both models -> construct the battery profile."""
    cleaned_dir = clean_data(load_data())

    # Both fitting steps read from the same cleaned data directory
    charge_fitted = fit_and_save_charge_model(cleaned_dir)
    discharge_fitted = fit_and_save_discharge_model(cleaned_dir)

    construct_battery_profile(charge_fitted, discharge_fitted, cleaned_dir)

    # Should use `great expectations` to check that the battery profile doesn't break the constraints

<br>

We'll now run a test of the pipeline.

In [15]:
# Locations used by more than one solid are named once so they can't drift apart
raw_dir = '../data/raw'
intermediate_dir = '../data/intermediate'
output_dir = '../data/output'
discharge_model_fp = '../models/discharge_opt.sav'
charge_model_fp = '../models/charge_opt.sav'

# Keyword arguments forwarded to the model-fitting functions
discharge_model_params = {
    'criterion': 'mse',
    'max_depth': 10,
    'min_samples_leaf': 4,
    'min_samples_split': 2,
    'n_estimators': 100,
}
charge_model_params = {}

# Inputs that aren't produced by an upstream solid are supplied here
run_config = {
    'solids': {
        'load_data': {
            'inputs': {'raw_data_dir': raw_dir},
        },
        'clean_data': {
            'inputs': {'intermediate_data_dir': intermediate_dir},
        },
        'fit_and_save_discharge_model': {
            'inputs': {
                'discharge_opt_model_fp': discharge_model_fp,
                'model_params': discharge_model_params,
            },
        },
        'fit_and_save_charge_model': {
            'inputs': {
                'charge_opt_model_fp': charge_model_fp,
                'model_params': charge_model_params,
            },
        },
        'construct_battery_profile': {
            'inputs': {
                'raw_data_dir': raw_dir,
                'output_data_dir': output_dir,
                'discharge_opt_model_fp': discharge_model_fp,
                'charge_opt_model_fp': charge_model_fp,
            },
        },
    }
}

execute_pipeline(end_to_end_pipeline, run_config=run_config)

[32m2021-02-09 10:42:30[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 3ee2c786-c3c7-4fdc-8157-8809bb662f12 - 23156 - ENGINE_EVENT - Starting initialization of resources [asset_store].
[32m2021-02-09 10:42:30[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 3ee2c786-c3c7-4fdc-8157-8809bb662f12 - 23156 - ENGINE_EVENT - Finished initialization of resources [asset_store].
[32m2021-02-09 10:42:30[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 3ee2c786-c3c7-4fdc-8157-8809bb662f12 - 23156 - PIPELINE_START - Started execution of pipeline "end_to_end_pipeline".
[32m2021-02-09 10:42:30[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 3ee2c786-c3c7-4fdc-8157-8809bb662f12 - 23156 - ENGINE_EVENT - Executing steps in process (pid: 23156)
[32m2021-02-09 10:42:30[0m - dagster - [34mDEBUG[0m - end_to_end_pipeline - 3ee2c786-c3c7-4fdc-8157-8809bb662f12 - 23156 - load_data.compute - STEP_START - Started execution of step "load_data.compute".
[32m2021-02-09 10:42:30[

<dagster.core.execution.results.PipelineExecutionResult at 0x17c13478520>

<br>

Finally we'll export the relevant code to our `batopt` module

In [9]:
#hide
# Convert the project's notebooks into the `batopt` module code using nbdev
from nbdev.export import notebook2script
    
notebook2script()

Converted 00-utilities.ipynb.
Converted 01-data-cleaning.ipynb.
Converted 02-battery-discharge.ipynb.
Converted 03-battery-charge.ipynb.
Converted 04-battery-optimisation.ipynb.
Converted 05-evaluation.ipynb.
Converted 06-pipeline.ipynb.
