# Pipeline

This pipeline is intended to simplify the whole process of loading a dataset, creating gaps of different types in it, imputing the missing data and evaluating the imputation method.
You may want to edit the cells preceded by an <span style="color: red">**EDIT:**</span> sign to fit your needs.

## Install dependencies

In [None]:
!pip install openpyxl > /dev/null 2>&1
!pip install jupyterlab-widgets > /dev/null 2>&1
!pip install jsfileupload > /dev/null 2>&1
!pip install pyxlsb > /dev/null 2>&1
!pip install sklearn > /dev/null 2>&1
!pip install scipy > /dev/null 2>&1
!pip install scikit_learn > /dev/null 2>&1
!pip install missingpy > /dev/null 2>&1
!pip install fancyimpute > /dev/null 2>&1

## Run the imputations

In [None]:
import ipywidgets as widgets
import random
import torch
import numpy as np

# Seed applied to the torch, NumPy and stdlib random generators before the
# gaps are created for each job.
random_state = 0

# Names of the imputers to run for every job, in execution order.
# Currently disabled: "fillna", "missForest_regressor_MICE", "bayesian_ridge_MICE".
filtered_imputers = [
    "median imputation",
    "mode imputation",
    "mean imputation",
    "linear_interpolation",
    "forward_fill",
    "Hot deck",
    # One KNN imputer per neighbourhood size.
    *(f"KNN k={neighbours}" for neighbours in (1, 5, 10, 15, 20, 100)),
    "RNN",
]

# Imputation jobs. Each job names the loader configuration, the data file, the
# (sheet, column) pair to impute and the (sheet, column) pairs used as features.
to_impute = [
    {
        "conf": conf_name,
        "file": file_name,
        "target": target_column,
        "features": feature_columns,
    }
    for conf_name, file_name, target_column, feature_columns in (
        (
            "knmi",
            "260 De Bilt.csv",
            ("0", "Temperature"),
            [
                ("0", "Temperature"),
                ("0", "Global Radiation"),
                ("0", "Dew Temperature"),
                ("0", "Relative atmospheric humidity"),
            ],
        ),
        (
            "knmi",
            "260 De Bilt.csv",
            ("0", "Relative atmospheric humidity"),
            [
                ("0", "Relative atmospheric humidity"),
                ("0", "Sunshine duration"),
                ("0", "Global Radiation"),
                ("0", "Horizontal visibility"),
            ],
        ),
        (
            "knmi",
            "260 De Bilt.csv",
            ("0", "Global Radiation"),
            [
                ("0", "Global Radiation"),
                ("0", "Relative atmospheric humidity"),
                ("0", "Temperature"),
                ("0", "Sunshine duration"),
            ],
        ),
        (
            "factory zero",
            "099.xlsx",
            ("alklimaHeatPump", "flow_temp"),
            [
                ("alklimaHeatPump", "flow_temp"),
                ("alklimaHeatPump", "return_temp"),
                ("energyHeatpump", "power"),
            ],
        ),
        (
            "factory zero",
            "099.xlsx",
            ("alklimaHeatPump", "op_mode"),
            [
                ("alklimaHeatPump", "op_mode"),
                ("ventilation", "outdoor_temp"),
            ],
        ),
        (
            "factory zero",
            "099.xlsx",
            ("co2sensor", "co2"),
            [
                ("co2sensor", "co2"),
                ("co2sensor", "voc"),
            ],
        ),
        (
            "factory zero",
            "099.xlsx",
            ("smartMeter", "power"),
            [
                ("smartMeter", "power"),
                ("ventilation", "outdoor_temp"),
                ("ventilation", "room_temp"),
                ("solar", "power"),
            ],
        ),
    )
]

devNullOutput = widgets.Output()

for it in to_impute:
    # Load the data
    %run helpers/load_data.ipynb
    config_select.value = it["conf"]
    file_select.value = it["file"]
    dfloader.add_targets(it['target'][1], sheet_name=it['target'][0])
    for feature in it['features']:
        dfloader.add_features(feature[1], sheet_name=feature[0])
    df = dfloader.df

    # Set random state
    torch.manual_seed(random_state)
    np.random.seed(random_state)
    random.seed(random_state)

    # Create gaps
    %run helpers/create_gaps.ipynb

    # Run the imputers
    %run helpers/impute.ipynb
    for k in filtered_imputers:
        try:
            if k == "Hot deck":
                imputer_select.value = "Hot deck" if it["target"] != "op_mode" else "Hot deck (classification)"
                ext = it["file"][-4:]
                hd_donors_selector.value = [opt for opt in hd_donors_selector.options if opt != it["file"] and opt.endswith(ext)]
                hd_confirm_button.click()
            else:
                imputer_select.value = k

            if k == "RNN":
                model_file = f'./models/rnn-{it["target"][1].replace(" ", "_")}.pt'
                rnn_model_text_input.value = model_file
                rnn_model_submit_button.click()

            # Evaluate and save results
            with devNullOutput:
                %run helpers/evaluate.ipynb
                %run helpers/saving.ipynb
                save_eval_btn.click()
                save_seperatly_btn.click()
            print(f"Done running {k} on {it['target']}")
        except:
            print(f"Error running {k} on {it['target']}")


In [None]:
# Snapshot the results: recursively copy `saved_results` to a sibling directory
# suffixed with the current Unix timestamp (`date +%s`).
!cp -r saved_results saved_results_`date +%s`