# Visualize Results: Downstream Performance - "Fully Observed" Experiment

This notebook should answer the question: *Does imputation lead to better downstream performance?*

## Notebook Structure 

* Application Scenario 2 - Downstream Performance  
   * Categorical  Columns (Classification)
   * Numerical Columns (Regression)
   * Heterogeneous Columns (Classification and Regression Combined)

In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import re
import seaborn as sns

from pathlib import Path
from data_imputation_paper.experiment import read_experiment, read_csv_files
from data_imputation_paper.plotting import draw_cat_box_plot

%matplotlib inline

%load_ext autoreload
%autoreload 2

2023-03-08 09:33:07.325752: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 09:33:07.413070: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-08 09:33:07.413084: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-08 09:33:07.851331: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

## Settings

In [2]:
# Global plot styling: seaborn "whitegrid" theme with the "paper" context,
# fonts scaled up by 1.5.
sns.set(style="whitegrid")
sns.set_context("paper", font_scale=1.5)

# Override the line width chosen by the seaborn context.
mpl.rcParams["lines.linewidth"] = "2"

In [3]:
# Name of the experiment run whose results this notebook visualizes.
EXPERIMENT = "fully_observed_fix"

# Input directory holding the experiment results (relative to the notebook).
EXPERIMENT_PATH = Path("../data/experiments") / EXPERIMENT

# Display labels that replace the raw metric names ("F1_macro" / "RMSE").
CLF_METRIC = "Classification Tasks"
REG_METRIC = "Regression Tasks"

# Values of the `result_type` column used to select rows from the results.
DOWNSTREAM_RESULT_TYPE = "downstream_performance_mean"
IMPUTE_RESULT_TYPE = "impute_performance_mean"

# Output directory for the generated figures.
FIGURES_PATH = Path("../paper/figures")

## Import the data

In [4]:
%%time

results = read_csv_files(read_experiment(EXPERIMENT_PATH), read_details=False)
results.head()

ValueError: Length mismatch: Expected axis has 0 elements, new values have 8 elements

In [5]:
# Imputation-performance rows for the two reported metrics that contain at
# least one NaN outside the baseline/corrupted/imputed columns. These are used
# later to remove experiments where imputation failed.
# Built as a non-mutating chain: dropping columns in place on a boolean-indexed
# slice of `results` raises SettingWithCopyWarning.
na_impute_results = (
    results[
        (results["result_type"] == IMPUTE_RESULT_TYPE) &
        (results["metric"].isin(["F1_macro", "RMSE"]))
    ]
    .drop(columns=["baseline", "corrupted", "imputed"])
    .loc[lambda impute_rows: impute_rows.isna().any(axis=1)]
)
na_impute_results.shape

NameError: name 'results' is not defined

In [6]:
# Keep only the downstream-performance rows for the two reported metrics.
downstream_results = results[
    (results["result_type"] == DOWNSTREAM_RESULT_TYPE) &
    (results["metric"].isin(["F1_macro", "RMSE"]))
]

# The "strategy" column is dropped below, which only loses no information if
# it holds a single value.
assert len(results["strategy"].unique()) == 1, "expected exactly one strategy in results"

# Non-mutating chain: the previous in-place drop on a filtered frame raised
# SettingWithCopyWarning.
downstream_results = (
    downstream_results
    .merge(
        na_impute_results,
        how="left",
        validate="one_to_one",
        indicator=True,
        suffixes=("", "_imp"),
        on=["experiment", "imputer", "task", "missing_type", "missing_fraction", "strategy", "column"],
    )
    # remove experiments where imputation failed: keep only rows that have no
    # match in `na_impute_results`
    .loc[lambda merged: merged["_merge"] == "left_only"]
    # drop the merge bookkeeping and the columns not needed for plotting
    .drop(columns=["experiment", "strategy", "result_type_imp", "metric_imp", "train", "test", "train_imp", "test_imp", "_merge"])
    # human-readable column names used by the plots
    .rename(
        columns={
            "imputer": "Imputation Method",
            "task": "Task",
            "missing_type": "Missing Type",
            "missing_fraction": "Missing Fraction",
            "column": "Column",
            "baseline": "Baseline",
            "imputed": "Imputed",
            "corrupted": "Corrupted",
        }
    )
)

NameError: name 'results' is not defined

In [7]:
# Imputer class name -> label shown in the figures. Insertion order matters:
# the values are used as `hue_order` when plotting.
rename_imputer_dict = dict(
    ModeImputer="Mean/Mode",
    KNNImputer="$k$-NN",
    ForestImputer="Random Forest",
    AutoKerasImputer="Discriminative DL",
    VAEImputer="VAE",
    GAINImputer="GAIN",
)

# Raw metric name -> task-type label. The values are used as `row_order`
# when plotting.
rename_metric_dict = {"F1_macro": CLF_METRIC, "RMSE": REG_METRIC}

# Apply both mappings frame-wide, imputer labels first, then metric labels.
downstream_results = (
    downstream_results
    .replace(rename_imputer_dict)
    .replace(rename_metric_dict)
)

downstream_results

NameError: name 'downstream_results' is not defined

### Robustness: check which imputers yielded `NaN` values

In [8]:
# For every column that contains NaNs, print the column name, the NaN count,
# and how those NaN rows are distributed over the imputation methods.
separator = "-----" * 10

for column_name in downstream_results.columns:
    is_missing = downstream_results[column_name].isna()
    missing_count = is_missing.sum()
    if not missing_count > 0:
        continue  # nothing to report for fully populated columns

    print(separator)
    print(column_name, missing_count)
    print(separator)
    print(downstream_results.loc[is_missing, "Imputation Method"].value_counts(dropna=False))
    print("\n")

NameError: name 'downstream_results' is not defined

## Compute Downstream Performance relative to Baseline

In [10]:
# Boolean row masks: classification rows vs. regression rows, selected by the
# relabelled "metric" column.
metric_labels = downstream_results["metric"]
clf_row_idx = metric_labels == CLF_METRIC
reg_row_idx = metric_labels == REG_METRIC

In [11]:
# Improvement = (Imputed - Corrupted) / Baseline, i.e. the change from
# imputing, relative to the baseline score.
relative_change = (
    downstream_results["Imputed"] - downstream_results["Corrupted"]
) / downstream_results["Baseline"]

# Flip the sign on regression rows so that a positive Improvement means
# "better" for both task types (RMSE is lower-is-better).
relative_change.loc[reg_row_idx] = relative_change.loc[reg_row_idx] * -1

downstream_results["Improvement"] = relative_change

downstream_results

Unnamed: 0,Imputation Method,Task,Missing Type,Missing Fraction,Column,result_type,metric,Baseline,Corrupted,Imputed,Improvement
0,Discriminative DL,40922,MAR,0.01,gyro_y,downstream_performance_mean,Classification Tasks,0.735632,0.735582,0.735822,0.000326
1,Discriminative DL,40922,MAR,0.30,gyro_y,downstream_performance_mean,Classification Tasks,0.735632,0.725199,0.731309,0.008306
2,Discriminative DL,40922,MAR,0.50,gyro_y,downstream_performance_mean,Classification Tasks,0.735632,0.720784,0.732196,0.015513
3,Discriminative DL,40922,MAR,0.10,gyro_y,downstream_performance_mean,Classification Tasks,0.735632,0.735289,0.735309,0.000028
4,Discriminative DL,40922,MCAR,0.01,gyro_y,downstream_performance_mean,Classification Tasks,0.735632,0.735530,0.735632,0.000138
...,...,...,...,...,...,...,...,...,...,...,...
4951,GAIN,1471,MCAR,0.10,V9,downstream_performance_mean,Classification Tasks,0.508405,0.497643,0.515090,0.034317
4952,GAIN,1471,MNAR,0.01,V9,downstream_performance_mean,Classification Tasks,0.508405,0.506586,0.508398,0.003565
4953,GAIN,1471,MNAR,0.30,V9,downstream_performance_mean,Classification Tasks,0.508405,0.480143,0.530432,0.098914
4954,GAIN,1471,MNAR,0.50,V9,downstream_performance_mean,Classification Tasks,0.508405,0.451953,0.498481,0.091519


## Application Scenario 2 - Downstream Performance

### Categorical  Columns (Classification)

In [9]:
# Box plot of the "Improvement" column, written to FIGURES_PATH.
# Hue and row order follow the insertion order of the rename dictionaries.
imputer_labels = list(rename_imputer_dict.values())
metric_labels_order = list(rename_metric_dict.values())

draw_cat_box_plot(
    downstream_results,
    "Improvement",
    (-0.15, 0.3),  # NOTE(review): presumably the y-axis limits — confirm against draw_cat_box_plot
    FIGURES_PATH,
    "fully_observed_downstream_boxplot.eps",
    hue_order=imputer_labels,
    row_order=metric_labels_order,
)

NameError: name 'downstream_results' is not defined