In [None]:
%matplotlib notebook

# Precondition for this cell: `imputed_dfs` must already be defined in the
# current namespace and must not be None; otherwise fail with an explicit message.
assert 'imputed_dfs' in locals() and imputed_dfs is not None, "The imputation needs to be ran"


def mean_squared_error(ref_values: np.array, pred_values: np.array) -> float:
    """Return the mean of the squared element-wise differences.

    Args:
        ref_values: Reference values.
        pred_values: Predicted values, element-wise compatible with
            ``ref_values``.

    Returns:
        The average of ``(ref_values - pred_values) ** 2``.
    """
    squared_residuals = np.square(ref_values - pred_values)
    return squared_residuals.mean()


def percent_bias(ref_values: np.array, pred_values: np.array) -> float:
    """Return the percent bias of the predictions relative to the reference.

    Computed as ``100 * sum(pred_values - ref_values) / sum(ref_values)``.

    Args:
        ref_values: Reference values (array-like).
        pred_values: Predicted values (array-like), element-wise compatible
            with ``ref_values``.

    Returns:
        The percent bias. When the reference values sum to zero the result
        follows NumPy division semantics (``inf`` or ``nan``).
    """
    ref = np.asarray(ref_values)
    pred = np.asarray(pred_values)
    # np.sum reduces in native code over every element; the builtin sum would
    # iterate the array element by element in Python.
    return 100 * np.sum(pred - ref) / np.sum(ref)


def get_min_and_max_diffs(ref_values: np.array, pred_values: np.array, gap_indices: [[datetime]]):
    """Return the mean per-gap difference of minima and of maxima.

    ``ref_values`` and ``pred_values`` are read as consecutive windows, one
    per entry of ``gap_indices``; each window is as long as its gap. For every
    window the reference extremum minus the predicted extremum is recorded.

    Args:
        ref_values: Reference values of all gaps, concatenated in gap order.
        pred_values: Predicted values laid out the same way as ``ref_values``.
        gap_indices: One sequence of indices per gap; only the length of each
            sequence is used here.

    Returns:
        A tuple ``(mean_min_diff, mean_max_diff)``. Both are ``nan`` when no
        non-empty gap is given.
    """
    maxs_error = []
    mins_error = []
    start = 0
    for gap in gap_indices:
        if len(gap) == 0:
            # An empty gap has no values, hence no extrema to compare.
            continue
        end = start + len(gap)
        ref_view = ref_values[start:end]
        pred_view = pred_values[start:end]
        maxs_error.append(ref_view.max() - pred_view.max())
        mins_error.append(ref_view.min() - pred_view.min())
        # Move on to the next gap's window; without this every gap would be
        # evaluated against the values at the very beginning of the arrays.
        start = end
    return np.mean(mins_error), np.mean(maxs_error)


def plot_imputation(df: pd.DataFrame, gapped: pd.DataFrame, imputed: pd.DataFrame, ylabel: str, title="Untitled"):
    """Draw a two-panel figure comparing imputed data with the reference.

    The top panel overlays the reference, imputed and gapped data. The bottom
    panel plots ``imputed - df``; its y-limits are then scaled so that its
    y-span equals the y-span of the top panel.

    Args:
        df: Reference data.
        gapped: Data with gaps.
        imputed: Imputed data.
        ylabel: Y-axis label of the top panel.
        title: Title of the whole figure.

    Returns:
        The created figure.
    """
    fig, (comparison_ax, error_ax) = plt.subplots(2, 1, figsize=(12, 12), constrained_layout=True)
    fig.suptitle(title, fontsize=16)

    # Top panel: the three series, drawn in this order.
    comparison_ax.grid(True)
    comparison_ax.set(title="Comparison of reference and imputed data", xlabel="Time", ylabel=ylabel)
    layers = (
        (df, "green", "Reference data"),
        (imputed, "red", "Imputed data"),
        (gapped, "cyan", "Data with gaps"),
    )
    for data, colour, label in layers:
        comparison_ax.plot(data, c=colour, label=label)
    comparison_ax.legend(loc=2)

    # Bottom panel: signed imputation error.
    error_ax.grid(True)
    error_ax.set(title="Imputation error", xlabel="Time", ylabel="Error")
    error_ax.plot(imputed - df, c="blue")

    # Stretch the error axis by the ratio of the two y-spans.
    cmp_low, cmp_high = comparison_ax.get_ylim()
    err_low, err_high = error_ax.get_ylim()
    span_ratio = abs(cmp_high - cmp_low) / abs(err_high - err_low)
    error_ax.set_ylim(err_low * span_ratio, err_high * span_ratio)
    return fig


# Evaluate every imputed dataframe against the reference `df`, one results
# table and one list of figures per target field.

# Flat accumulators filled in the loop below (one entry per target and gap
# type); presumably read by a later cell that saves the results.
saving_results = []
indexes_save = []
gap_indexes_save = []

eval_plots = []
eval_results = []
imputed_file = dfloader.filename.split('/')[-1]

for target in dfloader.targets:
    # Mean and median are taken over the whole series, not only the gaps.
    ref_mean = round(df[target].mean(), 3)
    ref_median = round(df[target].median(), 3)

    # One single-row frame per gap type; concatenated once after the loop
    # (DataFrame.append no longer exists in pandas >= 2.0).
    target_rows = []
    for i, imputed_df in enumerate(imputed_dfs):
        gap_range = f"[{dataset_config['gaps'][i][0]}-{dataset_config['gaps'][i][1]}]"

        # Restrict both dataframes to the rows that fall inside a gap.
        flattened_indices = [it for sublist in gaps_indices[i] for it in sublist]
        ref_df = df[df.index.isin(flattened_indices)]
        pred_df = imputed_df[imputed_df.index.isin(flattened_indices)]

        ref_values = ref_df[target].values
        pred_values = pred_df[target].values
        errors = ref_values - pred_values
        abs_errors = np.absolute(errors)

        min_diff, max_diff = get_min_and_max_diffs(ref_values, pred_values, gaps_indices[i])
        ref_skew = ref_df[target].skew()
        pred_skew = pred_df[target].skew()
        ref_var = ref_df[target].var()
        pred_var = pred_df[target].var()
        ref_kurtosis = ref_df[target].kurtosis()
        pred_kurtosis = pred_df[target].kurtosis()
        ref_std = ref_df[target].std()
        pred_std = pred_df[target].std()
        pred_mean = imputed_df[target].mean()
        pred_median = imputed_df[target].median()

        title = f"{imputer['title']} - {target} - gap type {i + 1} - {imputation_date}"
        results = {
            "Mean squared error": round(mean_squared_error(ref_values, pred_values), 3),
            "Raw bias": round(errors.mean(), 3),
            "Absolute bias": round(abs_errors.mean(), 3),
            "Percent bias": round(percent_bias(ref_values, pred_values), 3),
            "Errors sum": round(abs_errors.sum(), 3),
            "Mean minimum error": round(min_diff, 3),
            "Mean maximum error": round(max_diff, 3),
            "Maximum error": round(abs_errors.max(), 3),

            "Reference variance": round(ref_var, 3),
            "Imputed variance": round(pred_var, 3),
            "Variance error": round(ref_var - pred_var, 3),

            "Reference kurtosis": round(ref_kurtosis, 3),
            "Imputed kurtosis": round(pred_kurtosis, 3),
            "Kurtosis error": round(ref_kurtosis - pred_kurtosis, 3),

            "Reference skewness": round(ref_skew, 3),
            "Imputed skewness": round(pred_skew, 3),
            "Skewness error": round(ref_skew - pred_skew, 3),

            "Reference standard deviation": round(ref_std, 3),
            "Imputed standard deviation": round(pred_std, 3),
            "Standard deviation error": round(ref_std - pred_std, 3),

            "Reference mean": round(ref_mean, 3),
            "Imputed mean": round(pred_mean, 3),
            "Mean error": round(ref_mean - pred_mean, 3),

            "Reference median": round(ref_median, 3),
            "Imputed median": round(pred_median, 3),
            "Median error": round(ref_median - pred_median, 3),

            "Method": imputer['title'],
            "Gap type": f"{i + 1} {gap_range}".replace(";", ","),
            "Random state": random_state,
            "Target field": target,
            "Date": imputation_date,
            "File": imputed_file,
            "Imputer config": imputer_configs[i],
        }

        # The dict keeps insertion order, so the columns follow the keys above.
        target_rows.append(pd.DataFrame([results], index=[title]))

        # Saving the results
        gap_indexes_save.append(f"Data with gap type {i + 1} {gap_range}")
        indexes_save.append(title)
        saving_results.append(results)

    eval_results.append(pd.concat(target_rows) if target_rows else pd.DataFrame())
    display(eval_results[-1])

    figures = []
    for i, imputed_df in enumerate(imputed_dfs):
        gap_range = f"[{dataset_config['gaps'][i][0]}-{dataset_config['gaps'][i][1]}]"
        title = f"{imputer['title']} with gap type {i + 1} {gap_range} [{target}]"
        figures.append(plot_imputation(df[[target]], dfs_with_gaps[i][[target]], imputed_df[[target]], target, title))
    eval_plots.append(figures)
