In [None]:
%matplotlib notebook


def mean_squared_error(ref: pd.DataFrame, pred: pd.DataFrame) -> float:
    """Return the mean squared error between ``ref`` and ``pred``.

    The element-wise difference is squared and averaged per column; only the
    value belonging to the first column of that result is returned.
    """
    squared_diff = np.square(np.subtract(ref, pred))
    per_column_mse = squared_diff.mean()
    return per_column_mse.values[0]


def percent_bias(ref_values: np.ndarray, pred_values: np.ndarray) -> float:
    """Return the absolute percent bias of the predicted mean.

    Computed as ``100 * |(mean(pred) - mean(ref)) / mean(ref)|``.

    Args:
        ref_values: Reference values.
        pred_values: Predicted values to compare against the reference.

    Returns:
        The percent bias as a built-in ``float``. The result is ``nan`` when
        either input is empty or when both the reference mean and the bias are
        zero, and ``inf`` when the reference mean is zero but the bias is not.
    """
    # The mean of an empty input is undefined; answer nan without triggering
    # NumPy's "mean of empty slice" warning.
    if np.size(ref_values) == 0 or np.size(pred_values) == 0:
        return float("nan")

    ref_mean = ref_values.mean()
    bias = pred_values.mean() - ref_mean

    if ref_mean == 0:
        # A relative bias against a zero reference is undefined. Return the
        # IEEE results (x/0 -> inf, 0/0 -> nan) explicitly rather than relying
        # on a warning-emitting division by zero.
        return float("inf") if abs(bias) > 0 else float("nan")

    return float(100 * abs(bias / ref_mean))


def plot_imputation(gapped: pd.DataFrame, imputed: pd.DataFrame, y_label: str, title="Untitled"):
    """Draw two figures comparing imputed data with the reference data.

    The first figure overlays the module-level reference frame ``df`` (green),
    ``imputed`` (red) and ``gapped`` (cyan). The second figure plots
    ``imputed - df`` in blue on a y-axis that is symmetric about zero and whose
    half-height equals the y-range of the first figure.

    Args:
        gapped: Data with gaps, drawn on top of the other two series.
        imputed: Imputed data.
        y_label: Y-axis label of the first figure.
        title: Title shown on both figures.
    """
    # ``df`` is only read in this function, so it resolves to the module-level
    # name without a ``global`` declaration.
    _, overlay_ax = plt.subplots(figsize=(9, 6))
    overlay_ax.set_title(title)
    overlay_ax.set_xlabel("Time")
    overlay_ax.set_ylabel(y_label)
    overlay_ax.plot(df, c="green", label="Reference data")
    overlay_ax.plot(imputed, c="red", label="Imputed data")
    overlay_ax.plot(gapped, c="cyan", label="Data with gaps")
    overlay_ax.legend()
    plt.show()

    _, error_ax = plt.subplots(figsize=(9, 6))
    # Fix the error axis to [-span, +span], where span is the height of the
    # overlay plot's y-range, before the error curve is drawn.
    y_low, y_high = overlay_ax.get_ylim()
    y_span = y_high - y_low
    error_ax.set_ylim(-y_span, y_span)
    error_ax.set_title(title)
    error_ax.set_xlabel("Time")
    error_ax.set_ylabel("Error")
    error_ax.plot(imputed - df, c="blue")
    plt.show()


# Build one table of evaluation metrics: one row per metric, one column per
# gap type. The table is created lazily on the first iteration.
eval_results = None
for i, imputed_df in enumerate(imputed_dfs):
    # gaps_indices[i] is a nested iterable of indices; flatten it to one list.
    flattened_indices = [idx for gap in gaps_indices[i] for idx in gap]

    # Values at the gap positions: reference column vs. first imputed column.
    reference_series = df[column_name]
    imputed_first_col = imputed_df.iloc[:, 0]
    ref_values = np.array([reference_series[idx] for idx in flattened_indices])
    pred_values = np.array([imputed_first_col[idx] for idx in flattened_indices])
    errors = ref_values - pred_values
    abs_errors = np.absolute(errors)

    gap_spec = dataset_config['gaps'][i]
    title = f"Gap type {i + 1} [{gap_spec[0]};{gap_spec[1]}]"

    # The distribution-level metrics compare the whole columns (reference
    # minus imputed), not only the gap positions.
    imputed_series = imputed_df[column_name]
    results = {
        "Mean squared error": mean_squared_error(df, imputed_df),
        "Raw bias": errors.mean(),
        "Absolute bias": abs_errors.mean(),
        "Percent bias": percent_bias(ref_values, pred_values),
        "Errors sum": abs_errors.sum(),
        "Maximum error": abs_errors.max(),
        "Variance error": reference_series.var() - imputed_series.var(),
        "Kurtosis error": reference_series.kurtosis(skipna=True) - imputed_series.kurtosis(skipna=True),
        "Skewness error": reference_series.skew(skipna=True) - imputed_series.skew(skipna=True),
        "Standard deviation error": reference_series.std() - imputed_series.std(),
        "Mean error": reference_series.mean() - imputed_series.mean(),
        "Median error": reference_series.median() - imputed_series.median(),
    }

    if eval_results is None:
        # Metric names become the row index; dict order keeps rows and values aligned.
        eval_results = pd.DataFrame(index=results.keys())

    eval_results[title] = results.values()

display(eval_results)

# For every gap type, print a banner and then draw its comparison plots.
for i, imputed_df in enumerate(imputed_dfs):
    gap_spec = dataset_config['gaps'][i]
    banner = (
        "\n\n--------------------------------- "
        f"Hotdeck with gap type {i + 1} [{gap_spec[0]};{gap_spec[1]}]"
        " ---------------------------------\n\n"
    )
    print(banner)
    plot_imputation(dfs_with_gaps[i], imputed_df, column_name, f"Hotdeck with gap type {i + 1}")
