In [None]:
%matplotlib notebook


def mean_squared_error(ref: pd.DataFrame, pred: pd.DataFrame) -> float:
    """Return the mean squared difference between two frames' first column.

    The frames are subtracted element-wise, the differences are squared and
    averaged per column; only the value for the first column is returned.

    Args:
        ref: Reference values.
        pred: Predicted values to compare against ``ref``.

    Returns:
        The per-column mean of the squared differences, for the first column.
    """
    residuals = np.subtract(ref, pred)
    per_column_mse = np.square(residuals).mean()
    # `mean()` yields one entry per column; callers pass single-column frames,
    # so the first entry is the one of interest.
    return per_column_mse.values[0]


def percent_bias(ref_values: [float], pred_values: [float]) -> float:
    """Return the absolute relative difference of the two means, in percent.

    Args:
        ref_values: Reference values.
        pred_values: Predicted values.

    Returns:
        ``100 * |mean(pred) - mean(ref)| / |mean(ref)|``.
    """
    reference_avg = np.mean(ref_values)
    predicted_avg = np.mean(pred_values)
    relative_gap = (predicted_avg - reference_avg) / reference_avg
    return 100 * abs(relative_gap)


def plot_imputation(df: pd.DataFrame, gapped: pd.DataFrame, imputed: pd.DataFrame, ylabel: str, title="Untitled"):
    """Show two figures: the overlaid series, then the imputation error.

    Args:
        df: Reference data, drawn in green.
        gapped: Data with gaps, drawn in cyan.
        imputed: Imputed data, drawn in red.
        ylabel: Y-axis label of the first figure.
        title: Title used for both figures.
    """
    # --- Figure 1: reference, imputed and gapped series on shared axes ---
    data_fig, data_ax = plt.subplots(figsize=(9, 6))
    data_ax.grid(True)
    data_ax.set_title(title)
    data_ax.set_xlabel("Time")
    data_ax.set_ylabel(ylabel)
    # The gapped series is plotted last, so it is drawn on top of the other
    # two curves.
    data_ax.plot(df, c="green", label="Reference data")
    data_ax.plot(imputed, c="red", label="Imputed data")
    data_ax.plot(gapped, c="cyan", label="Data with gaps")
    data_ax.legend()
    plt.show()

    # --- Figure 2: signed error (imputed minus reference) ---
    error_fig, error_ax = plt.subplots(figsize=(9, 6))
    # NOTE(review): the error axes reuse the y-limits of the data figure, so
    # errors may fall outside the visible range when the data is not centred
    # near zero -- confirm this is intended.
    error_ax.set_ylim(data_ax.get_ylim())
    error_ax.grid(True)
    error_ax.set_title(title)
    error_ax.set_xlabel("Time")
    error_ax.set_ylabel("Error")
    error_ax.plot(imputed - df, c="blue")
    plt.show()


# Accumulators filled once per (target, gap type) pair, always in the same
# order, so entry k of each list refers to the same evaluation run.
saving_results, indexes_save, gap_indexes_save = [], [], []

for target in dfloader.targets:
    eval_results = None
    target_titles = []  # titles of this target's runs, reused when plotting

    for i, imputed_frame in enumerate(imputed_dfs):
        reference_series = df[target]
        imputed_series = imputed_frame[target]

        # Merge the nested index lists of this gap type into one flat list and
        # pick the reference / imputed values at exactly those indices.
        flattened_indices = [idx for gap in gaps_indices[i] for idx in gap]
        ref_values = np.array([reference_series[idx] for idx in flattened_indices])
        pred_values = np.array([imputed_series[idx] for idx in flattened_indices])
        errors = ref_values - pred_values
        abs_errors = np.absolute(errors)

        gap_bounds = dataset_config['gaps'][i]
        gap_label = f"gap type {i + 1} [{gap_bounds[0]};{gap_bounds[1]}]"
        title = f"{imputer['title']} with {gap_label} [{target}]"

        results = {
            # Computed over the whole column, not only over the gap indices.
            "Mean squared error": mean_squared_error(df[[target]], imputed_frame[[target]]),
            # Error statistics restricted to the gap indices.
            "Raw bias": errors.mean(),
            "Absolute bias": abs_errors.mean(),
            "Percent bias": percent_bias(ref_values, pred_values),
            "Errors sum": abs_errors.sum(),
            "Maximum error": abs_errors.max(),
            # Differences of whole-column summary statistics (reference - imputed).
            "Variance error": reference_series.var() - imputed_series.var(),
            "Kurtosis error": reference_series.kurtosis(skipna=True) - imputed_series.kurtosis(skipna=True),
            "Skewness error": reference_series.skew(skipna=True) - imputed_series.skew(skipna=True),
            "Standard deviation error": reference_series.std() - imputed_series.std(),
            "Mean error": reference_series.mean() - imputed_series.mean(),
            "Median error": reference_series.median() - imputed_series.median(),
        }

        # The table is created lazily so its row labels come from the metric
        # names; each gap type then contributes one column.
        if eval_results is None:
            eval_results = pd.DataFrame(index=results.keys())
        eval_results[title] = results.values()

        # Saving the results
        gap_indexes_save.append(f"Data with {gap_label}")
        indexes_save.append(title)
        saving_results.append(results)
        target_titles.append(title)

    display(eval_results)

    # Second pass: one pair of plots per gap type, shown after the table.
    for i, title in enumerate(target_titles):
        print(f"\n\n----------- {title} -----------\n\n")
        plot_imputation(df[[target]], dfs_with_gaps[i][[target]], imputed_dfs[i][[target]], target, title)
