In [None]:
%matplotlib notebook

def pretty_print(d, indent=0):
    """Print a (possibly nested) dict as a tab-indented tree.

    Each key is printed on its own line, prefixed by ``indent`` tabs. A
    nested dict value is printed recursively one level deeper; any other
    value is printed on the following line with one additional tab.

    Args:
        d: Mapping to print; values may themselves be dicts.
        indent: Number of leading tabs used for the keys of ``d``.
    """
    key_prefix = '\t' * indent
    for name, entry in d.items():
        print(key_prefix, name, sep='')
        if not isinstance(entry, dict):
            # Leaf value: one level deeper than its key.
            print('\t' * (indent + 1), entry, sep='')
            continue
        pretty_print(entry, indent + 1)


def plot_error_distribution(errors: [float], title: str):
    """Show a 50-bin, density-normalised histogram of ``errors``.

    A new figure is created for every call. ``plt`` is expected to be
    provided by the surrounding notebook (presumably ``matplotlib.pyplot``).

    Args:
        errors: Error values to histogram.
        title: Text appended to the "Error distribution - " figure title.
    """
    _, axes = plt.subplots()
    axes.set_title(f"Error distribution - {title}")
    axes.set_xlabel("Error")
    axes.set_ylabel("Density")
    axes.hist(errors, bins=50, density=True, alpha=0.75)
    plt.show()

    
def mean_squared_error(ref: pd.DataFrame, pred: pd.DataFrame) -> float:
    """Return the mean squared difference between ``ref`` and ``pred``.

    The squared differences are averaged per column and only the value for
    the first column is returned.

    Args:
        ref: Reference frame.
        pred: Predicted frame to compare against ``ref``.

    Returns:
        Mean of the squared element-wise differences of the first column.
    """
    residuals = np.subtract(ref, pred)
    per_column_mse = np.square(residuals).mean()
    return per_column_mse.values[0]


def raw_bias(errors: [float]) -> float:
    """Return the arithmetic mean of the signed ``errors``."""
    mean_error = np.mean(errors)
    return mean_error


def abs_raw_bias(abs_errors: [float]) -> float:
    """Return the arithmetic mean of ``abs_errors``.

    The values are averaged as given; the caller is responsible for passing
    absolute errors.
    """
    mean_abs_error = np.mean(abs_errors)
    return mean_abs_error

    
def percent_bias(ref_values: [float], pred_values: [float]) -> float:
    """Return the absolute relative difference of the two means, in percent.

    Args:
        ref_values: Reference values.
        pred_values: Predicted values.

    Returns:
        ``100 * |mean(pred_values) - mean(ref_values)| / |mean(ref_values)|``.
    """
    expected = np.mean(ref_values)
    observed = np.mean(pred_values)
    relative_shift = (observed - expected) / expected
    return 100 * abs(relative_shift)


def sum_error(abs_errors: [float]) -> float:
    """Return the sum of ``abs_errors``.

    The values are summed as given; the caller is responsible for passing
    absolute errors.
    """
    total = np.sum(abs_errors)
    return total


def max_error(errors: [float]) -> float:
    """Return the signed error with the largest magnitude.

    Args:
        errors: Signed error values.

    Returns:
        The element of ``errors`` whose absolute value is largest, with its
        sign preserved. On ties the first such element is returned. Returns
        ``0`` when ``errors`` is empty or contains only zeros.
    """
    res = 0
    for val in errors:
        # Compare magnitudes on both sides: the running result keeps its
        # sign, so comparing against the signed value would let any later
        # element replace a stored negative error.
        if abs(val) > abs(res):
            res = val
    return res


def variance(df: pd.DataFrame) -> float:
    """Return the sample variance of the first column of ``df``.

    The sum of squared deviations from the column mean is divided by
    ``n - 1`` where ``n`` is the number of rows.

    Args:
        df: Frame whose first column is evaluated.

    Returns:
        The variance of the first column, using the ``n - 1`` divisor.
    """
    column = df.iloc[:, 0]
    center = column.mean()
    squared_deviations = [(value - center) ** 2 for value in column]
    degrees_of_freedom = len(column) - 1
    return np.sum(squared_deviations) / degrees_of_freedom


def kurtosis(df: pd.DataFrame) -> float:
    """Return the kurtosis of the first column of ``df`` as a Python float.

    Missing values are skipped (``skipna=True``).

    Args:
        df: Frame whose first column is evaluated.

    Returns:
        The value ``DataFrame.kurtosis`` reports for the first column.
    """
    per_column = df.kurtosis(skipna=True)
    # The result is indexed by column label, so select the first entry by
    # position explicitly; ``per_column[0]`` relies on the deprecated
    # integer-as-position fallback of ``Series.__getitem__``.
    return float(per_column.iloc[0])


def skewness(df: pd.DataFrame) -> float:
    """Return the skewness of the first column of ``df`` as a Python float.

    Missing values are skipped (``skipna=True``).

    Args:
        df: Frame whose first column is evaluated.

    Returns:
        The value ``DataFrame.skew`` reports for the first column.
    """
    per_column = df.skew(skipna=True)
    # The result is indexed by column label, so select the first entry by
    # position explicitly; ``per_column[0]`` relies on the deprecated
    # integer-as-position fallback of ``Series.__getitem__``.
    return float(per_column.iloc[0])


def plot_imputation(gapped: pd.DataFrame, imputed: pd.DataFrame, y_label: str, title="Untitled"):
    """Show the imputation result and its error in two separate figures.

    The first figure overlays the module-level reference frame ``df``
    (green), ``imputed`` (red) and ``gapped`` (cyan) with a legend. The
    second figure plots ``df - imputed`` in blue.

    Args:
        gapped: Data containing gaps.
        imputed: Data after imputation.
        y_label: Y-axis label of the first figure.
        title: Title used for both figures.
    """
    global df  # reference data comes from the notebook's global state

    _, series_ax = plt.subplots(figsize=(9, 6))
    series_ax.set_title(title)
    series_ax.set_xlabel("Time")
    series_ax.set_ylabel(y_label)
    # Drawing order matters: later lines are painted over earlier ones.
    layers = (
        (df, "green", "Reference data"),
        (imputed, "red", "Imputed data"),
        (gapped, "cyan", "Data with gaps"),
    )
    for data, colour, legend_label in layers:
        series_ax.plot(data, c=colour, label=legend_label)
    series_ax.legend()
    plt.show()

    _, error_ax = plt.subplots(figsize=(9, 6))
    error_ax.set_title(title)
    error_ax.set_xlabel("Time")
    error_ax.set_ylabel("Error")
    error_ax.plot(df - imputed, c="blue")
    plt.show()
    
# Per gap type: one dict of error metrics and the indices it was computed on.
saved_results = []
indexes_save = []
# Compare every imputed frame with the reference frame `df`. The element-wise
# error lists only use the indices listed in `gaps_indices[i]`; the
# distribution metrics (MSE, variance, kurtosis, skewness) use whole frames.
for i, imputed_df in enumerate(imputed_dfs):
    flattened_indices = [it for sublist in gaps_indices[i] for it in sublist]

    # Select the first column once per gap type instead of once per index.
    ref_series = df.iloc[:, 0]
    pred_series = imputed_df.iloc[:, 0]
    ref_values = [ref_series[index] for index in flattened_indices]
    pred_values = [pred_series[index] for index in flattened_indices]

    errors = [ref - pred for ref, pred in zip(ref_values, pred_values)]
    abs_errors = [abs(it) for it in errors]

    gap = dataset_config['gaps'][i]
    title = f"Interpolation with gap type {i + 1} [{gap[0]};{gap[1]}]"
    results = {
        "Mean Squared Error": mean_squared_error(df, imputed_df),
        "Raw Bias": raw_bias(errors),
        "Absolute Raw Bias": abs_raw_bias(abs_errors),
        "Percent Bias": percent_bias(ref_values, pred_values),
        "Errors sum": sum_error(abs_errors),
        "Maximum error": max_error(errors),
        "Variance error": variance(df) - variance(imputed_df),
        "Kurtosis error": kurtosis(df) - kurtosis(imputed_df),
        "Skewness error": skewness(df) - skewness(imputed_df),
    }

    saved_results.append(results)
    # NOTE(review): this used to append `index`, a name that only exists
    # inside the comprehensions above (NameError, or a stale unrelated global
    # from another cell). Saving the gap indices of this iteration is the
    # presumed intent - confirm against the consumers of `indexes_save`.
    indexes_save.append(flattened_indices)
    print(title)
    pretty_print(results, indent=1)
    plot_imputation(dfs_with_gaps[i], imputed_df, column_name, f"{imputer['title']} with gap type {i + 1}")
    print("\n\n\n")
