In [1]:
%load_ext autoreload
%autoreload 2
from wishart import wishart_lib, wishart_lib_stepan
from motifs import motifs
import sys

sys.path.append("..")

from indexes import indexes_lib
from generator import generator_lib
import numpy as np
from matplotlib import pyplot as plt
import importlib
from scipy.interpolate import make_interp_spline, BSpline
from collections import defaultdict
from typing import List

import sys

sys.path.append("..")
importlib.reload(wishart_lib_stepan)
importlib.reload(wishart_lib)
importlib.reload(indexes_lib)
importlib.reload(generator_lib)
importlib.reload(motifs)
from collections import defaultdict
from motifs.motifs import GenerateAllMotifs, GenPatterns
import numpy as np
import pdb
import dill
# from sklearn.datasets.samples_generator import make_blobs
import random
from itertools import combinations, product
from scipy.special import gamma
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import pdist, squareform, euclidean
import matplotlib.pyplot as plt
from sklearn import datasets
from tqdm import tqdm
from math import sqrt
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.metrics import f1_score, confusion_matrix, silhouette_score, davies_bouldin_score
import seaborn as sn
from statistics import mean
from wishart.wishart_lib import Wishart
import itertools
import seaborn as sns

In [2]:
class Lorenz:
    """Lorenz system integrated with a fixed-step classical Runge-Kutta (RK4) scheme.

    The system is
        dx/dt = s * (y - x)
        dy/dt = x * (r - z) - y
        dz/dt = x * y - b * z

    Parameters
    ----------
    s, r, b : float
        Parameters of the system, stored on the instance and used by ``generate``.
    """

    def __init__(self, s=10, r=28, b=8 / 3):
        self.s = s
        self.r = r
        self.b = b

    # Differential equations of a Lorenz system
    def X(self, x, y, s):
        """Return dx/dt = s * (y - x)."""
        return s * (y - x)

    def Y(self, x, y, z, r):
        """Return dy/dt = -x * z + r * x - y."""
        return (-x) * z + r * x - y

    def Z(self, x, y, z, b):
        """Return dz/dt = x * y - b * z."""
        return x * y - b * z

    # RK4 for the differential equations
    def RK4(self, x, y, z, s, r, b, dt):
        """Advance the state ``(x, y, z)`` by one RK4 step of size ``dt``.

        The arguments are not modified; the new state is returned as a tuple
        ``(x, y, z)``.
        """
        # Stage 1: slopes at the current state.
        k_1 = self.X(x, y, s)
        l_1 = self.Y(x, y, z, r)
        m_1 = self.Z(x, y, z, b)

        # Stage 2: slopes at the midpoint reached with the stage-1 slopes.
        x_mid = x + k_1 * dt * 0.5
        y_mid = y + l_1 * dt * 0.5
        z_mid = z + m_1 * dt * 0.5
        k_2 = self.X(x_mid, y_mid, s)
        l_2 = self.Y(x_mid, y_mid, z_mid, r)
        m_2 = self.Z(x_mid, y_mid, z_mid, b)

        # Stage 3: slopes at the midpoint reached with the stage-2 slopes.
        x_mid = x + k_2 * dt * 0.5
        y_mid = y + l_2 * dt * 0.5
        z_mid = z + m_2 * dt * 0.5
        k_3 = self.X(x_mid, y_mid, s)
        l_3 = self.Y(x_mid, y_mid, z_mid, r)
        m_3 = self.Z(x_mid, y_mid, z_mid, b)

        # Stage 4: slopes at the end point reached with the stage-3 slopes.
        x_end = x + k_3 * dt
        y_end = y + l_3 * dt
        z_end = z + m_3 * dt
        k_4 = self.X(x_end, y_end, s)
        l_4 = self.Y(x_end, y_end, z_end, r)
        m_4 = self.Z(x_end, y_end, z_end, b)

        # Weighted combination of the four slopes. The expression is kept in
        # exactly this form so that the generated trajectory stays bit-for-bit
        # the same (the system is chaotic, rounding differences would grow).
        x_next = x + (k_1 + 2 * k_2 + 2 * k_3 + k_4) * dt * (1 / 6)
        y_next = y + (l_1 + 2 * l_2 + 2 * l_3 + l_4) * dt * (1 / 6)
        z_next = z + (m_1 + 2 * m_2 + 2 * m_3 + m_4) * dt * (1 / 6)

        return (x_next, y_next, z_next)

    def generate(self, dt=0.1, steps=100000, initial_state=(1, 1, 1)):
        """Integrate the system and return the whole trajectory.

        Parameters
        ----------
        dt : float
            Integration step.
        steps : int
            Number of RK4 steps to perform.
        initial_state : tuple
            Starting point ``(x_0, y_0, z_0)``; defaults to ``(1, 1, 1)``.

        Returns
        -------
        tuple of np.ndarray
            ``(x_array, y_array, z_array)``, each holding the initial value
            followed by one value per step (``steps + 1`` entries).
        """
        x, y, z = initial_state

        x_list = [x]
        y_list = [y]
        z_list = [z]

        done = 0
        while done < steps:
            x, y, z = self.RK4(x, y, z, self.s, self.r, self.b, dt)
            x_list.append(x)
            y_list.append(y)
            z_list.append(z)
            done += 1

        return np.array(x_list), np.array(y_list), np.array(z_list)


def lorenz_generation(s=10, r=28, b=8 / 3):
    """Generate the x component of a Lorenz trajectory, min-max scaled to [0, 1].

    The trajectory is produced by ``Lorenz(s, r, b).generate()`` with its
    default arguments; the first 250 samples are dropped before scaling.
    """
    x_series, _y_series, _z_series = Lorenz(s, r, b).generate()
    trimmed = x_series[250:]
    return (trimmed - trimmed.min()) / (trimmed.max() - trimmed.min())


def lorenz_visualisation(data):
    """Plot the first 2500 samples of ``data`` on a 20x8 figure with a grid and an x tick every 100 samples."""
    tick_positions = list(range(0, 2500, 100))
    head = data[:2500]

    plt.figure(figsize=(20, 8))
    plt.plot(head)
    plt.xticks(tick_positions)
    plt.grid()
    plt.show()

In [3]:
def get_val_for_pattern_and_pos(data: np.array, pattern: list, pos: int, bad):
    """Collect the samples of ``data`` that precede ``pos`` at the gaps given by ``pattern``.

    Walking the pattern from its last gap to its first, the gaps are
    accumulated into an offset and ``data[pos - offset]`` is picked each time,
    so the result is ordered from the oldest sample to the most recent one.

    Parameters
    ----------
    data : array-like
        Indexable series (rows may themselves be arrays).
    pattern : sequence of int
        Distances between consecutive points of the pattern.
    pos : int
        Position the pattern ends at (this position itself is not read).
    bad : array-like
        Truthy entries mark positions that must not be used.

    Returns
    -------
    np.ndarray
        The collected samples, or an empty array when any required position is
        flagged in ``bad`` or lies before the start of ``data``.
    """
    offset = 0
    collected = []
    for gap in reversed(pattern):
        offset += gap
        idx = pos - offset
        # A negative index would silently wrap around to the end of the
        # series, so treat it as "not enough history".
        if idx < 0 or bad[idx]:
            return np.array([])
        collected.append(data[idx])
    collected.reverse()
    return np.array(collected)

class Daemon:
    """Turns a list of candidate values into a single prediction (or refuses to).

    Every candidate is a triple ``[value, weight_d, weight_q]``.

    Parameters
    ----------
    mode : str
        ``"simple"``   - plain mean of the values;
        ``"simple_d"`` - mean weighted by ``weight_d``;
        ``"simple_q"`` - mean weighted by ``weight_q``;
        anything else (e.g. ``"simple_d_q"``) - mean weighted by
        ``weight_d * weight_q`` over the values inside the quantile band.
    is_pred : bool
        When true, ``predict`` first runs ``is_predictable`` and returns
        ``None`` if the check fails.
    quantiles : tuple
        Lower and upper quantile levels of the band of candidate values.
    gap : float
        Largest width of the quantile band that still counts as predictable.
    """

    def __init__(self, mode="simple", is_pred=True, quantiles=(0, 1), gap=0.05):
        self.mode = mode
        self.is_pred = is_pred
        self.quantiles = quantiles
        self.gap = gap

    def _quantile_band(self, preds):
        """Return the ``(low, high)`` quantiles of the candidate values."""
        values = pd.Series(np.array(preds)[:, 0])
        return values.quantile(self.quantiles[0]), values.quantile(self.quantiles[1])

    def mean_d(self, preds):
        """Mean of the values weighted by ``weight_d``; ``None`` if the weights sum to zero."""
        sum_weight = sum(p[1] for p in preds)
        if sum_weight == 0:
            return None
        return sum(p[0] * p[1] for p in preds) / sum_weight

    def mean_q(self, preds):
        """Mean of the values weighted by ``weight_q``; ``None`` if the weights sum to zero."""
        sum_weight = sum(p[2] for p in preds)
        if sum_weight == 0:
            return None
        return sum(p[0] * p[2] for p in preds) / sum_weight

    def mean_d_q(self, preds):
        """Mean weighted by ``weight_d * weight_q`` over the candidates inside the quantile band.

        Returns ``None`` when there are no candidates, none falls inside the
        band, or the weights sum to zero.
        """
        if len(preds) == 0:
            return None
        low, high = self._quantile_band(preds)
        cleaned = [p for p in preds if low <= p[0] <= high]
        if len(cleaned) == 0:
            return None
        sum_weight = sum(p[1] * p[2] for p in cleaned)
        if sum_weight == 0:
            return None
        return sum(p[0] * p[1] * p[2] for p in cleaned) / sum_weight

    def predict(self, possible_values):
        """Aggregate ``possible_values`` according to ``mode``.

        Returns ``None`` when there is nothing to aggregate, when the
        predictability check is enabled and fails, or when the selected mean
        cannot be computed.
        """
        if len(possible_values) == 0:
            return None
        if self.is_pred and not self.is_predictable(possible_values):
            return None
        if self.mode == "simple":
            return np.mean([p[0] for p in possible_values])
        if self.mode == "simple_d":
            return self.mean_d(possible_values)
        if self.mode == "simple_q":
            return self.mean_q(possible_values)
        # Every other mode name falls back to the combined weighting.
        return self.mean_d_q(possible_values)

    def is_predictable(self, possible_values):
        """Return whether the quantile band of the candidate values is no wider than ``gap``.

        An empty candidate list is never predictable.
        """
        if len(possible_values) == 0:
            return False
        low, high = self._quantile_band(possible_values)
        return not (high - low > self.gap)


class IdealDeamon(object):
    """Oracle filter that accepts a prediction only if it is within ``eps`` of the real value.

    Parameters
    ----------
    real_vals : sequence
        True values; a prediction for ``step`` is compared against
        ``real_vals[start_point + step]``.
    eps : float
        Largest absolute error that is still accepted.
    mode : str
        Stored on the instance; not used by the methods of this class.
    """

    def __init__(self, real_vals, eps=0.05, mode='simple'):
        self.eps = eps
        self.mode = mode
        self.real_vals = real_vals

    @property
    def label(self):
        return 'Ideal model of demon'

    def predict(self, start_point, step, prediction):
        """Return ``prediction`` if it is close enough to the real value, otherwise ``None``.

        A ``prediction`` of ``None`` (an upstream model that declined to
        predict) is passed through as ``None`` instead of raising.
        """
        if prediction is None:
            return None
        if abs(prediction - self.real_vals[start_point + step]) > self.eps:
            return None
        return prediction

    def is_predictable(self, start_point, step, prediction):
        """Return whether ``prediction`` is within ``eps`` of the real value (``False`` for ``None``)."""
        if prediction is None:
            return False
        return abs(prediction - self.real_vals[start_point + step]) <= self.eps


def base_prediction(data, daemon: "Daemon", h: int, L: int = 3, kmax: int = 10, eps: float = 0.1, QVALUE=0.99,
                    return_possible_values=False, centers_by_pattern=None):
    """Forecast ``h`` steps past the end of ``data`` by matching stored cluster centers.

    For every forecast position and every pattern from ``GenPatterns(L - 1, kmax)``
    the preceding samples selected by the pattern are compared with each stored
    center of that pattern (all coordinates but the last). A center closer than
    ``eps`` contributes its last coordinate as a candidate value. The candidates
    are handed to ``daemon.predict``; a ``None`` answer, or no candidates at
    all, marks the position as bad, and bad positions are never used as
    history for later steps.

    Parameters
    ----------
    data : sequence of float
        Known history; its length is ``t``.
    daemon : Daemon
        Object whose ``predict(candidates)`` returns a value or ``None``.
    h : int
        Number of steps to forecast.
    L : int
        ``L - 1`` is passed to ``GenPatterns``.
    kmax : int
        Passed to ``GenPatterns``.
    eps : float
        Distance threshold for a center to count as a match.
    QVALUE : float
        Factor applied to the mean quality of the matched history to obtain
        the quality weight of a candidate.
    return_possible_values : bool
        When true, the candidate lists are returned as a third element.
    centers_by_pattern : dict, optional
        Mapping ``pattern -> list of centers``. When omitted, the
        notebook-level ``centers`` dictionary is used. Patterns missing from
        the mapping simply contribute no candidates.

    Returns
    -------
    list
        ``[prediction, bad]`` or ``[prediction, bad, possible_values]``.
        ``prediction`` has shape ``(t + h, 2)``: column 0 holds the value,
        column 1 its quality weight (1 for the known history). ``bad`` has
        length ``t + h`` and holds 1 at positions that could not be predicted.
        ``possible_values[i]`` lists the ``[value, weight_d, weight_q]``
        candidates of forecast step ``i``.
    """
    if centers_by_pattern is None:
        # Backward-compatible default: read the notebook-level dictionary.
        centers_by_pattern = centers

    t = len(data)
    prediction = np.zeros(shape=(t + h, 2))
    prediction[:t, 0] = data
    prediction[:t, 1] = 1
    bad = np.zeros(t + h, dtype=int)
    possible_values = [[] for _ in range(h)]

    for i in range(h):
        pos = t + i
        candidates = possible_values[i]

        for pattern in GenPatterns(L - 1, kmax):
            window = get_val_for_pattern_and_pos(prediction, pattern, pos, bad)
            if len(window) == 0:
                # The pattern touches a bad position: it cannot be used here.
                continue
            window_values = window[:, 0]
            # The quality weight depends only on the matched history, not on the center.
            weight_q = np.mean(window[:, 1]) * QVALUE

            for c in centers_by_pattern.get(pattern, ()):
                if len(c) == 0:
                    continue
                dist = np.linalg.norm(c[:-1] - window_values)
                if dist < eps:
                    weight_d = (eps - dist) / eps
                    candidates.append([c[-1], weight_d, weight_q])

        if candidates:
            pred = daemon.predict(candidates)
            if pred is not None:
                prediction[pos][0] = pred
            else:
                bad[pos] = 1
                prediction[pos][0] = 0
            prediction[pos][1] = np.mean([candidate[2] for candidate in candidates])
        else:
            bad[pos] = 1
            prediction[pos][0] = 0

    if return_possible_values:
        return [prediction, bad, possible_values]
    return [prediction, bad]

def base_prediction_ideal(data, daemon: "Daemon", ideal_daemon: "IdealDeamon", h: int, L: int = 3, kmax: int = 10, eps: float = 0.1, QVALUE = 0.99,
                    return_possible_values=False, centers_by_pattern=None):
    """Forecast ``h`` steps like ``base_prediction`` and filter every step through ``ideal_daemon``.

    For every forecast position and every pattern from ``GenPatterns(L - 1, kmax)``
    the preceding samples selected by the pattern are compared with each stored
    center of that pattern; centers closer than ``eps`` contribute their last
    coordinate as a candidate. ``daemon.predict`` aggregates the candidates and
    ``ideal_daemon.predict(0, i, value)`` then accepts or rejects the value for
    forecast step ``i``. A step is marked bad when there are no candidates,
    when ``daemon`` declines (returns ``None``) or when ``ideal_daemon``
    rejects the value.

    Parameters
    ----------
    data : sequence of float
        Known history; its length is ``t``.
    daemon : Daemon
        Object whose ``predict(candidates)`` returns a value or ``None``.
    ideal_daemon : IdealDeamon
        Object whose ``predict(start_point, step, value)`` returns the value or ``None``.
    h, L, kmax, eps, QVALUE
        Same meaning as in ``base_prediction``.
    return_possible_values : bool
        Accepted for signature compatibility; the candidate lists are always returned.
    centers_by_pattern : dict, optional
        Mapping ``pattern -> list of centers``. When omitted, the
        notebook-level ``centers`` dictionary is used. Patterns missing from
        the mapping contribute no candidates.

    Returns
    -------
    list
        ``[prediction, bad, possible_values]`` with ``prediction`` of shape
        ``(t + h, 2)`` (value, quality weight) and ``bad`` of length ``t + h``.

    Notes
    -----
    The number of center comparisons performed is printed before returning.
    """
    if centers_by_pattern is None:
        # Backward-compatible default: read the notebook-level dictionary.
        centers_by_pattern = centers

    t = len(data)
    prediction = np.zeros(shape=(t + h, 2))
    prediction[:t, 0] = data
    prediction[:t, 1] = 1
    bad = np.zeros(t + h, dtype=int)
    possible_values = [[] for _ in range(h)]

    steps = 0  # number of (window, center) comparisons, reported at the end
    for i in range(h):
        pos = t + i
        candidates = possible_values[i]

        for pattern in GenPatterns(L - 1, kmax):
            window = get_val_for_pattern_and_pos(prediction, pattern, pos, bad)
            if len(window) == 0:
                # The pattern touches a bad position: it cannot be used here.
                continue
            window_values = window[:, 0]
            # The quality weight depends only on the matched history, not on the center.
            weight_q = np.mean(window[:, 1]) * QVALUE

            for c in centers_by_pattern.get(pattern, ()):
                if len(c) == 0:
                    continue
                steps += 1
                dist = np.linalg.norm(c[:-1] - window_values)
                if dist < eps:
                    weight_d = (eps - dist) / eps
                    candidates.append([c[-1], weight_d, weight_q])

        if candidates:
            raw_pred = daemon.predict(candidates)
            # The daemon may decline; do not hand None to the oracle, which
            # would try to subtract the real value from it.
            pred = None if raw_pred is None else ideal_daemon.predict(0, i, raw_pred)
            if pred is not None:
                prediction[pos][0] = pred
            else:
                bad[pos] = 1
                prediction[pos][0] = 0
            prediction[pos][1] = np.mean([candidate[2] for candidate in candidates])
        else:
            bad[pos] = 1
            prediction[pos][0] = 0

    print(steps)
    return [prediction, bad, possible_values]


def get_mae(preds, actual, bad, h, pref):
    """Mean absolute error over the first ``pref`` positions that are not flagged in ``bad``.

    ``preds[i][0]`` is compared with ``actual[i]``. Returns 0 when every
    position is flagged (or ``pref`` is 0). ``h`` is not used.
    """
    abs_error_total = 0
    used_points = 0
    for idx in range(pref):
        if not bad[idx]:
            abs_error_total += abs(actual[idx] - preds[idx][0])
            used_points += 1
    return abs_error_total / used_points if used_points else 0


def get_mse(preds, actual, bad, h, pref):
    """Mean squared error over the first ``pref`` positions that are not flagged in ``bad``.

    ``preds[i][0]`` is compared with ``actual[i]``. Returns 0 when every
    position is flagged (or ``pref`` is 0). ``h`` is not used.
    """
    squared_error_total = 0
    used_points = 0
    for idx in range(pref):
        if not bad[idx]:
            squared_error_total += abs(actual[idx] - preds[idx][0]) ** 2
            used_points += 1
    return squared_error_total / used_points if used_points else 0


def get_rmse(preds, actual, bad, h, pref):
    """Root of the mean squared error returned by ``get_mse`` for the same arguments."""
    mean_squared_error = get_mse(preds, actual, bad, h, pref)
    return mean_squared_error ** 0.5


def get_amount_of_predictable_pts(preds, actual, bad, h, pref):
    """Percentage of the first ``pref`` positions that are flagged in ``bad``.

    Despite the function name, the flags in ``bad`` are what is summed, so
    the result is the share of flagged positions. ``preds``, ``actual`` and
    ``h`` are not used.
    """
    flagged = sum(bad[:pref])
    share = flagged / pref
    return share * 100.0


def smooth_plot(x, y, plt):
    """Draw a cubic interpolating spline through ``(x, y)`` on ``plt`` and return the drawn line.

    The spline is evaluated at 1000 evenly spaced points between ``x.min()``
    and ``x.max()``. ``plt`` is any object with a ``plot`` method that returns
    a one-element sequence.
    """
    dense_x = np.linspace(x.min(), x.max(), 1000)
    spline = make_interp_spline(x, np.array(y), k=3)
    (line,) = plt.plot(dense_x, spline(dense_x))
    return line


def close_motifs(motif1, motif2, eps=0.01):
    """Return whether the Euclidean distance between the two motifs is strictly below ``eps``."""
    return np.linalg.norm(motif1 - motif2) < eps


def filter_patterns(patterns):
    """Return, as a list, the patterns that do not contain the value 1."""
    return [pattern for pattern in list(patterns) if 1 not in pattern]


def load_centers(r=28.0, centers=None, suffix=""):
    """Load the cluster centers stored for parameter ``r`` and merge them into ``centers``.

    The file ``centers{suffix}/{r}.npy`` is read and iterated as
    ``(pattern, list_of_centers)`` pairs. A pattern that is not yet in
    ``centers`` is added with the loaded list; for a pattern that is already
    present the loaded centers are appended to the existing list in place.

    Parameters
    ----------
    r : float
        Value used to build the file name.
    centers : dict, optional
        Dictionary to merge into. It is modified in place and returned. When
        omitted, a new empty dictionary is created for this call.
    suffix : str
        Appended to the ``centers`` directory name.

    Returns
    -------
    dict
        The merged mapping ``pattern -> list of centers``.
    """
    if centers is None:
        # A fresh dictionary per call: a dict literal in the signature would be
        # shared by every call that relies on the default and keep growing.
        centers = {}

    # NOTE: allow_pickle=True executes pickle on load; only use it on files you produced yourself.
    loaded = np.load(f"centers{suffix}/{r}.npy", allow_pickle=True)
    for k, v in loaded:
        if k in centers:
            centers[k].extend(v)
        else:
            centers[k] = v

    return centers


def calculate_metrics(prediction, bad, test_data, h, horizons=(1, 10, 50, 100)):
    """Build a one-row table with NP(%), MAE and RMSE for several forecast horizons.

    Only the last ``h`` rows of ``prediction`` and ``bad`` (the forecast part)
    are evaluated against ``test_data``. For every horizon the metrics are
    computed over the first ``horizon`` forecast steps.

    Parameters
    ----------
    prediction, bad
        Outputs of ``base_prediction``.
    test_data : sequence of float
        True values of the forecast steps.
    h : int
        Number of forecast steps contained in ``prediction``.
    horizons : sequence of int
        Horizons to report; every horizon must not exceed the number of
        forecast steps available. Defaults to ``(1, 10, 50, 100)``.

    Returns
    -------
    pd.DataFrame
        One row (index 0) with two-level columns ``(str(horizon), metric)``,
        metric being ``"NP(%)"`` (rounded to 2 digits), ``"MAE"`` and
        ``"RMSE"`` (rounded to 3 digits), grouped by horizon.
    """
    metric_names = ["NP(%)", "MAE", "RMSE"]
    forecast = prediction[-h:]
    forecast_bad = bad[-h:]

    values = []
    columns = []
    for horizon in horizons:
        bad_share = get_amount_of_predictable_pts(forecast, test_data, forecast_bad, h, horizon)
        mae = get_mae(forecast, test_data, forecast_bad, h, horizon)
        rmse = get_rmse(forecast, test_data, forecast_bad, h, horizon)
        values.extend([round(bad_share, 2), round(mae, 3), round(rmse, 3)])
        # Labels are derived from the horizon itself so they cannot drift
        # away from the values they describe.
        columns.extend((str(horizon), name) for name in metric_names)

    final_df = pd.DataFrame(data=[], columns=pd.MultiIndex.from_tuples(columns))
    final_df.loc[0] = np.array(values, dtype=float)
    return final_df

def get_combinations(numbers, length):
    """Return the distinct combinations of ``length`` elements of ``numbers``.

    Every combination is a tuple sorted in ascending order. Duplicates (which
    appear when ``numbers`` has repeated values) are removed through a set, so
    the order of the returned list is the iteration order of that set.
    """
    unique = set()
    for combo in itertools.combinations(numbers, length):
        unique.add(tuple(sorted(combo)))
    return list(unique)

In [9]:
# Experiment configuration shared by the cells below.
kmax = 10  # NOTE(review): the prediction calls below do not pass this name; base_prediction's own default kmax is also 10
L = 4  # NOTE(review): the prediction calls below pass the literal L=4 instead of this name
TRAIN_SIZE = 10_000  # number of leading samples of the series used as known history
TEST_SIZE = 1000  # number of samples right after the history kept for evaluation
S = 10  # Lorenz parameter s
B = 8 / 3  # Lorenz parameter b
r = 28  # Lorenz parameter r of the base series
# r values of the auxiliary series: alternating below and above 28, moving away in steps of 0.01.
auxiliary_r = [27.99,28.01,27.98,28.02,27.97,28.03,27.96,28.04,27.95,28.05,27.94,28.06,27.93,28.07,27.92,28.08,27.91,28.09]

# NOTE(review): not referenced in the visible cells - presumably the Wishart clustering
# parameters used to produce the stored centers; confirm.
WISHART_R = 10
WISHART_U = 0.2

In [38]:
# Cluster centers of the base time series, keyed by pattern.
# NOTE: allow_pickle=True executes pickle on load; only use it on files you produced yourself.
loaded = np.load('centers/28.0.npy', allow_pickle=True)
centers = {pattern: pattern_centers for pattern, pattern_centers in loaded}

# Centers of auxiliary time series (e.g. r = 28.04 or r = 28.02) can be merged
# into this dictionary with load_centers(r=..., centers=centers); none are
# merged here, so the baseline below uses the base series only.

In [47]:
# Baseline run: forecast h steps past the training window with the centers loaded above
# (base_prediction reads the notebook-level `centers` dictionary).
o_data = lorenz_generation(S, r, B)
train_data = o_data[:TRAIN_SIZE]
test_data = o_data[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
h = 100  # number of steps forecast past the end of train_data
# mode="simple_q" selects Daemon.mean_q; is_pred=False switches off the daemon's spread check.
prediction, bad, possible = base_prediction(train_data, Daemon(mode="simple_q", is_pred=False, quantiles=(0.05, 0.95)), h, L=4, eps=0.009, return_possible_values=True)
# calculate_metrics(prediction, bad, test_data, h)

In [None]:
# Keep only the h forecast rows; base_prediction returns the history followed by the forecast.
prediction = prediction[-h:]
bad = bad[-h:]

# One value per prefix length i = 1..h of the forecast.
# mse_val_baseline holds RMSE values (get_rmse) despite the "mse" in its name.
mse_val_baseline = [get_rmse(prediction, test_data, bad, h, i) for i in range(1, h + 1)]
# Percentage of the first i forecast points that are flagged in `bad`.
pred_cnt_baseline = [get_amount_of_predictable_pts(prediction, test_data, bad, h, i) for i in range(1, h + 1)]

In [46]:
# Persist the baseline curves (RMSE and share of bad points per prefix length).
# The metrics/ directory has to exist already.
np.save("metrics/rmse_baseline.npy", mse_val_baseline)
np.save("metrics/bad_baseline.npy", pred_cnt_baseline)

#### Running predictions for multiple r

In [29]:
# One forecast per auxiliary r: the base centers are merged with the centers of a single auxiliary series.
o_data = lorenz_generation(S, r, B)
train_data = o_data[:TRAIN_SIZE]
test_data = o_data[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
df_arr = []  # one-row metric tables, one per auxiliary r
rmse_arr = []  # per run: RMSE for every prefix length 1..h
bad_arr = []  # per run: percentage of bad points for every prefix length 1..h

for i in tqdm(range(len(auxiliary_r))):
    # The dictionary is rebuilt from disk on every iteration: load_centers appends to the
    # lists stored in it, so reusing one dictionary would accumulate the centers of
    # earlier auxiliary series.
    centers = dict()

    # Loading the base time series
    loaded = np.load('centers/28.0.npy', allow_pickle=True)
    for k, v in loaded:
        centers[k] = v
    centers = load_centers(r=auxiliary_r[i], centers=centers)
    h = 100  # number of steps forecast past the end of train_data
    # base_prediction reads the notebook-level `centers` assigned just above.
    prediction, bad, possible = base_prediction(train_data, Daemon(mode="simple_d_q", is_pred=False, quantiles=(0.01, 0.99)), h, L=4, eps=0.009, return_possible_values=True)
    
    df_arr.append(calculate_metrics(prediction, bad, test_data, h))
    
    # Forecast part only (the last h rows).
    bad_temp = bad[-h:]
    pred_temp = prediction[-h:]

    # The `i` inside the comprehensions is local to them and does not affect the loop variable.
    rmse_val = [get_rmse(pred_temp, test_data, bad_temp, h, i) for i in range(1, h + 1)]
    pred_cnt = [get_amount_of_predictable_pts(pred_temp, test_data, bad_temp, h, i) for i in range(1, h + 1)]

    rmse_arr.append(rmse_val)
    bad_arr.append(pred_cnt)

100%|██████████| 18/18 [01:29<00:00,  5.00s/it]


In [30]:
# Stack the one-row tables of the loop above and label every row with its auxiliary r.
final_df = pd.concat(df_arr)
final_df["r_val"] = auxiliary_r 
# append=True keeps the existing index (0 for every row) and adds r_val as a further index level.
final_df.set_index("r_val", inplace = True,
                            append = True, drop = True)
final_df

Unnamed: 0_level_0,Unnamed: 1_level_0,1,1,1,10,10,10,50,50,50,100,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,NP(%),MAE,RMSE,NP(%),MAE,RMSE,NP(%),MAE,RMSE,NP(%),MAE,RMSE
Unnamed: 0_level_2,r_val,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
0,27.99,0.0,0.01,0.01,0.0,0.021,0.027,10.0,0.143,0.192,30.0,0.211,0.259
0,28.01,0.0,0.008,0.008,20.0,0.049,0.084,52.0,0.105,0.15,76.0,0.105,0.15
0,27.98,0.0,0.001,0.001,0.0,0.021,0.031,16.0,0.127,0.174,32.0,0.204,0.253
0,28.02,0.0,0.002,0.002,0.0,0.025,0.035,6.0,0.151,0.211,11.0,0.178,0.226
0,27.97,0.0,0.013,0.013,30.0,0.027,0.035,62.0,0.093,0.135,81.0,0.093,0.135
0,28.03,0.0,0.012,0.012,0.0,0.01,0.012,14.0,0.131,0.228,17.0,0.197,0.29
0,27.96,0.0,0.01,0.01,0.0,0.029,0.035,20.0,0.166,0.219,42.0,0.186,0.233
0,28.04,0.0,0.032,0.032,0.0,0.039,0.055,26.0,0.179,0.229,63.0,0.179,0.229
0,27.95,0.0,0.004,0.004,0.0,0.029,0.039,8.0,0.191,0.24,8.0,0.222,0.273
0,28.05,0.0,0.018,0.018,0.0,0.032,0.04,6.0,0.172,0.216,24.0,0.221,0.261


In [31]:
# Mean of every metric column across the auxiliary-r runs.
final_df.describe().loc["mean"].values

array([0.00000000e+00, 1.26111111e-02, 1.26111111e-02, 5.00000000e+00,
       2.92222222e-02, 3.93333333e-02, 1.93333333e+01, 1.49277778e-01,
       2.04055556e-01, 3.71111111e+01, 1.79944444e-01, 2.30500000e-01])

In [32]:
# Turn the per-run lists into 2-D arrays: one row per auxiliary r, one column per prefix length 1..h.
rmse_arr = np.array(rmse_arr)
bad_arr = np.array(bad_arr)

In [35]:
# Persist the curves and the summary table of the single-auxiliary-series experiment.
# The metrics/ directory has to exist already.
np.save("metrics/rmse_1.npy", rmse_arr)
np.save("metrics/bad_1.npy", bad_arr)
final_df.to_csv("table_mean.csv")

#### Multiple r simultaneously. AUXILIARY_SET = 2

In [206]:
# Same experiment as above, but every run merges the base centers with the centers of
# TWO auxiliary series (every unordered pair of values from auxiliary_r).
combinations_length = 2
o_data = lorenz_generation(S, r, B)
train_data = o_data[:TRAIN_SIZE]
test_data = o_data[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
df_arr = []  # one-row metric tables, one per combination
rmse_arr_2 = []  # per run: RMSE for every prefix length 1..h
bad_arr_2 = []  # per run: percentage of bad points for every prefix length 1..h
r_combinations = get_combinations(auxiliary_r, combinations_length)

for i in tqdm(range(len(r_combinations))):
    # Rebuilt from disk on every iteration: load_centers appends to the lists stored in
    # the dictionary, so reusing it would accumulate centers of earlier combinations.
    centers = dict()

    # Loading the base time series
    loaded = np.load('centers/28.0.npy', allow_pickle=True)
    for k, v in loaded:
        centers[k] = v

    # Merge the centers of every auxiliary series of this combination.
    for r_temp in r_combinations[i]:
        centers = load_centers(r=r_temp, centers=centers)
    h = 100  # number of steps forecast past the end of train_data
    # base_prediction reads the notebook-level `centers` assigned just above.
    prediction, bad, possible = base_prediction(train_data, Daemon(mode="simple_d_q", is_pred=False, quantiles=(0.01, 0.99)), h, L=4, eps=0.009, return_possible_values=True)
    
    df_arr.append(calculate_metrics(prediction, bad, test_data, h))
    
    # Forecast part only (the last h rows).
    bad_temp = bad[-h:]
    pred_temp = prediction[-h:]

    # The `i` inside the comprehensions is local to them and does not affect the loop variable.
    rmse_val = [get_rmse(pred_temp, test_data, bad_temp, h, i) for i in range(1, h + 1)]
    pred_cnt = [get_amount_of_predictable_pts(pred_temp, test_data, bad_temp, h, i) for i in range(1, h + 1)]

    rmse_arr_2.append(rmse_val)
    bad_arr_2.append(pred_cnt)

100%|██████████| 153/153 [23:50<00:00,  9.35s/it]


In [187]:
# Stack the one-row tables of the pair experiment and label every row with its pair of r values.
# NOTE(review): this relies on df_arr and r_combinations still being the ones produced by the
# pair loop; the execution counters suggest the cells were run out of order - re-run in sequence.
final_df_q_2 = pd.concat(df_arr)
final_df_q_2["r_val"] = r_combinations 
# append=True keeps the existing index (0 for every row) and adds r_val as a further index level.
final_df_q_2.set_index("r_val", inplace = True,
                            append = True, drop = True)
final_df_q_2

Unnamed: 0_level_0,Unnamed: 1_level_0,1,1,1,10,10,10,50,50,50,100,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,NP(%),MAE,RMSE,NP(%),MAE,RMSE,NP(%),MAE,RMSE,NP(%),MAE,RMSE
Unnamed: 0_level_2,r_val,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
0,"(27.97, 27.99)",0.0,0.015,0.015,0.0,0.024,0.030,16.0,0.150,0.192,13.0,0.226,0.277
0,"(27.96, 28.01)",0.0,0.012,0.012,0.0,0.037,0.049,20.0,0.143,0.187,57.0,0.151,0.194
0,"(27.98, 28.07)",0.0,0.017,0.017,0.0,0.026,0.034,4.0,0.112,0.156,15.0,0.175,0.225
0,"(27.97, 28.06)",0.0,0.017,0.017,0.0,0.024,0.032,10.0,0.172,0.231,49.0,0.183,0.237
0,"(27.99, 28.02)",0.0,0.007,0.007,0.0,0.023,0.029,6.0,0.199,0.249,26.0,0.221,0.265
0,...,...,...,...,...,...,...,...,...,...,...,...,...
0,"(27.98, 27.99)",0.0,0.005,0.005,0.0,0.021,0.029,2.0,0.142,0.200,14.0,0.201,0.276
0,"(27.92, 27.97)",0.0,0.010,0.010,10.0,0.027,0.034,28.0,0.187,0.246,40.0,0.213,0.259
0,"(27.98, 28.06)",0.0,0.012,0.012,0.0,0.023,0.030,10.0,0.141,0.220,8.0,0.197,0.264
0,"(27.92, 28.04)",0.0,0.028,0.028,0.0,0.037,0.053,48.0,0.126,0.188,74.0,0.126,0.188


In [152]:
# Persist the summary table of the pair experiment.
final_df_q_2.to_csv("table_mean_2.csv")

In [207]:
# Turn the per-run lists into 2-D arrays: one row per pair, one column per prefix length 1..h.
rmse_arr_2 = np.array(rmse_arr_2)
bad_arr_2 = np.array(bad_arr_2)

In [208]:
# Persist the curves of the pair experiment. The metrics/ directory has to exist already.
np.save("metrics/rmse_2.npy", rmse_arr_2)
np.save("metrics/bad_2.npy", bad_arr_2)

#### Multiple r simultaneously. AUXILIARY_SET = 3

In [196]:
# Same experiment again, with the centers of THREE auxiliary series merged into the base
# centers for every run (every unordered triple of values from auxiliary_r).
combinations_length = 3
o_data = lorenz_generation(S, r, B)
train_data = o_data[:TRAIN_SIZE]
test_data = o_data[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]
df_arr = []  # one-row metric tables, one per combination
rmse_arr_3 = []  # per run: RMSE for every prefix length 1..h
bad_arr_3 = []  # per run: percentage of bad points for every prefix length 1..h
r_combinations = get_combinations(auxiliary_r, combinations_length)

for i in tqdm(range(len(r_combinations))):
    # Rebuilt from disk on every iteration: load_centers appends to the lists stored in
    # the dictionary, so reusing it would accumulate centers of earlier combinations.
    centers = dict()

    # Loading the base time series
    loaded = np.load('centers/28.0.npy', allow_pickle=True)
    for k, v in loaded:
        centers[k] = v

    # Merge the centers of every auxiliary series of this combination.
    for r_temp in r_combinations[i]:
        centers = load_centers(r=r_temp, centers=centers)
    h = 100  # number of steps forecast past the end of train_data
    # NOTE(review): unlike the one- and two-series experiments, this Daemon is built without
    # quantiles=(0.01, 0.99), so the default (0, 1) applies - confirm this is intended
    # before comparing the three experiments.
    prediction, bad, possible = base_prediction(train_data, Daemon(mode="simple_d_q", is_pred=False), h, L=4, eps=0.009, return_possible_values=True)
    
    df_arr.append(calculate_metrics(prediction, bad, test_data, h))
    
    # Forecast part only (the last h rows).
    bad_temp = bad[-h:]
    pred_temp = prediction[-h:]

    # The `i` inside the comprehensions is local to them and does not affect the loop variable.
    rmse_val = [get_rmse(pred_temp, test_data, bad_temp, h, i) for i in range(1, h + 1)]
    pred_cnt = [get_amount_of_predictable_pts(pred_temp, test_data, bad_temp, h, i) for i in range(1, h + 1)]

    rmse_arr_3.append(rmse_val)
    bad_arr_3.append(pred_cnt)

# Summary table of the triple experiment. This rebinds `final_df`, which previously held
# the table of the single-auxiliary-series experiment.
final_df = pd.concat(df_arr)
final_df["r_val"] = r_combinations 
# append=True keeps the existing index (0 for every row) and adds r_val as a further index level.
final_df.set_index("r_val", inplace = True,
                            append = True, drop = True)
final_df

100%|██████████| 816/816 [3:51:58<00:00, 17.06s/it]  


Unnamed: 0_level_0,Unnamed: 1_level_0,1,1,1,10,10,10,50,50,50,100,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,NP(%),MAE,RMSE,NP(%),MAE,RMSE,NP(%),MAE,RMSE,NP(%),MAE,RMSE
Unnamed: 0_level_2,r_val,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
0,"(27.91, 27.94, 27.95)",0.0,0.017,0.017,0.0,0.037,0.049,0.0,0.170,0.207,0.0,0.199,0.240
0,"(27.93, 27.95, 28.01)",0.0,0.022,0.022,0.0,0.040,0.052,2.0,0.165,0.218,5.0,0.217,0.264
0,"(27.92, 27.95, 28.08)",0.0,0.012,0.012,0.0,0.054,0.072,0.0,0.193,0.244,3.0,0.253,0.292
0,"(27.98, 28.03, 28.08)",0.0,0.013,0.013,0.0,0.034,0.048,2.0,0.119,0.156,1.0,0.169,0.213
0,"(27.92, 27.97, 28.04)",0.0,0.030,0.030,0.0,0.043,0.058,10.0,0.182,0.245,8.0,0.209,0.262
0,...,...,...,...,...,...,...,...,...,...,...,...,...
0,"(27.97, 28.05, 28.07)",0.0,0.028,0.028,0.0,0.042,0.050,0.0,0.180,0.223,23.0,0.195,0.237
0,"(27.91, 27.99, 28.03)",0.0,0.019,0.019,0.0,0.022,0.029,0.0,0.146,0.192,0.0,0.219,0.270
0,"(28.02, 28.05, 28.07)",0.0,0.023,0.023,0.0,0.036,0.045,0.0,0.181,0.221,0.0,0.199,0.239
0,"(27.93, 28.01, 28.02)",0.0,0.020,0.020,0.0,0.032,0.041,4.0,0.160,0.213,2.0,0.237,0.300


In [163]:
# Persist the summary table of the triple experiment.
final_df.to_csv("table_mean_3.csv")

In [198]:
# Turn the per-run lists into 2-D arrays: one row per triple, one column per prefix length 1..h.
rmse_arr_3 = np.array(rmse_arr_3)
bad_arr_3 = np.array(bad_arr_3)

In [202]:
# Persist the curves of the triple experiment. The metrics/ directory has to exist already.
np.save("metrics/rmse_3.npy", rmse_arr_3)
np.save("metrics/bad_3.npy", bad_arr_3)