# Test iterative prediction

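This notebook tests the iterative prediction method: it loads the best regression models for prediction horizons 10 and 30, runs them iteratively on a single test cycle, and computes divergence-time metrics over the whole test set.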

In [1]:
import os
import pickle
import pandas as pd
import utilities.plotting as plots
import utilities.train_regression_model as train_reg

In [2]:
# Read the regression test split (file name indicates pred_hor=10) into memory.
test_df = pd.read_parquet(
    path="./data/experiment_datasets/experiment_3/regression/pred_hor=10.test.parquet"
)

In [3]:
# List the distinct cycle ids present in the test split.
test_df.loc[:, "cycle_id"].unique()

array([ 11,  13,  14,  40,  44,  45,  50,  78,  82,  84,  85,  87,  88,
        89,  90,  98,  99, 100, 109, 110, 111, 166, 168, 205, 207, 208,
       227, 228, 229, 236, 237, 238, 239, 251, 252, 253, 261, 281, 283,
       284, 286, 287])

In [4]:
# Hand the full test frame to the project's `plot_cycles` helper
# (defined in utilities.plotting; what exactly is drawn is decided there).
plots.plot_cycles(test_df)

In [5]:
# Keep only the rows belonging to cycle 44; this cycle is used for the
# single-cycle experiments below.
test_df_44 = test_df.loc[test_df["cycle_id"].eq(44)]

In [6]:
# Table of logged regression runs (one row per run, incl. params, metrics
# and the artifact location used further down).
runs_df = pd.read_csv(filepath_or_buffer="./data/runs/reg_runs_3.csv")

In [7]:
# Among the runs logged with params.pred_hor == 10, take the artifact
# directory of the one with the highest metrics.r_squared.
best_model_path_10 = (
    runs_df.loc[runs_df["params.pred_hor"].eq(10)]
    .sort_values("metrics.r_squared", ascending=False)["artifact_uri"]
    .iloc[0]
)
best_model_path_10

'/home/nkuechen/Documents/Thesis/mlruns/282042492777576658/959946da6fa0485c894367e907d733fc/artifacts'

In [8]:
# Load the pickled model found in the best horizon-10 run's artifact directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at artifact directories you produced yourself.
best_model_10 = None  # reset so a stale model from an earlier execution cannot leak through
for file in os.listdir(best_model_path_10):
    if file.endswith(".pickle"):
        # The context manager closes the file handle even if unpickling fails.
        with open(os.path.join(best_model_path_10, file), "rb") as model_file:
            best_model_10 = pickle.load(model_file)

# Fail here, with a clear message, instead of with a NameError in a later cell.
if best_model_10 is None:
    raise FileNotFoundError(f"No .pickle model found in {best_model_path_10}")

In [9]:
# Among the runs logged with params.pred_hor == 30, take the artifact
# directory of the one with the highest metrics.r_squared.
best_model_path_30 = (
    runs_df.loc[runs_df["params.pred_hor"].eq(30)]
    .sort_values("metrics.r_squared", ascending=False)["artifact_uri"]
    .iloc[0]
)
best_model_path_30

'/home/nkuechen/Documents/Thesis/mlruns/282042492777576658/3005a0341ec043028c42c1d949e0c16f/artifacts'

In [10]:
# Load the pickled model found in the best horizon-30 run's artifact directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at artifact directories you produced yourself.
best_model_30 = None  # reset so a stale model from an earlier execution cannot leak through
for file in os.listdir(best_model_path_30):
    if file.endswith(".pickle"):
        # The context manager closes the file handle even if unpickling fails.
        with open(os.path.join(best_model_path_30, file), "rb") as model_file:
            best_model_30 = pickle.load(model_file)

# Fail here, with a clear message, instead of with a NameError in a later cell.
if best_model_30 is None:
    raise FileNotFoundError(f"No .pickle model found in {best_model_path_30}")

In [11]:
from utilities.train_regression_model import iterative_prediction

In [12]:
# Number of leading rows of cycle 44 that are passed to the model as input.
n_inputs = 50
# All remaining rows of the cycle are to be predicted.
n_predictions = test_df_44.shape[0] - n_inputs
n_predictions

152

In [13]:
# Run the project's iterative_prediction helper once per model, each time
# seeded with the first `n_inputs` rows of cycle 44.
prediction_df_44_10 = iterative_prediction(
    best_model_10,
    test_df_44.iloc[:n_inputs],
    n_predictions,
    prediction_horizon=10,
)
prediction_df_44_30 = iterative_prediction(
    best_model_30,
    test_df_44.iloc[:n_inputs],
    n_predictions,
    prediction_horizon=30,
)

# `status_time` of the first row that lies outside the input window.
prediction_start_date = test_df_44["status_time"].iat[n_inputs]

In [14]:
# Plot the true cycle-44 data together with both iterative predictions.
# Each prediction frame is paired with the label passed alongside it.
# NOTE(review): the meaning/units of divergence_threshold=3 are defined in
# utilities.plotting and are not visible from this notebook — confirm there.
plots.plot_iterative_prediction(
    test_df_44,
    [(prediction_df_44_10, "pred_hor_10"), (prediction_df_44_30, "pred_hor_30")],
    prediction_start_date,
    divergence_threshold=3,
)

Adding divergence line
Adding divergence line


In [15]:
# Repeat the cycle-44 experiment with a shorter input window of 30 rows.
n_inputs = 30
n_predictions = test_df_44.shape[0] - n_inputs

# Same two models as before, each seeded with the first `n_inputs` rows.
prediction_df_44_10 = iterative_prediction(
    best_model_10,
    test_df_44.iloc[:n_inputs],
    n_predictions,
    prediction_horizon=10,
)
prediction_df_44_30 = iterative_prediction(
    best_model_30,
    test_df_44.iloc[:n_inputs],
    n_predictions,
    prediction_horizon=30,
)

# `status_time` of the first row that lies outside the input window.
prediction_start_date = test_df_44["status_time"].iat[n_inputs]

# Plot the true data against both predictions.
plots.plot_iterative_prediction(
    test_df_44,
    [
        (prediction_df_44_10, "pred_hor_10"),
        (prediction_df_44_30, "pred_hor_30"),
    ],
    prediction_start_date,
    divergence_threshold=3,
)

Adding divergence line
Adding divergence line


In [16]:
# Divergence-time metrics for the horizon-10 model over the whole test set.
# Judging by the displayed output, the helper returns a 4-tuple: three summary
# numbers followed by a dict mapping an id string to a list of integers.
# NOTE(review): the meaning of the three numbers and of the third argument
# (presumably the prediction horizon) is defined in
# utilities.train_regression_model — confirm there.
train_reg.divergence_time_metrics(best_model_10, test_df, 10)

(8.777777777777779,
 8.030360572575438,
 8.0,
 {'09f3fdca-6cea-4471-8d80-c6dd19f46dae': [8, 8, 5],
  '0a66076f-2ef2-4b02-a856-646e09e1ee44': [11, 10, 10, 12, 12, 10],
  '2536a336-63d1-4242-b41d-716ed2664cb9': [9, 8, 7],
  '2e2261a8-e600-4969-9c3d-c0268d8f9e38': [14, 15, 13, 10, 12, 9],
  '31046270-0977-46b5-abf6-bc008e3f3b60': [8, 8, 8],
  '4c6ed69f-fc84-47f7-ad86-85ecc11242af': [8, 8, 8],
  '5517dd7b-6ab7-4a2c-90a2-95dd7727cf92': [9, 10, 10],
  '5544bb14-e08a-42e1-b020-11fc922bff34': [11, 9, 9, 10, 12, 9],
  '59a1c172-9afd-4f4e-a31f-ff6d7a70943b': [8,
   8,
   8,
   8,
   7,
   8,
   8,
   10,
   12,
   12,
   12],
  '5ee55295-f9c2-4797-bd7e-bc96f79f063b': [8, 7, 11, 18, 9, 5, 8],
  '6be265cf-8a44-42e8-89ed-3f955885e26c': [9, 8, 7, 15, 9, 4, 9, 6, 5],
  '9e983e70-2015-4dfe-9d97-ea98aacb245a': [8, 8, 7, 4, 10, 8],
  'af3548cb-ebe6-4586-87e1-065aca24840c': [12, 14, 11, 8, 12, 11, 9, 11],
  'bea4cb80-2734-4a71-8ff6-602129e653a4': [9, 7, 6, 9, 8, 6, 7, 8, 6],
  'cac585df-007e-4b01-a751-f7

In [17]:
# Divergence-time metrics for the horizon-30 model over the whole test set.
# Judging by the displayed output, the helper returns a 4-tuple: three summary
# numbers followed by a dict mapping an id string to a list of integers.
# NOTE(review): the meaning of the three numbers and of the third argument
# (presumably the prediction horizon) is defined in
# utilities.train_regression_model — confirm there.
train_reg.divergence_time_metrics(best_model_30, test_df, 30)


(4.866666666666666,
 4.630023475342681,
 5.0,
 {'09f3fdca-6cea-4471-8d80-c6dd19f46dae': [5, 5, 4],
  '0a66076f-2ef2-4b02-a856-646e09e1ee44': [5, 5, 6, 5, 5],
  '2536a336-63d1-4242-b41d-716ed2664cb9': [5, 5, 4],
  '2e2261a8-e600-4969-9c3d-c0268d8f9e38': [6, 6, 5, 5, 6, 5],
  '31046270-0977-46b5-abf6-bc008e3f3b60': [4, 4, 5],
  '4c6ed69f-fc84-47f7-ad86-85ecc11242af': [5, 5, 5],
  '5517dd7b-6ab7-4a2c-90a2-95dd7727cf92': [5, 6, 6],
  '5544bb14-e08a-42e1-b020-11fc922bff34': [5, 5, 5, 6, 4],
  '59a1c172-9afd-4f4e-a31f-ff6d7a70943b': [5, 4, 5, 7, 7, 7],
  '5ee55295-f9c2-4797-bd7e-bc96f79f063b': [4, 5, 5],
  '6be265cf-8a44-42e8-89ed-3f955885e26c': [5, 5, 4, 4, 4, 4],
  '9e983e70-2015-4dfe-9d97-ea98aacb245a': [5, 4, 5],
  'af3548cb-ebe6-4586-87e1-065aca24840c': [5, 5, 5, 5, 5],
  'bea4cb80-2734-4a71-8ff6-602129e653a4': [4, 4, 4, 4, 4],
  'cac585df-007e-4b01-a751-f7f24bbd23e3': [5, 5, 5, 4, 4, 4],
  'd78dfbe6-f1b7-403a-9ede-57997b28af07': [6, 5, 4, 4, 4, 4, 4],
  'f29c5877-2a5c-491a-a191-618d328