In [None]:
import sqlite3

# Open the SQLite file with the experiment results. The connection and cursor
# are intentionally left open: later cells run further queries through `cursor`.
conn = sqlite3.connect('OS_data_experiment_results.db')
cursor = conn.cursor()

# Select every row of `experiment_results` that belongs to the Test split.
cursor.execute("SELECT * FROM experiment_results WHERE which_split='Test';")

# cursor.description has one tuple per result column; item 0 is the column name.
column_names = [column_info[0] for column_info in cursor.description]
print("Column names:", column_names)

# Pull the whole result set into memory, then print one row tuple per line.
rows = cursor.fetchall()
for row in rows:
    print(row)

In [2]:
def load_batch(data_folder):
    """Read the paired CSV files stored in the ``X`` and ``y`` sub-folders.

    ``data_folder`` must contain an ``X`` and a ``y`` directory whose sorted
    listings are identical. Every listed name is read from both directories
    with ``pd.read_csv``.

    Args:
        data_folder: Path of the directory that holds the ``X`` and ``y`` folders.

    Returns:
        A tuple ``(X_dfs, y_dfs, file_names)``: the frames read from ``X``, the
        frames read from ``y``, and the sorted file names, all in the same order.

    Raises:
        RuntimeError: If the two directories do not list exactly the same names.
    """
    features_dir = os.path.join(data_folder, "X")
    labels_dir = os.path.join(data_folder, "y")

    feature_names = os.listdir(features_dir)
    feature_names.sort()
    label_names = os.listdir(labels_dir)
    label_names.sort()

    # Both listings must match name-for-name so frames can be paired by position.
    if feature_names != label_names:
        raise RuntimeError("Not all training files are present in both the X and y folders.")

    # Read the X file and then the y file for each name, in sorted order.
    frame_pairs = [
        (
            pd.read_csv(os.path.join(features_dir, name)),
            pd.read_csv(os.path.join(labels_dir, name)),
        )
        for name in feature_names
    ]
    X_dfs = [x_frame for x_frame, _ in frame_pairs]
    y_dfs = [y_frame for _, y_frame in frame_pairs]

    return X_dfs, y_dfs, feature_names

In [None]:
import os
import pickle
import warnings

import jsonpickle
import pandas as pd

from src.methods import SingleThresholdARIMA
from src.plot_functions import plot_predictions

# --- Folder layout -----------------------------------------------------------
# Every path below is derived from `home` and `dataset`, so switching the
# dataset only requires changing `dataset`.
home = "/data/tijmen"

dataset = "OS_data" #alternatively: route_data
data_folder = os.path.join(home, dataset)
result_folder = os.path.join(home+"/results", dataset)
intermediates_folder = os.path.join(home+ "/intermediates", dataset)
model_folder = os.path.join(home + "/saved_models", dataset)

score_folder = os.path.join(result_folder, "scores")
predictions_folder = os.path.join(result_folder, "predictions")
metric_folder = os.path.join(result_folder, "metrics")

preprocessed_X_folder = os.path.join(intermediates_folder, "preprocessed_data_csvs")
label_filter_folder = os.path.join(intermediates_folder, "label_filters_per_cutoff_csvs")

# --- Load the preprocessed Test CSVs -----------------------------------------
# Hash of the preprocessing configuration whose Test CSVs are plotted.
preprocessing_hash ='ef19085e70a2b043dd00e10361154f3ec54122c056f0a5236099c900ff889eff'
test_csvs_path = os.path.join(preprocessed_X_folder, "Test", preprocessing_hash)
X_dfs, y_dfs, dfs_files = load_batch(test_csvs_path)

# --- Look up the best Test entry for this method -----------------------------
method = 'SingleThresholdARIMA'

# The outer query is restricted to the same split as the MAX() sub-query.
# Without that filter, a row from another split that happens to share the
# maximum metric value could be returned instead.
best_model_entry = cursor.execute("""
SELECT e.*
FROM experiment_results e
WHERE e.metric = (
    SELECT MAX(metric)
    FROM experiment_results
    WHERE method = (?) AND which_split = (?)
) AND e.method = (?) AND e.which_split = (?)
""", (method, "Test", method, "Test"))

# fetchone() returns None when nothing matches; fail with a clear message
# instead of the bare StopIteration that next() would raise.
best_row = best_model_entry.fetchone()
if best_row is None:
    raise LookupError("No Test entry found in experiment_results for method {!r}.".format(method))

# The CSVs above were loaded for the hard-coded hash. If the best entry was
# produced with another preprocessing configuration, the plot would combine
# predictions and data that do not belong together.
if best_row[0] != preprocessing_hash:
    warnings.warn(
        "Best {!r} entry uses preprocessing hash {}, but the test CSVs were loaded for {}; "
        "predictions may not line up with the plotted data.".format(method, best_row[0], preprocessing_hash)
    )

(preprocessing_hash, hyperparameter_hash, _, _, preprocessing_hyperparameter_string_pickle, hyperparameter_string_pickle, validation_metric) = best_row

# --- Rebuild the model from its stored hyperparameters -----------------------
model_hyperparameters = jsonpickle.decode(hyperparameter_string_pickle, keys=True)
model = SingleThresholdARIMA(model_folder, preprocessing_hash, **model_hyperparameters)

model.load_model()

# --- Load stored predictions and scores --------------------------------------
# Paths are built after the query so that both hashes come from the same row.
preds_path = os.path.join(predictions_folder, "Test", preprocessing_hash, method, hyperparameter_hash, '(3, 3, 3).pickle')
# NOTE: pickle.load executes arbitrary code; only open files written by this project.
with open(preds_path, 'rb') as handle:
    predictions = pickle.load(handle)

# NOTE(review): the scores live under 'ARIMA' rather than under `method`, unlike
# the other methods in this notebook - confirm this folder name is intended.
scores_path = os.path.join(score_folder, "Test", preprocessing_hash, 'ARIMA', hyperparameter_hash, 'scores.pickle')
with open(scores_path, 'rb') as handle:
    scores = pickle.load(handle)

plot_predictions(X_dfs, y_dfs, predictions, dfs_files, model,  which_stations = [0,6,7], n_stations = 3, scores=scores)
   

In [None]:
import warnings

from src.methods import SequentialEnsemble

# Relies on names defined in earlier cells: cursor, model_folder,
# predictions_folder, score_folder, test_csvs_path, X_dfs, y_dfs, dfs_files,
# os, pickle, jsonpickle and plot_predictions.
method = 'Sequential-DoubleThresholdBS+SingleThresholdSPC'

# The outer query is restricted to the same split as the MAX() sub-query.
# Without that filter, a row from another split that happens to share the
# maximum metric value could be returned instead.
best_model_entry = cursor.execute("""
SELECT e.*
FROM experiment_results e
WHERE e.metric = (
    SELECT MAX(metric)
    FROM experiment_results
    WHERE method = (?) AND which_split = (?)
) AND e.method = (?) AND e.which_split = (?)
""", (method, "Test", method, "Test"))

# fetchone() returns None when nothing matches; fail with a clear message
# instead of the bare StopIteration that next() would raise.
best_row = best_model_entry.fetchone()
if best_row is None:
    raise LookupError("No Test entry found in experiment_results for method {!r}.".format(method))

(preprocessing_hash, hyperparameter_hash, _, _, preprocessing_hyperparameter_string_pickle, hyperparameter_string_pickle, validation_metric) = best_row

# X_dfs / y_dfs were loaded from test_csvs_path, whose last component is the
# preprocessing hash of the loaded CSVs. Warn when this entry used another one.
loaded_data_hash = os.path.basename(test_csvs_path)
if preprocessing_hash != loaded_data_hash:
    warnings.warn(
        "Best {!r} entry uses preprocessing hash {}, but the test CSVs were loaded for {}; "
        "predictions may not line up with the plotted data.".format(method, preprocessing_hash, loaded_data_hash)
    )

model_hyperparameters = jsonpickle.decode(hyperparameter_string_pickle, keys=True)
model = SequentialEnsemble(model_folder, preprocessing_hash, **model_hyperparameters)
model.load_model()

# Paths are built after the query so both hashes come from this cell's row,
# not from whatever value `preprocessing_hash` had when the cell started.
preds_path = os.path.join(predictions_folder, "Test", preprocessing_hash, method, hyperparameter_hash, 'predictions.pickle')
# NOTE: pickle.load executes arbitrary code; only open files written by this project.
with open(preds_path, 'rb') as handle:
    predictions = pickle.load(handle)

scores_path = os.path.join(score_folder, "Test", preprocessing_hash, method, hyperparameter_hash, 'scores.pickle')
with open(scores_path, 'rb') as handle:
    scores = pickle.load(handle)

plot_predictions(X_dfs, y_dfs, predictions, dfs_files, model,  which_stations = [0,6,7], n_stations = 3, scores=scores)
   

In [None]:
import warnings

import numpy as np

from src.methods import SequentialEnsemble

# Relies on names defined in earlier cells: cursor, model_folder,
# predictions_folder, score_folder, test_csvs_path, X_dfs, y_dfs, dfs_files,
# os, pickle, jsonpickle and plot_predictions.
method = 'Sequential-DoubleThresholdBS+SingleThresholdARIMA'

# The outer query is restricted to the same split as the MAX() sub-query.
# Without that filter, a row from another split that happens to share the
# maximum metric value could be returned instead.
best_model_entry = cursor.execute("""
SELECT e.*
FROM experiment_results e
WHERE e.metric = (
    SELECT MAX(metric)
    FROM experiment_results
    WHERE method = (?) AND which_split = (?)
) AND e.method = (?) AND e.which_split = (?)
""", (method, "Test", method, "Test"))

# fetchone() returns None when nothing matches; fail with a clear message
# instead of the bare StopIteration that next() would raise.
best_row = best_model_entry.fetchone()
if best_row is None:
    raise LookupError("No Test entry found in experiment_results for method {!r}.".format(method))

(preprocessing_hash, hyperparameter_hash, _, _, preprocessing_hyperparameter_string_pickle, hyperparameter_string_pickle, validation_metric) = best_row

# X_dfs / y_dfs were loaded from test_csvs_path, whose last component is the
# preprocessing hash of the loaded CSVs. Warn when this entry used another one.
loaded_data_hash = os.path.basename(test_csvs_path)
if preprocessing_hash != loaded_data_hash:
    warnings.warn(
        "Best {!r} entry uses preprocessing hash {}, but the test CSVs were loaded for {}; "
        "predictions may not line up with the plotted data.".format(method, preprocessing_hash, loaded_data_hash)
    )

model_hyperparameters = jsonpickle.decode(hyperparameter_string_pickle, keys=True)
model = SequentialEnsemble(model_folder, preprocessing_hash, **model_hyperparameters)
model.load_model()

# Paths are built after the query so both hashes come from this cell's row,
# not from whatever value `preprocessing_hash` had when the cell started.
preds_path = os.path.join(predictions_folder, "Test", preprocessing_hash, method, hyperparameter_hash, 'predictions.pickle')
# NOTE: pickle.load executes arbitrary code; only open files written by this project.
with open(preds_path, 'rb') as handle:
    predictions = pickle.load(handle)

scores_path = os.path.join(score_folder, "Test", preprocessing_hash, method, hyperparameter_hash, 'scores.pickle')
with open(scores_path, 'rb') as handle:
    scores = pickle.load(handle)

# Unlike the other plotting cells, this one passes station indices 0-59 and no n_stations.
plot_predictions(X_dfs, y_dfs, predictions, dfs_files, model,  which_stations = list(range(60)), scores=scores)
   