In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split

from train import * 
from train_optimal import *

import warnings
warnings.filterwarnings('ignore')

In [5]:
import os 

In [6]:
current_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(current_dir, '..', '..'))

data_folder_path = os.path.join(root_dir, 'data')

file_path = os.path.join(data_folder_path, 'robotarm.csv')

df = pd.read_csv(file_path)

## Baseline results

In [117]:
args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "error",
            "prediction": "success",
            "one_hot": False,
            "k": 5,
            "data": df
        }

results1 = cross_validate(**args)
results1

[{'Mean AUC': '0.85',
  'Standard Deviation of AUC': '0.02',
  'Mean Accuracy': '0.85',
  'Standard Deviation of Accuracy': '0.03'}]

In [116]:

args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "error",
            "prediction": "success",
            "one_hot": False,
            "k": 5,
            "data": df
        }

results2 = cross_validate(**args)
results2

[{'Mean AUC': '0.83',
  'Standard Deviation of AUC': '0.01',
  'Mean Accuracy': '0.81',
  'Standard Deviation of Accuracy': '0.02'}]

In [79]:
args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "case",
            "prediction": "success",
            "one_hot": False,
            "k": 5,
            "data": df
        }

results3 = cross_validate(**args)
results3

[{'Mean AUC': '0.83',
  'Standard Deviation of AUC': '0.02',
  'Mean Accuracy': '0.83',
  'Standard Deviation of Accuracy': '0.02'}]

In [80]:
args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "case",
            "prediction": "success",
            "one_hot": False,
            "k": 5,
            "data": df
        }

results4 = cross_validate(**args)
results4

[{'Mean AUC': '0.86',
  'Standard Deviation of AUC': '0.03',
  'Mean Accuracy': '0.85',
  'Standard Deviation of Accuracy': '0.02'}]

### Sequence truncation

In [100]:
method_params = [3, 4, 5, 6, 7, 8, 9, 10]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "first",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 3
[{'Mean AUC': '0.68', 'Standard Deviation of AUC': '0.07', 'Mean Accuracy': '0.73', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 4
[{'Mean AUC': '0.71', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.72', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 5
[{'Mean AUC': '0.74', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.75', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 6
[{'Mean AUC': '0.77', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.75', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 7
[{'Mean AUC': '0.78', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.78', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 8
[{'Mean AUC': '0.80', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.79', 'Standard Deviation of Ac

In [101]:
method_params = [1, 2, 3, 4, 5]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "both",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 1
[{'Mean AUC': '0.58', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.68', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 2
[{'Mean AUC': '0.50', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.68', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 3
[{'Mean AUC': '0.49', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.68', 'Standard Deviation of Accuracy': '0.04'}]


With method mode last and method param 4
[{'Mean AUC': '0.47', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 5
[{'Mean AUC': '0.49', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.03'}]




In [102]:
method_params = [2, 3, 4, 5, 6, 7, 8, 9, 10]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "last",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.57', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.72', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 3
[{'Mean AUC': '0.82', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.82', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 4
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.81', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 5
[{'Mean AUC': '0.83', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.82', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 6
[{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 7
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.82', 'Standard Deviation of Ac

In [103]:
method_params = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "last",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 1
[{'Mean AUC': '0.56', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.72', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 2
[{'Mean AUC': '0.57', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.72', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 3
[{'Mean AUC': '0.83', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.82', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 4
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.00', 'Mean Accuracy': '0.81', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 5
[{'Mean AUC': '0.83', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 6
[{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.83', 'Standard Deviation of Ac

In [105]:
method_params = [2, 3, 4, 5, 6, 7, 8, 9, 10]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "case",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "first",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.64', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 3
[{'Mean AUC': '0.67', 'Standard Deviation of AUC': '0.07', 'Mean Accuracy': '0.72', 'Standard Deviation of Accuracy': '0.04'}]


With method mode last and method param 4
[{'Mean AUC': '0.70', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.71', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 5
[{'Mean AUC': '0.75', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.76', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 6
[{'Mean AUC': '0.79', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.78', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 7
[{'Mean AUC': '0.79', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.79', 'Standard Deviation of Ac

In [106]:
method_params = [1, 2, 3, 4, 5]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "case",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "both",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 1
[{'Mean AUC': '0.59', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.68', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 2
[{'Mean AUC': '0.50', 'Standard Deviation of AUC': '0.05', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 3
[{'Mean AUC': '0.47', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.68', 'Standard Deviation of Accuracy': '0.04'}]


With method mode last and method param 4
[{'Mean AUC': '0.48', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.70', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 5
[{'Mean AUC': '0.50', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.70', 'Standard Deviation of Accuracy': '0.02'}]




In [107]:
method_params = [2, 3, 4, 5, 6, 7, 8, 9, 10]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "distribution",
            "task": "case",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "last",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.56', 'Standard Deviation of AUC': '0.06', 'Mean Accuracy': '0.72', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 3
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.82', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 4
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 5
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.84', 'Standard Deviation of Accuracy': '0.01'}]


With method mode last and method param 6
[{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.85', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 7
[{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.83', 'Standard Deviation of Ac

In [109]:
method_params = [2, 3, 4, 5, 6, 7, 8]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "first",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.64', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 3
[{'Mean AUC': '0.70', 'Standard Deviation of AUC': '0.06', 'Mean Accuracy': '0.73', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 4
[{'Mean AUC': '0.71', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.74', 'Standard Deviation of Accuracy': '0.04'}]


With method mode last and method param 5
[{'Mean AUC': '0.78', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.77', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 6
[{'Mean AUC': '0.80', 'Standard Deviation of AUC': '0.05', 'Mean Accuracy': '0.79', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 7
[{'Mean AUC': '0.79', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.80', 'Standard Deviation of Ac

In [110]:
method_params = [1, 2, 3, 4, 5]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "both",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 1
[{'Mean AUC': '0.59', 'Standard Deviation of AUC': '0.07', 'Mean Accuracy': '0.67', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 2
[{'Mean AUC': '0.53', 'Standard Deviation of AUC': '0.06', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 3
[{'Mean AUC': '0.49', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.68', 'Standard Deviation of Accuracy': '0.04'}]


With method mode last and method param 4
[{'Mean AUC': '0.49', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 5
[{'Mean AUC': '0.51', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.02'}]




In [111]:
args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "error",
            "prediction": "success",
            "one_hot": False,
            "k": 5,
            "data": df
        }

baseline_results = cross_validate(**args)


method_params = [2, 3, 4, 5, 6, 7, 8]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "error",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "last",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.61', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.75', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 3
[{'Mean AUC': '0.86', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 4
[{'Mean AUC': '0.86', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 5
[{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.85', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 6
[{'Mean AUC': '0.86', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.85', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 7
[{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.86', 'Standard Deviation of Ac

In [112]:
method_params = [2, 3, 4, 5, 6, 7, 8]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "case",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "first",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.62', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.70', 'Standard Deviation of Accuracy': '0.04'}]


With method mode last and method param 3
[{'Mean AUC': '0.70', 'Standard Deviation of AUC': '0.08', 'Mean Accuracy': '0.73', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 4
[{'Mean AUC': '0.73', 'Standard Deviation of AUC': '0.05', 'Mean Accuracy': '0.74', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 5
[{'Mean AUC': '0.79', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.78', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 6
[{'Mean AUC': '0.82', 'Standard Deviation of AUC': '0.05', 'Mean Accuracy': '0.81', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 7
[{'Mean AUC': '0.81', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.82', 'Standard Deviation of Ac

In [113]:
method_params = [1, 2, 3, 4, 5]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "case",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "both",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 1
[{'Mean AUC': '0.59', 'Standard Deviation of AUC': '0.06', 'Mean Accuracy': '0.67', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 2
[{'Mean AUC': '0.49', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 3
[{'Mean AUC': '0.51', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.69', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 4
[{'Mean AUC': '0.50', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.70', 'Standard Deviation of Accuracy': '0.03'}]


With method mode last and method param 5
[{'Mean AUC': '0.52', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.70', 'Standard Deviation of Accuracy': '0.03'}]




In [114]:
method_params = [2, 3, 4, 5, 6, 7, 8]

for n in method_params:
    args = {
            "model_type": "rf",
            "split_type": "median",
            "task": "case",
            "one_hot": False,
            "k": 5,
            "sequence_method": "truncate",
            "method_param": n, 
            "method_mode": "last",
            "data": df
        }

    results = cross_validate_optimized(**args)
    print("With method mode last and method param", n)
    print(results)
    print("\n")

With method mode last and method param 2
[{'Mean AUC': '0.62', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.75', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 3
[{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.01', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 4
[{'Mean AUC': '0.87', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.84', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 5
[{'Mean AUC': '0.87', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.86', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 6
[{'Mean AUC': '0.87', 'Standard Deviation of AUC': '0.04', 'Mean Accuracy': '0.87', 'Standard Deviation of Accuracy': '0.02'}]


With method mode last and method param 7
[{'Mean AUC': '0.86', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.87', 'Standard Deviation of Ac

In [9]:
# Function to compare results and find the closest match
def find_closest_match(baseline_results, method_results, metric="Mean AUC"):
    baseline_metric = float(baseline_results[0][metric])
    closest_n = None
    closest_diff = float('inf')
    optimal_diff = float(baseline_results[0]["Standard Deviation of AUC"])
    
    for n, results in method_results.items():
        method_metric = float(results[0][metric])
        diff = abs(baseline_metric - method_metric)
        if diff < optimal_diff:
            closest_n = n
            return closest_n, closest_diff
        else:
            if diff < closest_diff:
                closest_n = n
                closest_diff = diff
    
    return closest_n, closest_diff
            

# Calculate baseline results
args = {
    "model_type": "rf",
    "split_type": "median",
    "task": "error",
    "prediction": "success",
    "one_hot": False,
    "k": 5,
    "data": df
}
baseline_results = cross_validate(**args)
print("Baseline Results:", baseline_results)

# Store method results
method_params = [2, 3, 4, 5, 6, 7, 8]
method_results = {}

for n in method_params:
    args = {
        "model_type": "rf",
        "split_type": "median",
        "task": "error",
        "one_hot": False,
        "k": 5,
        "sequence_method": "truncate",
        "method_param": n, 
        "method_mode": "last",
        "data": df
    }

    results = cross_validate_optimized(**args)
    method_results[n] = results
    #print(f"With method mode last and method param {n}")
    #print(results)
    #print("\n")

# Find the closest match
closest_n, closest_diff = find_closest_match(baseline_results, method_results)
print(f"The parameter n that gives results closest to the baseline is {closest_n} with a difference of {closest_diff:.2f}")

Baseline Results: [{'Mean AUC': '0.85', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.84', 'Standard Deviation of Accuracy': '0.03'}]
The parameter n that gives results closest to the baseline is 3 with a difference of 0.24


In [133]:
def compare_results(args, method_mode):
    
    baseline_results = cross_validate(**args)
    print("Baseline Results:", baseline_results)

    # Store method results
    method_params = [2, 3, 4, 5, 6, 7, 8]
    method_results = {} 
    
    args_trunc = args.copy()
    args_trunc["sequence_method"] = "truncate"
    args_trunc["method_mode"] = method_mode
    args_trunc = {k: v for k, v in args_trunc.items() if k != "prediction"}
    
    for n in method_params:
        results = cross_validate_optimized(**args_trunc, method_param=n)
        method_results[n] = results
        
    # Find the closest match
    closest_n, closest_diff = find_closest_match(baseline_results, method_results)
    print(f"The parameter n that gives results closest to the baseline is {closest_n} with a difference of {closest_diff:.2f}")
    
    args_to_save = {k: v for k, v in args.items() if k != "data"}
    
    results = {"args": args_to_save, "baseline_AUC": baseline_results[0]["Mean AUC"], "closest_n": closest_n, "closest_diff": closest_diff}
    
    return results

In [134]:
args = {
    "model_type": "rf",
    "split_type": "median",
    "task": "error",
    "prediction": "success",
    "one_hot": False,
    "k": 5,
    "data": df
}

compare_results(args, "last")

Baseline Results: [{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.84', 'Standard Deviation of Accuracy': '0.03'}]
The parameter n that gives results closest to the baseline is 3 with a difference of 0.23


{'args': {'model_type': 'rf',
  'split_type': 'median',
  'task': 'error',
  'prediction': 'success',
  'one_hot': False,
  'k': 5},
 'baseline_AUC': '0.84',
 'closest_n': 3,
 'closest_diff': 0.22999999999999998}

In [135]:
args = {
    "model_type": "rf",
    "split_type": "median",
    "task": "case",
    "prediction": "success",
    "one_hot": False,
    "k": 5,
    "data": df
}

compare_results(args, "last")

Baseline Results: [{'Mean AUC': '0.86', 'Standard Deviation of AUC': '0.03', 'Mean Accuracy': '0.86', 'Standard Deviation of Accuracy': '0.02'}]
The parameter n that gives results closest to the baseline is 3 with a difference of 0.24


{'args': {'model_type': 'rf',
  'split_type': 'median',
  'task': 'case',
  'prediction': 'success',
  'one_hot': False,
  'k': 5},
 'baseline_AUC': '0.86',
 'closest_n': 3,
 'closest_diff': 0.24}

In [136]:
args = {
    "model_type": "rf",
    "split_type": "distribution",
    "task": "error",
    "prediction": "success",
    "one_hot": False,
    "k": 5,
    "data": df
}

compare_results(args, "last")

Baseline Results: [{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.81', 'Standard Deviation of Accuracy': '0.02'}]
The parameter n that gives results closest to the baseline is 3 with a difference of 0.27


{'args': {'model_type': 'rf',
  'split_type': 'distribution',
  'task': 'error',
  'prediction': 'success',
  'one_hot': False,
  'k': 5},
 'baseline_AUC': '0.84',
 'closest_n': 3,
 'closest_diff': 0.27}

In [137]:
args = {
    "model_type": "rf",
    "split_type": "distribution",
    "task": "case",
    "prediction": "success",
    "one_hot": False,
    "k": 5,
    "data": df
}

compare_results(args, "last")

Baseline Results: [{'Mean AUC': '0.84', 'Standard Deviation of AUC': '0.02', 'Mean Accuracy': '0.83', 'Standard Deviation of Accuracy': '0.02'}]
The parameter n that gives results closest to the baseline is 3 with a difference of 0.28


{'args': {'model_type': 'rf',
  'split_type': 'distribution',
  'task': 'case',
  'prediction': 'success',
  'one_hot': False,
  'k': 5},
 'baseline_AUC': '0.84',
 'closest_n': 3,
 'closest_diff': 0.2799999999999999}