In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
import matplotlib.pyplot as plt
from math import tan, atan, radians, degrees
import random
import json
import os

In [None]:
# Mount storage at /content/drive and move into the project folder.
# NOTE(review): `drive` is not imported in any cell shown in this notebook;
# presumably `from google.colab import drive` has to run first -- confirm.
drive.mount('/content/drive')
# Placeholder path: replace it with the real location of the AI_4_ATD folder.
# All relative paths used later (Datasets/, Training_Runs/, LOOCV/) resolve
# against this directory.
%cd /write/your/directory/to/AI_4_ATD

In [None]:
# Lower bound applied to the control line's standard deviation in CNS(), so
# the standardisation never divides by a (near-)zero spread.
epsilon = 0.001

# Define Processing Functions

In [None]:
class Format():
  """Flags describing which preprocessing steps a dataset has received.

  Attributes (all start at 0, i.e. "not applied"):
    scale: set to 1 by SCALE().
    cns:   set to 1 by CNS().
    ia:    set to 1 by IA().
    imp:   set by imputate() to 1 ('mean'), 2 ('median') or 3 ('iqm').
  """
  def __init__(self):
    # Every step starts out as "not applied".
    self.scale = self.cns = self.ia = self.imp = 0

In [None]:
def SCALE(dataset, labels, format):
  """Divide each sample by its largest absolute value, in place.

  NOTE: a later cell defines SCALE again (adding a 2-D branch); once that
  cell has run, it replaces this definition.

  Args:
    dataset: 3-D array indexed as [sample][line][point]. Entries equal to -1
      are left untouched, but they still take part in the peak search
      (contributing |-1| = 1).
    labels: passed through unchanged.
    format: object whose `scale` attribute is set to 1.

  Returns:
    The (mutated) dataset, the labels and the format object.
  """
  batch, n_lines, n_points = dataset.shape  # unpacking also rejects non-3-D input
  for sample in dataset:
    # Largest magnitude over all lines of this sample.
    peak = np.max(np.absolute(sample[0]))
    for line in sample[1:]:
      candidate = np.max(np.absolute(line))
      if candidate > peak:
        peak = candidate
    # Rescale everything except the -1 entries; `sample` is a view, so the
    # assignment writes straight into `dataset`.
    keep = sample != -1
    sample[keep] = sample[keep] / peak
  format.scale = 1
  return dataset, labels, format

In [None]:
def SCALE(dataset, labels, format):
  """Divide each sample by its largest absolute value, in place.

  Works on 3-D input indexed as [sample][line][point] and on 2-D input
  indexed as [sample][point]. Entries equal to -1 are never rescaled, but
  they do take part in the peak search (contributing |-1| = 1).

  A sample whose peak is 0 is left untouched; dividing it would turn every
  entry into NaN.

  Args:
    dataset: 2-D or 3-D NumPy array; modified in place.
    labels: passed through unchanged.
    format: object whose `scale` attribute is set to 1.

  Returns:
    The (mutated) dataset, the labels and the format object.

  Raises:
    ValueError: if `dataset` is neither 2-D nor 3-D.
  """
  shape = dataset.shape
  if len(shape) == 3:
    batch, n_lines, n_points = shape
  else:
    batch, n_points = shape  # unpacking rejects any rank other than 2

  for sample in dataset:
    # Treat a 1-D sample as a single line so both ranks share one code path.
    lines = sample if sample.ndim == 2 else sample[np.newaxis, :]
    peak = max(np.max(np.absolute(line)) for line in lines)
    if peak == 0:
      continue  # all-zero sample: nothing to rescale, avoid 0/0
    keep = sample != -1
    # `sample` is a view, so this writes straight into `dataset`.
    sample[keep] = sample[keep] / peak
  format.scale = 1
  return dataset, labels, format

In [None]:
def CNS(dataset, labels, format):
  """Standardise each sample against its first line, then drop that line.

  For every sample the mean and standard deviation of line 0 are computed
  over the entries that are not -1; the deviation is floored at the
  module-level `epsilon`. Every entry of the sample that is not -1 is then
  replaced by (value - mean) / std. Finally line 0 is removed, so the
  returned array has one line fewer than the input.

  Args:
    dataset: 3-D array indexed as [sample][line][point]; values are
      rewritten in place before the first line is deleted.
    labels: passed through unchanged.
    format: object whose `cns` attribute is set to 1.

  Returns:
    (dataset without line 0, labels, format).
  """
  batch, n_lines, n_points = dataset.shape  # unpacking rejects non-3-D input
  for sample in dataset:
    first_line = sample[0]
    first_line_seen = first_line[first_line != -1]
    c_mean = np.mean(first_line_seen)
    c_std = np.std(first_line_seen).clip(epsilon, None)
    # -1 entries are skipped; `sample` is a view into `dataset`.
    seen = sample != -1
    sample[seen] = (sample[seen] - c_mean) / c_std

  dataset = np.delete(dataset, 0, axis=1)
  format.cns = 1
  return dataset, labels, format

In [None]:
def IA(dataset, labels, format):
  """Split each multi-line sample into one row per line.

  When `format.cns == 0` the first line of every sample is taken out and
  paired with each remaining line, giving rows of shape (2, n_points)
  ordered as (first line, other line). Otherwise every line simply becomes
  its own row of shape (n_points,).

  The number of lines is read from the data instead of being fixed at 3, so
  samples with any number of lines are handled.

  Args:
    dataset: 3-D array indexed as [sample][line][point]; not modified.
    labels: array with batch * n_lines elements (n_lines counted after the
      first line is removed when `format.cns == 0`); it is flattened to 1-D.
    format: object read for `cns`; its `ia` attribute is set to 1.

  Returns:
    (reshaped dataset, flattened labels, format).
  """
  pair_with_first = format.cns == 0
  if pair_with_first:
    control = dataset[:, 0, :].copy()
    dataset = np.delete(dataset, 0, axis=1)

  batch, n_lines, n_points = dataset.shape
  # Copy before reshaping so the result never aliases the caller's array.
  dataset = (dataset.copy()).reshape(batch*n_lines, n_points)

  if pair_with_first:
    # One copy of the first line per remaining line of the same sample.
    control = np.repeat(control, repeats=n_lines, axis=0)
    dataset = np.stack((control, dataset), axis=1)

  labels = labels.reshape(batch*n_lines)

  format.ia = 1
  return dataset, labels, format

In [None]:
def iqm(series, q):
  """Mean of the values lying between the (1-q) and q quantiles, inclusive.

  The two quantiles are ordered before filtering, so `q` and `1 - q` give
  the same result (e.g. 0.25 and 0.75 both select the interquartile range).
  Without the ordering a `q` below 0.5 would select nothing and return NaN.

  Args:
    series: non-empty sequence of numbers.
    q: quantile level in [0, 1].

  Returns:
    The mean of the retained values.
  """
  lower, upper = sorted((np.quantile(series, 1-q), np.quantile(series, q)))
  kept = [y for y in series if lower <= y <= upper]
  return np.mean(kept)

def imputate(dataset, labels, format, imp_type):
  """Fill the -1 entries of every series with a statistic of that series.

  Each series (a line of a 3-D dataset, or a row of a 2-D one) is handled on
  its own: the fill value is the mean, median or interquartile mean
  (`iqm(series, 0.75)`) of its entries that are not -1. A series made up
  only of -1 entries is replaced by zeros.

  Args:
    dataset: 2-D or 3-D NumPy array; modified in place.
    labels: passed through unchanged.
    format: object whose `imp` attribute is set to 1 ('mean'), 2 ('median')
      or 3 ('iqm').
    imp_type: one of 'mean', 'median', 'iqm'.

  Returns:
    The (mutated) dataset, the labels and the format object.

  Raises:
    AssertionError: if `imp_type` is not one of the three supported names.
    ValueError: if `dataset` is neither 2-D nor 3-D.
  """
  shape = dataset.shape
  # Collect a writable view of every series so both ranks share one loop.
  if len(shape) == 3:
    batch, n_series, n_points = shape
    rows = [dataset[i][j] for i in range(batch) for j in range(n_series)]
  else:
    batch, n_points = shape
    rows = [dataset[i] for i in range(batch)]

  for row in rows:
    observed = row[row != -1]
    if len(observed) == 0:
      # Nothing to derive a fill value from: blank the series instead.
      row[:] = np.zeros(n_points)
      continue
    if imp_type == 'mean':
      fill = np.mean(observed)
    elif imp_type == 'median':
      fill = np.median(observed)
    elif imp_type == 'iqm':
      fill = iqm(observed, 0.75)
    else:
      assert False, f"imp_type={imp_type}"
    row[row == -1] = fill

  # Record which imputation ran: position in this tuple + 1 is the code.
  kinds = ('mean', 'median', 'iqm')
  assert imp_type in kinds, f"imp_type={imp_type}"
  format.imp = kinds.index(imp_type) + 1

  return dataset, labels, format

In [None]:
def process(dataset, labels, goal_format):
  """Run the preprocessing steps requested by `goal_format`.

  Steps are applied in a fixed order: CNS, IA, imputation, SCALE. Each step
  runs only when the matching flag of `goal_format` asks for it.

  Args:
    dataset: array handed to the preprocessing functions (some of them
      modify it in place).
    labels: labels accompanying `dataset`.
    goal_format: object with `cns`, `ia`, `imp` and `scale` attributes
      describing the desired preprocessing.

  Returns:
    (processed dataset, processed labels, Format describing what was applied).

  Raises:
    AssertionError: if the applied steps do not match `goal_format`.
  """
  applied = Format()
  if goal_format.cns == 1:
    dataset, labels, applied = CNS(dataset, labels, applied)
  if goal_format.ia == 1:
    dataset, labels, applied = IA(dataset, labels, applied)

  # Imputation code -> name understood by imputate(); other codes skip it.
  imp_type = {1: 'mean', 2: 'median', 3: 'iqm'}.get(goal_format.imp)
  if imp_type is not None:
    dataset, labels, applied = imputate(dataset, labels, applied, imp_type)

  if goal_format.scale == 1:
    dataset, labels, applied = SCALE(dataset, labels, applied)

  # Sanity check: what was applied must equal what was requested.
  for flag in ('cns', 'scale', 'ia', 'imp'):
    assert getattr(applied, flag) == getattr(goal_format, flag)

  return dataset, labels, applied

In [None]:
def build_model(format):
  """Build an (uncompiled) Keras Sequential classifier for a data format.

  Input shape and output size depend on two flags of `format`:
    ia == 0, cns == 0 -> input (4, 10), 3 outputs
    ia == 0, cns != 0 -> input (3, 10), 3 outputs
    ia != 0, cns == 0 -> input (2, 10), 1 output
    ia != 0, cns != 0 -> input (10,),   1 output
  Each output is a pair of class scores, so the model emits shape (3, 2) or
  (2,) per sample.

  NOTE(review): the last Dense layer already uses a softmax activation and a
  Softmax layer is applied again afterwards, so scores are normalised twice.
  This is kept as-is; confirm it matches the saved models before changing it.

  Args:
    format: object with `ia` and `cns` attributes.

  Returns:
    A tf.keras.Sequential model.
  """
  whole_graph = format.ia == 0
  has_first_line = format.cns == 0
  if whole_graph:
    out_size = 3
    input_shape = (4, 10) if has_first_line else (3, 10)
  else:
    out_size = 1
    input_shape = (2, 10) if has_first_line else (10,)

  model = tf.keras.Sequential()
  stack = [
      # Flatten whatever input layout was chosen into one feature vector.
      layers.Flatten(input_shape=input_shape),
      # Two small hidden layers followed by dropout.
      layers.Dense(16, activation='relu'),
      layers.Dense(16, activation='relu'),
      layers.Dropout(0.2),
      # Two scores per output.
      layers.Dense(2*out_size, activation='softmax'),
  ]
  if out_size == 3:
    # Regroup the six scores into three pairs and normalise each pair.
    stack += [layers.Reshape((3, 2)), layers.Softmax(axis=2)]
  else:
    stack.append(layers.Softmax())
  for layer in stack:
    model.add(layer)

  return model

# Define Model Testing Functions

In [None]:
class Metrics():
  """Container for the evaluation results of one model.

  The six scalar scores start at 0.0 and `metrics_by_power` starts as an
  empty dict; compute_metrics() fills them in.
  """
  def __init__(self):
    for score in ('acc', 'precision', 'recall', 'f1', 'power', 'type1'):
      setattr(self, score, 0.0)
    # Per-effect-size-bin breakdown, keyed by bin.
    self.metrics_by_power = {}

def calc_metrics(preds, labels):
  """Binary classification metrics from predictions and 0/1 labels.

  A metric whose denominator is zero (for example recall when no label is 1)
  is returned as NaN. That case is expected for single-class subsets, so the
  divisions run with NumPy's floating-point warnings silenced instead of
  emitting a RuntimeWarning per call.

  Args:
    preds: NumPy array of predicted classes (0 or 1).
    labels: NumPy array of true classes (0 or 1), same shape as `preds`.

  Returns:
    Tuple (acc, precision, recall, f1, power, type1), where power equals
    recall (TP / (TP + FN)) and type1 is FP / (FP + TN).
  """
  correct = preds == labels
  wrong = 1 - correct
  negatives = 1 - labels
  TP = np.sum(correct * labels)
  TN = np.sum(correct * negatives)
  FP = np.sum(wrong * negatives)
  FN = np.sum(wrong * labels)

  # 0/0 is a legitimate outcome here; keep the NaN but drop the warning.
  with np.errstate(divide='ignore', invalid='ignore'):
    acc = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * precision * recall / (precision + recall)
    power = TP / (TP + FN)
    type1 = FP / (FP + TN)

  return acc, precision, recall, f1, power, type1

def calc_metrics_by_power(preds, labels, effect_sizes, bin_values):
  """Break classification metrics down by effect-size bin.

  Samples with label 0 form the 'undiff' group. Samples with a non-zero
  label are grouped by effect size into half-open bins
  [bin_values[i], bin_values[i+1]); the last bin has no upper bound and also
  collects effect sizes equal to -2 (presumably a marker for an unbounded
  effect size -- confirm against the dataset generator).

  Args:
    preds: 1-D array of predicted classes.
    labels: 1-D array of true classes, aligned with `preds`.
    effect_sizes: array of effect sizes aligned with `preds`.
    bin_values: ascending lower edges of the bins.

  Returns:
    Dict keyed by 'undiff' and by each lower edge. Every value is a dict with
    keys 'acc', 'precision', 'recall', 'f1', 'power', 'type1' (as returned by
    calc_metrics) and 'n' (number of samples in the group).
  """
  metric_names = ('acc', 'precision', 'recall', 'f1', 'power', 'type1')

  def _summarise(mask):
    # Metrics dict for the samples selected by `mask`.
    group_preds = preds[mask]
    summary = dict(zip(metric_names, calc_metrics(group_preds, labels[mask])))
    summary['n'] = len(group_preds)
    return summary

  diff_mask = labels != 0
  metrics_by_power = {'undiff': _summarise(labels == 0)}

  n = len(bin_values)
  for i, lower in enumerate(bin_values):
    if i == n-1:
      # Open-ended last bin, plus the -2 entries.
      mask = ( (effect_sizes >= lower) | (effect_sizes == -2) ) & diff_mask
    else:
      mask = (lower <= effect_sizes) & (bin_values[i+1] > effect_sizes) & diff_mask
    metrics_by_power[lower] = _summarise(mask.flatten())

  return metrics_by_power


def compute_metrics(model, data, lbls, effect_sizes):
  """Evaluate `model` on `data` and collect overall and per-bin metrics.

  Predictions are the argmax over the last axis of `model.predict`. Entries
  whose effect size equals -1 are dropped before any metric is computed. The
  overall scores come from calc_metrics(), the same routine used for the
  per-bin breakdown, so both are always derived identically.

  Args:
    model: object exposing `predict(x, verbose=0)`.
    data: model input; a copy is passed to the model.
    lbls: true classes; flattened before use.
    effect_sizes: effect sizes; flattened and expected to align element-wise
      with the flattened predictions.

  Returns:
    A Metrics instance with float scores and the `metrics_by_power` dict.
  """
  effect_sizes = effect_sizes.flatten()
  valid = effect_sizes != -1  # -1 entries are excluded from evaluation

  preds = np.argmax(model.predict(data.copy(), verbose=0), axis=-1).flatten()[valid]
  labels = lbls.flatten()[valid]

  acc, precision, recall, f1, power, type1 = calc_metrics(preds, labels)

  # Lower edges of the effect-size bins; the last bin is open-ended.
  bin_values = [0, 1, 2, 3, 4, 6, 10]
  metrics_by_power = calc_metrics_by_power(preds, labels, effect_sizes[valid], bin_values)

  metrics = Metrics()
  metrics.acc = float(acc)
  metrics.precision = float(precision)
  metrics.recall = float(recall)
  metrics.f1 = float(f1)
  metrics.power = float(power)
  metrics.type1 = float(type1)
  metrics.metrics_by_power = metrics_by_power

  return metrics

def create_ratings(model, data, lbls):
  """Return the model's predicted classes for `data` as a flat array.

  Args:
    model: object exposing `predict(x, verbose=0)`.
    data: model input; a copy is passed to the model.
    lbls: unused. Kept so existing call sites keep working; it is no longer
      copied, so any value (including None) is accepted.

  Returns:
    1-D array holding the argmax over the last axis of the model output.
  """
  scores = model.predict(data.copy(), verbose=0)
  return np.argmax(scores, axis=-1).flatten()

# Load Dataset

In [None]:
# Load the evaluation arrays from the Datasets/ folder (paths are relative to
# the current working directory).
dataset = np.load('Datasets/real_dataset.npy')
labels = np.load('Datasets/real_labels.npy')
# Entries equal to -1 in effect_sizes are excluded by compute_metrics().
effect_sizes = np.load('Datasets/real_effect_sizes.npy')

## Plot Testing Graph Examples

In [None]:
def plot_ATD(ax, graph, label, idx):
  """Draw one graph (a set of lines) on `ax`.

  Entries equal to -1 are dropped from each line before plotting, and a line
  with nothing left is skipped. Up to four lines are supported; they are
  labelled 'Control', 'Condition 1', 'Condition 2', 'Condition 3' in order.

  Args:
    ax: Matplotlib axes to draw on.
    graph: iterable of lines, each an iterable of numbers.
    label: value shown in the title after "Label=".
    idx: index shown in the title after "i=".
  """
  markers = ['o', 'v', 's', 'x']
  legend = ['Control', 'Condition 1', 'Condition 2', 'Condition 3']
  for i, line in enumerate(graph):
    series = [y for y in line if y != -1]
    if len(series) > 0:
      ax.plot(series, color='k', marker = markers[i], label=legend[i])
  ax.legend()
  ax.set_title(f"(i={idx}), Label={label}")
  ax.set_xlabel('Session')
  ax.set_ylabel('Behavior Response')

# Seed *before* sampling so the same three graphs are picked on every run,
# including the first one after a kernel restart. (The seed used to be set
# inside plot_ATD, i.e. only after the sample had already been drawn.)
random.seed(346)
random_indices = random.sample(range(len(dataset)), 3)

# Create a figure with three subplots
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Plot each selected graph
for i, idx in enumerate(random_indices):
  plot_ATD(axes[i], dataset[idx], labels[idx], idx)

plt.tight_layout()
plt.show()

# Load Models

In [None]:
import re

model_runs = [0,1,2,3,4]
MODEL_TAG = 'FAM-SYS-'

# model_dict maps a 4-digit model ID (scale, cns, ia, imp flags, in that
# order) to its loaded models (one per training run), the epoch counts parsed
# from the filenames, the matching Format, and empty metric lists.
model_dict = {}
for run in model_runs:
  run_dir = f'Training_Runs/SYS_Models_Run{run}'
  # Sorted for a deterministic order; os.listdir returns entries in
  # arbitrary order.
  model_files = sorted(f for f in os.listdir(run_dir) if MODEL_TAG in f)
  for model_file in model_files:
    # Read the ID from *this* file's name. Pairing IDs taken from the first
    # run with later listings by position would mislabel models whenever two
    # directories list their files in a different order.
    id_start = model_file.index(MODEL_TAG) + len(MODEL_TAG)
    id = model_file[id_start:id_start + 4]
    if id not in model_dict:
      format = Format()
      format.scale = int(id[0])
      format.cns = int(id[1])
      format.ia = int(id[2])
      format.imp = int(id[3])
      model_dict[id] = {
          'models': [],
          'epochs': [],
          'format': format,
          'metrics': {
              'acc': [],
              'precision': [],
              'recall': [],
              'f1': [],
              'power': [],
              'type1': [],
              'type2' : [],
          }
      }
    # SECURITY: safe_mode=False lets Keras deserialise arbitrary Python
    # (e.g. Lambda layers) from the file. Only load model files you trust.
    model_dict[id]['models'].append(tf.keras.models.load_model(f'{run_dir}/{model_file}', safe_mode=False))
    # Keep the number of training epochs when the filename carries "ep=<n>".
    epoch_match = re.search(r'ep=(\d+)', model_file)
    if epoch_match:
      model_dict[id]['epochs'].append(epoch_match.group(1))

# Kept for any later cell that still reads the list of IDs.
model_ids = list(model_dict)

# Model Testing

## Aggregate Run Metrics

In [None]:
for id in model_dict:
  # Start from empty lists so that re-running this cell recomputes the
  # metrics instead of appending a second copy of every value.
  for values in model_dict[id]['metrics'].values():
    values.clear()

  # Process the dataset and labels according to the current model's format requirements
  dataset_it, labels_it, format = process(dataset.copy(), labels, model_dict[id]['format'])
  for model in model_dict[id]['models']:
    # Compute evaluation metrics for each model using the processed data
    metrics = compute_metrics(model, dataset_it, labels_it, effect_sizes)
    # Store one value per model (i.e. per training run) for later aggregation
    model_dict[id]['metrics']['acc'].append(metrics.acc)
    model_dict[id]['metrics']['precision'].append(metrics.precision)
    model_dict[id]['metrics']['recall'].append(metrics.recall)
    model_dict[id]['metrics']['f1'].append(metrics.f1)
    model_dict[id]['metrics']['power'].append(metrics.power)
    model_dict[id]['metrics']['type1'].append(metrics.type1)
    model_dict[id]['metrics']['type2'].append(1 - metrics.power)

aggregate_data = []
for id in model_dict:
  # Mean accuracy, power and type 1 error over the runs of each model ID
  aggregate_data.append([id, np.mean(model_dict[id]['metrics']['acc']), np.mean(model_dict[id]['metrics']['power']), np.mean(model_dict[id]['metrics']['type1'])])

import pandas as pd

df = pd.DataFrame(aggregate_data, columns=["Code", "Accuracy", "Power", "Type 1"])

# Split the 4-digit code into one integer column per preprocessing flag
df["Scale"] = df["Code"].str[0].astype(int)
df["CNS"] = df["Code"].str[1].astype(int)
df["IA"] = df["Code"].str[2].astype(int)
df["IMP"] = df["Code"].str[3].astype(int)

print()
# Mean accuracy with each binary preprocessing step enabled vs. disabled
for feature in ["Scale", "CNS", "IA"]:
    mean_with = df[df[feature] == 1]["Accuracy"].mean()
    mean_without = df[df[feature] == 0]["Accuracy"].mean()
    print(f"{feature}: {mean_with:.4f} (enabled) vs {mean_without:.4f} (disabled) | diff={mean_with - mean_without:.4f}")

# Mean accuracy for each imputation type (1=mean, 2=median, 3=iqm) vs. no imputation (0)
mean_with_mean = df[df["IMP"] == 1]["Accuracy"].mean()
mean_with_median = df[df["IMP"] == 2]["Accuracy"].mean()
mean_with_iqm = df[df["IMP"] == 3]["Accuracy"].mean()
mean_without = df[df["IMP"] == 0]["Accuracy"].mean()
print()
print('___IMP___')
print(f'MEAN: {mean_with_mean:.4f} (enabled) vs {mean_without:.4f} (IMP disabled) | diff={mean_with_mean - mean_without:.4f}')
print(f'MEDIAN: {mean_with_median:.4f} (enabled) vs {mean_without:.4f} (IMP disabled) | diff={mean_with_median - mean_without:.4f}')
print(f'IQM: {mean_with_iqm:.4f} (enabled) vs {mean_without:.4f} (IMP disabled) | diff={mean_with_iqm - mean_without:.4f}')


In [None]:
# Display the summary table: one row per model code with its mean Accuracy,
# Power and Type 1 error plus the decoded Scale/CNS/IA/IMP flags.
df

In [None]:
# Codes of the three configurations with the highest mean accuracy.
top_3_accuracy_codes = df.nlargest(3, 'Accuracy')['Code'].tolist()
print(top_3_accuracy_codes)

## Power By Effect Size Plotting

In [None]:
# Bar charts of power per effect-size bin, one panel per selected model ID.
model_ids_to_plot = ['0011', '0012', '1010']

fig, axs = plt.subplots(1, 3, figsize=(24, 6))
fig.suptitle('Power by Effect Size for Selected Models', fontsize=16)

for i, id in enumerate(model_ids_to_plot):
  # Preprocess a fresh copy of the data the way this model ID expects it
  dataset_it, labels_it, format = process(dataset.copy(), labels, model_dict[id]['format'])
  # Only the first loaded model of each ID is evaluated here
  model = model_dict[id]['models'][0]
  metrics = compute_metrics(model, dataset_it, labels_it, effect_sizes)

  # Keep the numeric bins only; 'undiff' (label 0 samples) is not plotted
  bins = list(metrics.metrics_by_power.keys())
  bins.remove('undiff')

  power_by_bin = [metrics.metrics_by_power[bin]['power'] for bin in bins]
  n_by_bin = [metrics.metrics_by_power[bin]['n'] for bin in bins]

  # Replace NaN values with 0 for plotting
  power_by_bin = [0 if np.isnan(x) else x for x in power_by_bin]

  # Build interval labels "[lower, upper)"; the last bin is open-ended
  new_bins = []
  for j in range(len(bins)):
      if j < len(bins) - 1:
          new_bins.append(f"[{bins[j]}, {bins[j+1]})")
      else:
          new_bins.append(f"[{bins[j]}, inf)")

  # Use indices for plotting positions so bars are evenly spaced
  x_positions = np.arange(len(bins))

  # Plot Power on the current subplot
  axs[i].grid(True, axis='y', alpha=0.5, zorder=0, linestyle=':')
  bars_power = axs[i].bar(x_positions, power_by_bin, color='grey')
  axs[i].set_title(f'Model {id} Power')
  axs[i].set_xlabel('Effect Size Bin')
  axs[i].set_ylabel('Power')
  axs[i].set_xticks(x_positions)
  axs[i].set_xticklabels(new_bins, rotation=45, ha='right')
  axs[i].set_ylim(0, 1.1)
  # Annotate each bar with the number of samples in its bin
  for bar, n in zip(bars_power, n_by_bin):
      yval = bar.get_height()
      axs[i].text(bar.get_x() + bar.get_width()/2, yval + 0.01, n, ha='center', va='bottom')

plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout to prevent suptitle overlap
plt.show()

## Create Ratings Set

In [None]:
# For every model ID, store the predictions of its most accurate model
# (highest entry in the 'acc' list filled by the aggregation cell).
ratings = {}
for id in model_dict:
  model_index = np.argmax(model_dict[id]['metrics']['acc'])
  model = model_dict[id]['models'][model_index]
  # Preprocess fresh copies of the data the way this model ID expects it
  dataset_it, labels_it, format = process(dataset.copy(), labels.copy(), model_dict[id]['format'])
  preds = create_ratings(model, dataset_it, labels_it)
  ratings[id] = preds

## Variance Analysis

In [None]:
# Spread of accuracy, power and type 1 error across training runs for the
# three selected model IDs.
accuracy = []
power = []
type1 = []
x_labels = []
for id in ['0012', '0011', '1010']:
  accuracy.append(model_dict[id]['metrics']['acc'])
  power.append(model_dict[id]['metrics']['power'])
  type1.append(model_dict[id]['metrics']['type1'])
  x_labels.append(id)

fig, axs = plt.subplots(1, 3, figsize=(12, 4))  # 1 row, 3 columns

# One box plot per metric. `tick_labels` is the current name of the boxplot
# keyword (the older `labels` is deprecated) and is what the LOOCV cells of
# this notebook already use.
for ax, values, title in zip(axs, (accuracy, power, type1), ('Accuracy', 'Power', 'Type 1')):
  ax.boxplot(values, tick_labels=x_labels)
  ax.set_title(title)
  ax.grid(True, linestyle="--", alpha=0.5)

plt.show()

# LOOCV

## LOOCV Fine-Tune Models

In [None]:
# Create the "LOOCV" directory if it doesn't exist
loocv_dir = "LOOCV"
os.makedirs(loocv_dir, exist_ok=True)
json_file_path = os.path.join(loocv_dir, "loocv_metrics_ft.json")

if 'loocv_metrics_ft.json' in os.listdir(loocv_dir):
  with open(json_file_path, 'r') as f:
      loocv_metrics_ft = json.load(f)
else:
  loocv_metrics_ft = {}

itterations = 5
for id in ['0012', '0011', '1010']:
  if id not in loocv_metrics_ft:
    loocv_metrics_ft[id] = {'acc': [],
                        'power': [],
                        'type1': [],
                        }

  runs_complete = len(loocv_metrics_ft[id]['acc'])
  for run in range(itterations - runs_complete):

    random.seed(42 + 7 * (run + runs_complete))
    np.random.seed(42 + 7 * (run + runs_complete))
    tf.random.set_seed(42 + 7 * (run + runs_complete))

    best_model = int(np.argmax(model_dict[id]['metrics']['acc']))
    model = model_dict[id]['models'][best_model]
    preds = np.array([])
    print(id, '_____')
    for n in range(len(labels)):
      if n % 10 == 0:
        print(f'{n}|', end='')
      dataset_it, labels_it, format = process(dataset.copy(), labels.copy(), model_dict[id]['format'])
      if labels_it.shape[0] == labels.shape[0]:
        test_one = np.array([dataset_it[n]])
        test_one_lbl = np.array([labels_it[n]])
        one_out = np.delete(dataset_it, n, axis=0)
        one_out_lbls = np.delete(labels_it, n, axis=0)
      else:
        indices = range(n*3, n*3 + 3, 1)
        test_one = dataset_it[indices]
        test_one_lbl = labels_it[indices]
        one_out = np.delete(dataset_it, indices, axis=0)
        one_out_lbls = np.delete(labels_it, indices, axis=0)

      history = model.fit(
          one_out,
          one_out_lbls,
          epochs=100,
          validation_data=(test_one, test_one_lbl),
          verbose=0)

      pred = np.argmax(model.predict(test_one, verbose=0), axis=-1)
      preds = np.concatenate((preds, pred.flatten()), axis=0)

    correct_lbl = labels.flatten()
    true = preds == correct_lbl
    TP = np.sum(true * correct_lbl)
    TN = np.sum(true * (1 - correct_lbl))
    FP = np.sum((1 - true) * (1 - correct_lbl))
    FN = np.sum((1 - true) * correct_lbl)
    acc = (TP + TN) / (TP + TN + FP + FN)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * precision * recall / (precision + recall)
    power = TP / (TP + FN)
    type1 = FP / (FP + TN)

    loocv_metrics_ft[id]['acc'].append(acc)
    loocv_metrics_ft[id]['power'].append(power)
    loocv_metrics_ft[id]['type1'].append(type1)

    with open(json_file_path, "w") as json_file:
      json.dump(loocv_metrics_ft, json_file)

    print(f'Run {run} Accuracy: {acc}, Power: {power}, Type 1: {type1}')

In [None]:
# Summarise the fine-tuning LOOCV runs: print the mean of each metric per
# model ID and show the per-run spread as box plots.
acc_bin = []
power_bin = []
type1_bin = []
for id in loocv_metrics_ft:
  # Means over the runs recorded for this ID
  acc = np.mean(loocv_metrics_ft[id]['acc'])
  power = np.mean(loocv_metrics_ft[id]['power'])
  type1 = np.mean(loocv_metrics_ft[id]['type1'])

  # Keep the per-run values for the box plots
  acc_bin.append(loocv_metrics_ft[id]['acc'])
  power_bin.append(loocv_metrics_ft[id]['power'])
  type1_bin.append(loocv_metrics_ft[id]['type1'])

  print(f'{id}: Accuracy: {acc:.3f}, Power: {power:.3f}, Type 1: {type1:.3f}')

# One figure per metric, one box per model ID
plt.boxplot(acc_bin, tick_labels=loocv_metrics_ft.keys())
plt.title('LOOCV Accuracy')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()
plt.boxplot(power_bin, tick_labels=loocv_metrics_ft.keys())
plt.title('LOOCV Power')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()
plt.boxplot(type1_bin, tick_labels=loocv_metrics_ft.keys())
plt.title('LOOCV Type 1')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()

## LOOCV Train Models From Blank

In [None]:
# Leave-one-out cross-validation (LOOCV) with models trained from scratch:
# for every graph, train a fresh model on all other graphs and predict the
# held-out one. Finished runs are cached in LOOCV/loocv_metrics_bl.json.
# predictions_bl only holds the runs computed in this session (predictions
# are not part of the JSON cache).
predictions_bl = {}

loocv_dir = "LOOCV"
os.makedirs(loocv_dir, exist_ok=True)
json_file_path = os.path.join(loocv_dir, "loocv_metrics_bl.json")

if os.path.exists(json_file_path):
  with open(json_file_path, 'r') as f:
      loocv_metrics_bl = json.load(f)
else:
  loocv_metrics_bl = {}

itterations = 5
for id in ['0012', '0011', '1010']:
  print(id, '_____')
  if id not in loocv_metrics_bl:
    loocv_metrics_bl[id] = {'acc': [],
                        'power': [],
                        'type1': [],
                        }
  predictions_bl[id] = {}

  runs_complete = len(loocv_metrics_bl[id]['acc'])
  if runs_complete >= itterations:
    continue  # everything for this ID is already cached

  # Neither the preprocessing nor the epoch budget depends on the fold.
  dataset_it, labels_it, format = process(dataset.copy(), labels.copy(), model_dict[id]['format'])
  # True when one row corresponds to one graph; otherwise each graph was
  # split into three consecutive rows.
  one_row_per_graph = labels_it.shape[0] == labels.shape[0]
  # 100 epochs plus the epoch count parsed from the first pre-trained model's
  # filename for this ID.
  equalized_epochs = 100 + int(model_dict[id]['epochs'][0])

  for run in range(itterations - runs_complete):
    seed = 42 + 7 * (run + runs_complete)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    preds = np.array([])
    for n in range(len(labels)):
      if n % 10 == 0:
        print(f'{n}|', end='')
      held_out = [n] if one_row_per_graph else list(range(n*3, n*3 + 3))
      test_one = dataset_it[held_out]
      test_one_lbl = labels_it[held_out]
      one_out = np.delete(dataset_it, held_out, axis=0)
      one_out_lbls = np.delete(labels_it, held_out, axis=0)

      # A new, untrained model for every fold. Reusing one model across folds
      # would let it see each held-out sample during the earlier folds.
      model = build_model(model_dict[id]['format'])
      model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
      model.fit(
          one_out,
          one_out_lbls,
          epochs=equalized_epochs,
          validation_data=(test_one, test_one_lbl),
          verbose=0)

      pred = np.argmax(model.predict(test_one, verbose=0), axis=-1)
      preds = np.concatenate((preds, pred.flatten()), axis=0)

    acc, precision, recall, f1, power, type1 = calc_metrics(preds, labels.flatten())

    predictions_bl[id][run] = [acc, preds]

    # Plain floats keep the JSON dump independent of the NumPy dtype.
    loocv_metrics_bl[id]['acc'].append(float(acc))
    loocv_metrics_bl[id]['power'].append(float(power))
    loocv_metrics_bl[id]['type1'].append(float(type1))

    # Save after every run so progress survives an interruption.
    with open(json_file_path, "w") as json_file:
      json.dump(loocv_metrics_bl, json_file)

    print(f'Run {run} Accuracy: {acc}, Power: {power}, Type 1: {type1}')

In [None]:
# Summarise the from-scratch LOOCV runs: print the mean of each metric per
# model ID and show the per-run spread as box plots.
acc_bin = []
power_bin = []
type1_bin = []
for id in loocv_metrics_bl:
  # Means over the runs recorded for this ID
  acc = np.mean(loocv_metrics_bl[id]['acc'])
  power = np.mean(loocv_metrics_bl[id]['power'])
  type1 = np.mean(loocv_metrics_bl[id]['type1'])

  # Keep the per-run values for the box plots
  acc_bin.append(loocv_metrics_bl[id]['acc'])
  power_bin.append(loocv_metrics_bl[id]['power'])
  type1_bin.append(loocv_metrics_bl[id]['type1'])

  print(f'{id}: Accuracy: {acc:.3f}, Power: {power:.3f}, Type 1: {type1:.3f}')

# One figure per metric, one box per model ID
plt.boxplot(acc_bin, tick_labels=loocv_metrics_bl.keys())
plt.title('LOOCV Accuracy')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()
plt.boxplot(power_bin, tick_labels=loocv_metrics_bl.keys())
plt.title('LOOCV Power')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()
plt.boxplot(type1_bin, tick_labels=loocv_metrics_bl.keys())
plt.title('LOOCV Type 1')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()

### Create Rating Set

In [None]:
# Pick the from-scratch LOOCV run with the highest accuracy and add its
# predictions to the rating set. max_run is [accuracy, model ID, run].
max_run = [0, None, None]
for id in predictions_bl:
  for run in predictions_bl[id]:
    if predictions_bl[id][run][0] > max_run[0]:
      max_run = [predictions_bl[id][run][0], id, run]

print(max_run)

# predictions_bl only contains runs computed in this session. When every run
# was restored from the JSON cache there is nothing to select, and indexing
# with the None placeholders would raise a KeyError.
if max_run[1] is None:
  print('No LOOCV predictions available in this session; ratings left unchanged.')
else:
  ratings[f'LOOCV_Trained_{max_run[1]}'] = predictions_bl[max_run[1]][max_run[2]][1]

## Combine LOOCV Boxplots

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

# Accuracy of three model IDs under three training techniques:
#   Tech 1 - the loaded pre-trained models (one value per training run)
#   Tech 2 - LOOCV with models trained from scratch
#   Tech 3 - LOOCV with fine-tuned pre-trained models
data = {
    "0011": {
        "Tech 1": model_dict['0011']['metrics']['acc'],
        "Tech 2": loocv_metrics_bl['0011']['acc'],
        "Tech 3": loocv_metrics_ft['0011']['acc'],
    },
    "0012": {
        "Tech 1": model_dict['0012']['metrics']['acc'],
        "Tech 2": loocv_metrics_bl['0012']['acc'],
        "Tech 3": loocv_metrics_ft['0012']['acc'],
    },
    "1010": {
        "Tech 1": model_dict['1010']['metrics']['acc'],
        "Tech 2": loocv_metrics_bl['1010']['acc'],
        "Tech 3": loocv_metrics_ft['1010']['acc'],
    }
}

# Named explicitly instead of relying on a variable left over from a loop
# (that loop also overwrote the notebook-level `model` with a string).
techniques = ["Tech 1", "Tech 2", "Tech 3"]
offset = [-0.2, 0, 0.2]  # Offset for techniques within a model
xticks = list(data)
tick_positions = list(range(len(xticks)))

colors = [plt.cm.Greys(x) for x in [0.1, 0.4, 0.7]]
tech_names = ['Train w/ Simulated', 'LOOCV w/ Real', 'LOOCV Fine-Tune w/ Real']

# One boxplot call per technique: its boxes sit at the same offset next to
# every model tick and share one fill colour.
for j, tech in enumerate(techniques):
    tech_data = [data[code][tech] for code in xticks]
    positions = [i + offset[j] for i in tick_positions]

    bp = plt.boxplot(
        tech_data,
        positions=positions,
        widths=0.15,
        patch_artist=True  # Required to color the boxes
    )

    for box in bp['boxes']:
        box.set_facecolor(colors[j])
    # Outline elements stay black so they remain visible on every fill colour
    for whisker in bp['whiskers']:
        whisker.set_color("black")
    for cap in bp['caps']:
        cap.set_color("black")
    for median in bp['medians']:
        median.set_color("black")
    for flier in bp['fliers']:
        flier.set(marker='o', color='black')

# Labeling
plt.xticks(tick_positions, xticks)
plt.xlabel("Models")
plt.ylabel("Metric")
#plt.title("Comparison of Accuracy Across Models")
plt.grid(axis='y', linestyle="--", alpha=0.5) # Add gridlines on the y-axis
plt.ylim(0.8, 1) # Restrict the y-axis to the 0.8-1 range
plt.yticks(np.arange(0.8, 1, 0.05)) # Ticks every 0.05 (0.80 ... 0.95)

# Legend handles
legend_handles = [Patch(facecolor=colors[i], label=tech_names[i]) for i, t in enumerate(techniques)]
plt.legend(handles=legend_handles, loc="lower left")

plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch

# Power of three model IDs under three training techniques:
#   Tech 1 - the loaded pre-trained models (one value per training run)
#   Tech 2 - LOOCV with models trained from scratch
#   Tech 3 - LOOCV with fine-tuned pre-trained models
data = {
    "0011": {
        "Tech 1": model_dict['0011']['metrics']['power'],
        "Tech 2": loocv_metrics_bl['0011']['power'],
        "Tech 3": loocv_metrics_ft['0011']['power'],
    },
    "0012": {
        "Tech 1": model_dict['0012']['metrics']['power'],
        "Tech 2": loocv_metrics_bl['0012']['power'],
        "Tech 3": loocv_metrics_ft['0012']['power'],
    },
    "1010": {
        "Tech 1": model_dict['1010']['metrics']['power'],
        "Tech 2": loocv_metrics_bl['1010']['power'],
        "Tech 3": loocv_metrics_ft['1010']['power'],
    }
}

# Named explicitly instead of relying on a variable left over from a loop
# (that loop also overwrote the notebook-level `model` with a string).
techniques = ["Tech 1", "Tech 2", "Tech 3"]
offset = [-0.2, 0, 0.2]  # Offset for techniques within a model to avoid overlap
xticks = list(data)
tick_positions = list(range(len(xticks)))

# Colors and legend labels for the three techniques
colors = [plt.cm.Greys(x) for x in [0.1, 0.4, 0.7]]
tech_names = ['Train w/ Simulated', 'LOOCV w/ Real', 'LOOCV Fine-Tune w/ Real']

# One boxplot call per technique: its boxes sit at the same offset next to
# every model tick and share one fill colour.
for j, tech in enumerate(techniques):
    tech_data = [data[code][tech] for code in xticks]
    positions = [i + offset[j] for i in tick_positions]

    bp = plt.boxplot(
        tech_data,
        positions=positions,
        widths=0.15,
        patch_artist=True  # Required to color the boxes
    )

    for box in bp['boxes']:
        box.set_facecolor(colors[j])
    # Outline elements stay black so they remain visible on every fill colour
    for whisker in bp['whiskers']:
        whisker.set_color("black")
    for cap in bp['caps']:
        cap.set_color("black")
    for median in bp['medians']:
        median.set_color("black")
    for flier in bp['fliers']:
        flier.set(marker='o', color='black')

# Labeling and plot customization
plt.xticks(tick_positions, xticks)
plt.xlabel("Models")
plt.ylabel("Metric")
#plt.title("Comparison of Power Across Models")
plt.grid(True, linestyle="--", alpha=0.5)
plt.ylim(0.8, 1) # Restrict the y-axis to the 0.8-1 range
plt.yticks(np.arange(0.8, 1, 0.05)) # Ticks every 0.05 (0.80 ... 0.95)

# Create legend handles and display the legend
legend_handles = [Patch(facecolor=colors[i], label=tech_names[i]) for i, t in enumerate(techniques)]
plt.legend(handles=legend_handles, loc="lower left")

plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.patches import Patch

# 'type1' metric samples for three models ('0011', '0012', '1010'), each evaluated
# with three techniques: "Tech 1" reads model_dict, "Tech 2" reads loocv_metrics_bl
# and "Tech 3" reads loocv_metrics_ft.
data = {
    model: {
        "Tech 1": model_dict[model]['metrics']['type1'],
        "Tech 2": loocv_metrics_bl[model]['type1'],
        "Tech 3": loocv_metrics_ft[model]['type1'],
    }
    for model in ("0011", "0012", "1010")
}

# Grouped box plot: one group per model on the x-axis, one box per technique
# inside each group.
offset = [-0.2, 0, 0.2]  # Horizontal shift of each technique's box inside a model group
xticks = list(data)  # Model names, in dict order, used as x tick labels
tick_positions = list(range(len(xticks)))  # x coordinate of the centre of each group

# Technique keys, read from one model's entry; every model is indexed with them below.
techniques = list(data[xticks[-1]])

# One grey shade and one legend label per technique
colors = [plt.cm.Greys(x) for x in [0.1, 0.4, 0.7]]
tech_names = ['Train w/ Simulated', 'LOOCV w/ Real', 'LOOCV Fine-Tune w/ Real']

# Draw one boxplot call per technique so that all of its boxes share a colour
for j, tech in enumerate(techniques):
    tech_data = [data[model][tech] for model in xticks]
    positions = [i + offset[j] for i in tick_positions]

    bp = plt.boxplot(
        tech_data,
        positions=positions,
        widths=0.15,
        patch_artist=True  # Required to fill the boxes with a face colour
    )

    # Box fill identifies the technique; all line elements stay black for contrast
    for box in bp['boxes']:
        box.set_facecolor(colors[j])
    for part in ('whiskers', 'caps', 'medians'):
        for artist in bp[part]:
            artist.set_color("black")
    for flier in bp['fliers']:
        flier.set(marker='o', color='black')

# Labeling and plot customization
plt.xticks(tick_positions, xticks)  # boxplot() retargets the ticks, so restore one per model
plt.xlabel("Models")
plt.ylabel("Metric")
plt.grid(True, linestyle="--", alpha=0.5)  # Dashed, semi-transparent grid on both axes

# Legend: one filled patch per technique
legend_handles = [Patch(facecolor=color, label=label) for color, label in zip(colors, tech_names)]
plt.legend(handles=legend_handles, loc="lower left")

plt.show()

# Interrater Agreement

## Load Expert Ratings

In [None]:
def load_expert_ratings(file_path, subs):
    """Read one rater's CSV file and return the ratings of the requested subjects.

    The position of the subject-id column is chosen from the file name:
    files named ``rh-*`` hold the subject id in column 0, files named ``r-*``
    hold it in column 1. The three columns that follow the subject id are read
    as integer ratings. The first four lines of the file are skipped, and rows
    with fewer than five comma-separated fields are ignored.

    Args:
        file_path: Path of the ratings file. Only its base name is inspected
            for the ``r-`` / ``rh-`` prefix, so directory names cannot
            influence which layout is used.
        subs: Collection of integer subject ids whose rows are kept.

    Returns:
        1-D ``np.ndarray`` with three ratings per kept row, in file order.

    Raises:
        ValueError: If the file name starts with neither ``r-`` nor ``rh-``,
            or if a field that must be an integer cannot be parsed.
    """
    file_name = os.path.basename(file_path)
    if file_name.startswith('rh-'):
        subject_col = 0
    elif file_name.startswith('r-'):
        subject_col = 1
    else:
        raise ValueError(
            "Cannot tell the ratings layout of %r: expected a file name starting with 'r-' or 'rh-'"
            % file_path
        )

    wanted = set(subs)  # Constant-time membership test for every row
    results = []

    with open(file_path, 'r') as file:
        # Skip the four leading lines that precede the rating rows
        for _ in range(4):
            next(file, None)
        for line in file:
            columns = line.strip().split(',')  # Assuming CSV format

            # Short rows (including blank lines) cannot hold an id plus three ratings
            if len(columns) < 5:
                continue
            if int(columns[subject_col]) not in wanted:
                continue

            # The three ratings sit immediately after the subject-id column
            results += [int(value) for value in columns[subject_col + 1:subject_col + 4]]

    return np.array(results)

In [None]:
# Subject ids whose ratings are kept from the 'r-' and 'rh-' files respectively.
# NOTE(review): sorting does not change which rows load_expert_ratings keeps (it only
# tests membership); alignment with `labels` relies on the row order inside each file - confirm.
r_val_subs = sorted([2,3,4,133,136,140,13,14,16,18,20,22,23,25,26,156,28,30,31,158,34,36,38,46,50,52,54,56,57,58,61,69,70,72,74,76,79,80,82,101,102,104,105,106])
h_val_subs = sorted([6,10,11,12,13,16,17,18,20,23,26,27,28,29,30,33,34,35,36,37,38,39,40,41,42,43,44,45,47,49,54,55,57,58,61,62,63,64,65])

# Display name used in `ratings` for raters whose file name uses a different identifier
RATER_ALIASES = {'neely_VA': 'Expert_1', 'katie_VA': 'Expert_2', 'CDC': 'MDC'}


def _rater_key(file_name):
  """Return the `ratings` key for a file named '<prefix>-<rater>.csv'."""
  rater = file_name.split('-')[1].split('.')[0]
  return RATER_ALIASES.get(rater, rater)


# Match on prefix and extension so backup or lock files (e.g. 'r-x.csv.bak') are ignored
file_names = os.listdir('Ratings')
h_ratings = [name for name in file_names if name.startswith('rh-') and name.endswith('.csv')]
r_ratings = [name for name in file_names if name.startswith('r-') and name.endswith('.csv')]

# The 'r-' file starts (or resets) each rater's entry in `ratings` ...
for name in r_ratings:
  rater = _rater_key(name)
  ratings[rater] = load_expert_ratings(os.path.join('Ratings', name), r_val_subs)

# ... and the matching 'rh-' file is appended after it. A rater with an 'rh-' file
# but no existing entry in `ratings` raises KeyError here.
for name in h_ratings:
  rater = _rater_key(name)
  ratings[rater] = np.concatenate((ratings[rater], load_expert_ratings(os.path.join('Ratings', name), h_val_subs)))

ratings['Ground_Truth'] = labels.flatten()

## Create IRA Matrix

In [None]:
# Raters (experts, ground truth and models) compared against each other, in display order
keys = ['Ground_Truth', 'Expert_1', 'Expert_2', 'MDC', '1010', '0011', '0012', 'LOOCV_Trained_0012']

# cm[row][col] is the fraction of positions at which the two raters gave the same rating
cm = np.array([
  [np.mean(ratings[row_key] == ratings[col_key]) for col_key in keys]
  for row_key in keys
])

In [None]:
import seaborn as sns

plt.figure(figsize=(10, 8))

# Hide the diagonal (self-comparison) and everything above it, so each pair of
# raters is shown once, in the lower triangle. np.triu already includes the diagonal.
hidden_cells = np.triu(np.ones_like(cm, dtype=bool))

# Annotated heatmap of pairwise agreement; the colour scale spans 0.8 to 1
heatmap_options = dict(
    annot=True, fmt='.2f', cmap='coolwarm',
    xticklabels=keys, yticklabels=keys,
    vmin=0.8, vmax=1, mask=hidden_cells,
)
sns.heatmap(cm, **heatmap_options)

# Axis labels
plt.xlabel('Rater')
plt.ylabel('Rater')

# Slant the x tick labels so long rater names do not overlap
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# Describe the Curated Dataset

In [None]:
# pandas is not among the imports at the top of the notebook, so bring it into scope here
import pandas as pd

# Empty descriptor table: one row per descriptor, one column per reported percentile.
# The percentile columns are created as floats because they are later filled with
# np.percentile results; integer columns would need a dtype change on assignment.
data = {
    "Descriptor": ["Level for Diff", "Level for Undiff", "Trend (deg)", "Trend (SMD/Sesh)", "Variability", "CV"],
    10: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    50: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    90: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
}

df = pd.DataFrame(data)

In [None]:
# Percentiles reported for every descriptor; these are also the column labels of `df`
percentiles = [10, 50, 90]
# Store the percentile columns as floats so np.percentile results fit without a dtype change
df[percentiles] = df[percentiles].astype(float)


def _observed(line):
  """Return the values of `line` that differ from the -1 sentinel, as a 1-D array."""
  values = np.asarray(line)
  return values[values != -1]


def _series_of(lines):
  """Return the sentinel-free series of every row in `lines`, dropping empty ones."""
  candidates = (_observed(line) for line in lines)
  return [series for series in candidates if len(series) > 0]


def _store_percentiles(descriptor, values):
  """Write the percentiles of `values` into the row of `df` named `descriptor`."""
  for p in percentiles:
    df.loc[df["Descriptor"] == descriptor, p] = float(np.percentile(values, p))


format = Format()
format.scale = 0
format.cns = 1
format.ia = 1
format.imp = 0
# Process a copy of the dataset with the cns and ia steps enabled; every row of the
# result is treated below as one series in which -1 marks a missing value
dataset_it, labels_it, format = process(dataset.copy(), labels.copy(), format)
labels_it = labels_it.flatten()

# 'Level for Diff' / 'Level for Undiff': mean of each series, split by label (1 / 0)
_store_percentiles("Level for Diff", [np.mean(s) for s in _series_of(dataset_it[labels_it == 1])])
_store_percentiles("Level for Undiff", [np.mean(s) for s in _series_of(dataset_it[labels_it == 0])])

# Degree-1 least-squares fit of every series against its point index, computed once
# and shared by the trend and variability descriptors
all_series = _series_of(dataset_it)
fits = [np.polyfit(range(len(s)), s, 1) for s in all_series]  # (slope, intercept) pairs

# 'Trend (SMD/Sesh)': slope of the fitted line
_store_percentiles("Trend (SMD/Sesh)", [slope for slope, _ in fits])

# 'Variability': standard deviation of the residuals around the fitted line
_store_percentiles(
  "Variability",
  [np.std(s - (slope * np.arange(len(s)) + intercept)) for s, (slope, intercept) in zip(all_series, fits)],
)

# 'CV': |mean| / std, with std floored at epsilon to avoid division by zero.
# NOTE(review): this is the reciprocal of the conventional coefficient of variation
# (std / |mean|) - confirm which definition is intended before changing it.
_store_percentiles(
  "CV",
  [abs(np.mean(s)) / np.std(s).clip(epsilon, None) for s in all_series],
)

format = Format()
format.scale = 0
format.cns = 0
format.ia = 1
format.imp = 0
# Reprocess with cns disabled to calculate the trend in degrees
dataset_it, labels_it, format = process(dataset.copy(), labels.copy(), format)
labels_it = labels_it.flatten()

# 'Trend (deg)': angle of the fitted slope, using the series at index 1 of each sample.
# NOTE(review): presumably each sample holds several lines here and index 1 is the one
# of interest - confirm against process().
_store_percentiles(
  "Trend (deg)",
  [degrees(atan(np.polyfit(range(len(s)), s, 1)[0])) for s in _series_of(line[1] for line in dataset_it)],
)

In [None]:
# Show the descriptor table (one row per descriptor, columns 10 / 50 / 90) as plain text
print(df)