In [None]:
import itertools
import os
import warnings

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim

from hmmlearn import hmm

# Suppress warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Convolutional Autoencoder Structure:

In [None]:
# Define the Convolutional Autoencoder (CAE) architecture
class ConvAutoencoder(nn.Module):
    """Per-signal 1-D convolutional autoencoder for four physiological channels.

    The input is a tensor of shape ``(batch, 4, length)`` whose channels are,
    in order, ECG, RSP, tonic EDA and phasic EDA. Every channel has its own
    encoder/decoder pair with independent weights. The four latent codes
    (``self.ls`` = 256 channels each) are concatenated along the channel
    dimension, so ``encode`` returns ``(batch, 1024, latent_length)``.

    Each encoder layer halves the length (rounding up). The first decoder
    layer has no ``output_padding`` and maps ``n`` to ``2n - 1``; the others
    double it. The reconstruction therefore matches the input length when the
    length is 32 times an odd number (e.g. 12000 -> 188 -> 12000).

    The sub-module names are unchanged from the earlier weight-sharing version
    of this class, so previously saved ``state_dict`` files still load.
    """

    def __init__(self):
        super().__init__()

        # Number of latent channels produced per signal
        self.ls = 256

        # One independent encoder per signal (no shared weights)
        self.ecg_encoder = self._build_encoder()
        self.rsp_encoder = self._build_encoder()
        self.eda_tonic_encoder = self._build_encoder()
        self.eda_phasic_encoder = self._build_encoder()

        # One independent decoder per signal (no shared weights)
        self.ecg_decoder = self._build_decoder()
        self.rsp_decoder = self._build_decoder()
        self.eda_tonic_decoder = self._build_decoder()
        self.eda_phasic_decoder = self._build_decoder()

    @staticmethod
    def _build_encoder():
        """Create a fresh six-layer strided Conv1d stack (1 -> 256 channels)."""
        return nn.Sequential(
            nn.Conv1d(1, 8, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(8, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(128, 256, kernel_size=3, stride=2, padding=1),
        )

    @staticmethod
    def _build_decoder():
        """Create a fresh six-layer ConvTranspose1d stack (256 -> 1 channel).

        The final Sigmoid keeps the reconstruction in [0, 1].
        """
        return nn.Sequential(
            # No output_padding here: maps length n to 2n - 1
            nn.ConvTranspose1d(256, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(16, 8, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(8, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        )

    def encode(self, x):
        """Encode ``x`` of shape (batch, 4, length) into (batch, 4 * self.ls, latent_length)."""
        # Split input by channel for independent processing
        ecg = x[:, 0, :].unsqueeze(1)
        rsp = x[:, 1, :].unsqueeze(1)
        eda_tonic = x[:, 2, :].unsqueeze(1)
        eda_phasic = x[:, 3, :].unsqueeze(1)

        # Encode each signal with its own encoder
        ecg_encoded = self.ecg_encoder(ecg)
        rsp_encoded = self.rsp_encoder(rsp)
        eda_tonic_encoded = self.eda_tonic_encoder(eda_tonic)
        eda_phasic_encoded = self.eda_phasic_encoder(eda_phasic)

        # Concatenate the latent representations along the channel dimension
        latent_space = torch.cat(
            (ecg_encoded, rsp_encoded, eda_tonic_encoded, eda_phasic_encoded), dim=1
        )

        return latent_space

    def decode(self, latent_space):
        """Decode a latent tensor produced by ``encode`` back to (batch, 4, length)."""
        # Split the latent channels back into the four per-signal codes
        ecg_latent, rsp_latent, eda_tonic_latent, eda_phasic_latent = torch.split(
            latent_space, self.ls, dim=1
        )
        # Decode each signal with its own decoder
        ecg_decoded = self.ecg_decoder(ecg_latent)
        rsp_decoded = self.rsp_decoder(rsp_latent)
        eda_tonic_decoded = self.eda_tonic_decoder(eda_tonic_latent)
        eda_phasic_decoded = self.eda_phasic_decoder(eda_phasic_latent)

        # Concatenate the decoded signals along the channel dimension
        reconstructed = torch.cat(
            (ecg_decoded, rsp_decoded, eda_tonic_decoded, eda_phasic_decoded), dim=1
        )

        return reconstructed

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode followed by decode)."""
        latent_space = self.encode(x)
        reconstructed = self.decode(latent_space)
        return reconstructed

In [None]:
# Initial step size for the Adam optimizer
learning_rate = 1e-3

# Train an Autoencoder on Each Participant's Baseline Data

In [None]:
# Timedelta strings: the duration of one training segment, and the row spacing
# that the training cell divides it by to get the expected rows per segment.
segment_size, step_size = "12s", "0.001s"

In [None]:
# Train one autoencoder per participant on that participant's baseline recording.
# Participants that already have a saved model are skipped, so the cell is resumable.
baseline_folder = "../Physiological Preprocessed/Exp2"
model_folder = "../Convolutional Autoencoder Models"

# Rows expected in one full segment (segment duration / row spacing)
segment_duration = pd.Timedelta(segment_size)
len_segment = round(segment_duration / pd.Timedelta(step_size))

for file in os.listdir(baseline_folder):
    participant = file.split("_")[0]
    model_path = f"{model_folder}/{participant}_model.pth"
    if "baseline" not in file:
        continue
    elif os.path.exists(model_path):
        print(f"Model for {file} already exists")
        continue

    print(f"Loading {participant} data")
    print("-" * 50)

    # load data
    physiological_data = pd.read_csv(
        f"{baseline_folder}/{file}",
        usecols=["Time", "ECG_Clean", "RSP_Clean", "EDA_Tonic", "EDA_Phasic"],
    )
    physiological_data["Time"] = pd.to_timedelta(physiological_data["Time"])
    physiological_data = physiological_data.set_index("Time")

    print(f"Processing participant {participant} data")

    # Normalize every signal to [0, 1] (matches the decoder's Sigmoid output range)
    scalar = MinMaxScaler()
    physiological_data = pd.DataFrame(
        scalar.fit_transform(physiological_data),
        columns=physiological_data.columns,
        index=physiological_data.index,
    )

    # Cut the recording into consecutive segments of exactly len_segment rows.
    # Label slicing is inclusive on both ends, so a slice can carry extra
    # trailing rows; those are trimmed. Short segments (the tail) are discarded.
    X = []
    remaining = physiological_data
    while len(remaining) > 0:
        end_index = remaining.index[0] + segment_duration
        segment = remaining.loc[:end_index]
        remaining = remaining.loc[end_index:]

        if len(segment) > len_segment:
            segment = segment.iloc[:len_segment]

        if len(segment) == len_segment:
            X.append(segment.to_numpy())

    if not X:
        # np.stack([]) would raise; there is nothing to train on for this participant
        print(f"No complete {segment_size} segment for {participant}; skipping")
        continue

    # Convert to a PyTorch tensor of shape (segments, rows, signals)
    X = torch.tensor(np.stack(X), dtype=torch.float32).to(device)

    # Initialize the model
    model = ConvAutoencoder().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Learning rate scheduler
    # NOTE(review): scheduler.step() is never called, so the learning rate stays
    # constant. Stepping it once per epoch with step_size=2 / gamma=0.5 would
    # shrink the rate to about 3e-11 by epoch 50 - confirm the intended schedule
    # before enabling it.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

    # Training parameters
    batch_size = 4
    num_epochs = 50
    batch_starts = range(0, len(X), batch_size)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for i in batch_starts:

            # Get the batch and reshape it for Conv1d (batch, channels, sequence_length)
            batch = X[i : i + batch_size].permute(0, 2, 1)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(batch)

            # Reconstruction loss (already averaged over the batch by MSELoss)
            loss = criterion(outputs, batch)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Each term is a per-batch mean, so average over batches, not samples
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(batch_starts):.4f}")

    # Save the model
    os.makedirs(model_folder, exist_ok=True)
    torch.save(model.state_dict(), model_path)

---
## Constructing Observations

### Grab the physiological timestamps / takeover times

In [None]:
# Where the preprocessed physiological recordings live
processed_physio_folder_path = "../Physiological Preprocessed/"
exp2_folder_path = processed_physio_folder_path + "Exp2"

# Obstacle trigger / response timestamps for Exp2
exp2_takeover_times = pd.read_csv(
    "../AdVitam/Exp2/Preprocessed/Physio and Driving/timestamps_obstacles.csv"
)

# Everything from the third column onward is converted from seconds to Timedelta
exp2_takeover_times.iloc[:, 2:] = exp2_takeover_times.iloc[:, 2:].apply(pd.to_timedelta, unit="s")

# Zero-pad the number after the "T" in each subject id to two digits
exp2_takeover_times["subject_id"] = exp2_takeover_times["subject_id"].apply(
    lambda x: "T".join((x.split("T")[0], x.split("T")[1].zfill(2)))
)
exp2_takeover_times["subject_id"] = exp2_takeover_times["subject_id"].astype(str)

# Drop the label column, order by subject, and rename the obstacle columns:
# "<X>TrigObs" becomes "<X>TOR" and "RepObs" becomes "Response"
exp2_takeover_times = (
    exp2_takeover_times
    .drop(columns=["label_st"])
    .sort_values(by=["subject_id"])
    .rename(
        columns=lambda name: (
            name.replace("TrigObs", "") + "TOR"
            if "TrigObs" in name
            else name.replace("RepObs", "Response")
            if "RepObs" in name
            else name
        )
    )
)

exp2_takeover_times

### Create the observations for the slow and fast takeover times.

In [None]:
def _window_pair(physio, anchor, before, after):
    """Slice the rows in [anchor - before, anchor] and [anchor, anchor + after].

    Label slicing is inclusive on both ends, so a row stamped exactly at
    ``anchor`` lands in both slices; it is dropped from the second one.
    """
    leading = physio.loc[anchor - before : anchor]
    trailing = physio.loc[anchor : anchor + after]
    if len(leading) > 0 and len(trailing) > 0 and leading.index[-1] == trailing.index[0]:
        trailing = trailing.iloc[1:]
    return leading, trailing


def _encode_window(model, window):
    """Encode one window and append its scaled reconstruction error.

    Returns a CPU tensor of shape (latent_steps, latent_channels + 1) and the
    scalar error (mean squared reconstruction error times 1000). The last
    column repeats that error on every row.
    """
    signal = torch.tensor(window.to_numpy(), dtype=torch.float32).to(device)
    # Inference only: no autograd graph should be kept alive in the returned tensors
    with torch.no_grad():
        latent_space = model.encode(signal.permute(1, 0).unsqueeze(0))
        reconstruction = model.decode(latent_space).squeeze(0).permute(1, 0)
        error = torch.nn.functional.mse_loss(reconstruction, signal) * 1000  # scale the error
        features = latent_space.squeeze(0).permute(1, 0)
        features = torch.cat((features, error.expand(features.shape[0], 1)), 1)
    # Hand back CPU tensors so callers can convert them with .numpy()
    return features.cpu(), error


def _collect_participant(file_path, participant):
    """Build the driving / takeover observations for one participant's driving file."""
    rows_before, rows_after = 4000, 8000
    span_before = pd.Timedelta(seconds=4)
    span_after = pd.Timedelta(seconds=8)

    # Process the physiological data
    experiment_physio = pd.read_csv(
        file_path, usecols=["Time", "ECG_Clean", "RSP_Clean", "EDA_Tonic", "EDA_Phasic"]
    )
    experiment_physio["Time"] = pd.to_timedelta(experiment_physio["Time"])
    experiment_physio = experiment_physio.set_index("Time")

    # Normalize the data
    scalar = MinMaxScaler()
    experiment_physio = pd.DataFrame(
        scalar.fit_transform(experiment_physio),
        columns=experiment_physio.columns,
        index=experiment_physio.index,
    )

    # Obstacle Trigger Times
    participant_takeover_times = exp2_takeover_times[
        exp2_takeover_times["subject_id"] == participant
    ].copy()
    if participant_takeover_times.empty:
        # .values[0] below would raise IndexError for an unknown participant
        print(f"No takeover times found for {participant}; skipping")
        return [], []
    participant_takeover_times.iloc[:, 1:] = participant_takeover_times.iloc[:, 1:].apply(
        pd.to_timedelta, args=("s",), errors="coerce"
    )

    model = None  # loaded on first use, then reused for every obstacle
    driving_data, takeover_data = [], []

    for obstacle in ["Deer", "Cone", "Frog", "Can"]:
        print(obstacle)

        # Obstacle Trigger Time; skip obstacles without one
        obstacle_trigger_time = pd.to_timedelta(
            participant_takeover_times[f"{obstacle}TOR"].values[0], unit="s"
        )
        if pd.isnull(obstacle_trigger_time):
            continue
        # The reference "driving" window is anchored 60 s before the trigger
        minute_before_obstacle = obstacle_trigger_time - pd.Timedelta(seconds=60)

        # 4 s before and 8 s after each anchor
        driving_before, driving_after = _window_pair(
            experiment_physio, minute_before_obstacle, span_before, span_after
        )
        takeover_before, takeover_after = _window_pair(
            experiment_physio, obstacle_trigger_time, span_before, span_after
        )

        # Every part must hold at least its full row count, otherwise skip the obstacle
        if (
            len(driving_before) < rows_before
            or len(driving_after) < rows_after
            or len(takeover_before) < rows_before
            or len(takeover_after) < rows_after
        ):
            continue

        # Keep the rows_before rows nearest the anchor and the first rows_after rows after it
        driving_window = pd.concat(
            [driving_before.iloc[-rows_before:], driving_after.iloc[:rows_after]]
        )
        takeover_window = pd.concat(
            [takeover_before.iloc[-rows_before:], takeover_after.iloc[:rows_after]]
        )

        # Load the participant's model
        if model is None:
            model = ConvAutoencoder().to(device)
            # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
            model.load_state_dict(
                torch.load(
                    f"../Convolutional Autoencoder Models/{participant}_model.pth",
                    map_location=device,
                )
            )
            model.eval()

        driving_observations, driving_observations_error = _encode_window(model, driving_window)
        print(f"Driving Observations Error: {driving_observations_error}")

        takeover_observations, takeover_observations_error = _encode_window(model, takeover_window)
        print(f"Takeover Observations Error: {takeover_observations_error}")

        driving_data.append(driving_observations)
        takeover_data.append(takeover_observations)

    return driving_data, takeover_data


def collect_observations(
    exp2_folder_path,
):
    """
    Encode a reference driving window and a takeover window for every obstacle.

    For each "<participant>_driving..." file in the folder and each obstacle
    (Deer, Cone, Frog, Can) with a trigger time, two 12 s windows are cut from
    the min-max normalized signals: one around the trigger (4 s before, 8 s
    after) and one around the point 60 s before the trigger. Each window is
    passed through the participant's trained ConvAutoencoder; the observation
    is the latent sequence with the scaled reconstruction error appended as
    an extra column. Obstacles whose windows are incomplete are skipped.

    Reads the notebook globals ``exp2_takeover_times``, ``device`` and
    ``ConvAutoencoder``, and the saved models in
    "../Convolutional Autoencoder Models".

    Args:
        exp2_folder_path (str): Folder holding the preprocessed Exp2 files,
            named "<participant>_<period>.<ext>".

    Returns:
        list: CPU tensors of shape (latent_steps, 4 * 256 + 1), one per
            reference driving window.
        list: CPU tensors of the same shape, one per takeover window, in the
            same order as the first list.
    """

    driving_observations_data = []
    takeover_observations_data = []

    # Exp2
    for file in os.listdir(exp2_folder_path):
        # Split the file name into the participant and period
        name_parts = file.split("_")
        if len(name_parts) < 2:
            continue  # not a "<participant>_<period>" recording
        participant = name_parts[0]
        period = name_parts[1].split(".")[0]

        if "baseline" in period:
            continue
        elif "driving" in period:
            print(participant)
            print("-" * 50)

            driving_data, takeover_data = _collect_participant(
                exp2_folder_path + "/" + file, participant
            )
            driving_observations_data.extend(driving_data)
            takeover_observations_data.extend(takeover_data)

        print("-" * 50)

    return driving_observations_data, takeover_observations_data

---
# Collecting Observations

In [None]:
# Build the encoded driving / takeover observation sets from the Exp2 recordings
print("Collecting Features")
driving_observations, takeover_observations = collect_observations(
    exp2_folder_path=exp2_folder_path
)
print(
    f"Collected {len(driving_observations)} driving observations and {len(takeover_observations)} takeover observations."
)

---

In [None]:
# Hold out 30% of each observation set for testing.
# A fixed random_state makes the split (and the scores computed from it)
# reproducible under Restart & Run All.
driving_observations_train, driving_observations_test = train_test_split(
    driving_observations, test_size=0.3, random_state=42
)

takeover_observations_train, takeover_observations_test = train_test_split(
    takeover_observations, test_size=0.3, random_state=42
)

In [None]:
# Stack the driving training sequences for hmmlearn.
# Each observation is (time_steps, n_features). hmmlearn expects all sequences
# stacked row-wise into (n_samples, n_features), plus the length of each
# sequence so it can recover the boundaries (the lengths must sum to n_samples).
driving_sequences = [data.detach().cpu().numpy() for data in driving_observations_train]
X_driving = np.concatenate(driving_sequences, axis=0)
X_driving_lengths = [sequence.shape[0] for sequence in driving_sequences]
assert sum(X_driving_lengths) == X_driving.shape[0]
X_driving.shape

In [None]:
# Stack the takeover training sequences for hmmlearn.
# Each observation is (time_steps, n_features). hmmlearn expects all sequences
# stacked row-wise into (n_samples, n_features), plus the length of each
# sequence so it can recover the boundaries (the lengths must sum to n_samples).
takeover_sequences = [data.detach().cpu().numpy() for data in takeover_observations_train]
X_takeover = np.concatenate(takeover_sequences, axis=0)
X_takeover_lengths = [sequence.shape[0] for sequence in takeover_sequences]
assert sum(X_takeover_lengths) == X_takeover.shape[0]
X_takeover.shape

In [None]:
# Hyper-parameter grid for the two GMM-HMMs: candidate numbers of hidden
# states and of mixture components per state, plus the covariance structures.
n_components_slow = list(range(1, 5))
n_mix_slow = list(range(1, 5))
n_components_fast = list(range(1, 5))
n_mix_fast = list(range(1, 5))
covariance_types = ["full", "diag", "spherical"]

In [None]:

# One GMM-HMM per class, both in the simplest configuration:
# a single hidden state with a single full-covariance Gaussian.
driving_model = hmm.GMMHMM(n_components=1, n_mix=1, covariance_type="full")
takeover_model = hmm.GMMHMM(n_components=1, n_mix=1, covariance_type="full")

# Fit each model on its own class's stacked training sequences
driving_model.fit(X_driving, lengths=X_driving_lengths)
takeover_model.fit(X_takeover, lengths=X_takeover_lengths)


In [None]:
# Score the held-out sequences: each one is assigned to whichever model gives
# it the higher log-likelihood. "Positive" means takeover, "negative" means
# driving; a tie counts as a misclassification.
tp = 0
fp = 0
tn = 0
fn = 0

for observation in driving_observations_test:
    observation = observation.detach().cpu().numpy()

    if driving_model.score(observation) > takeover_model.score(observation):
        tn += 1
    else:
        # A driving window labelled as takeover is a false positive
        fp += 1

for observation in takeover_observations_test:
    observation = observation.detach().cpu().numpy()

    if takeover_model.score(observation) > driving_model.score(observation):
        tp += 1
    else:
        # A takeover window labelled as driving is a false negative
        fn += 1

# Named test_accuracy (not accuracy) so it cannot shadow the accuracy(...)
# evaluation function that the grid-search cell calls.
n_test = len(driving_observations_test) + len(takeover_observations_test)
test_accuracy = (tp + tn) / n_test if n_test else float("nan")

print(f"Accuracy: {test_accuracy}")
print(f"True Positives: {tp}")
print(f"False Positives: {fp}")
print(f"True Negatives: {tn}")
print(f"False Negatives: {fn}")

In [None]:

# Grid search over the HMM hyper-parameters. One row per combination is
# appended to the results file after every run, so an interrupted search
# resumes where it stopped.
results_path = "results-cehmm.csv"
results_columns = ["Feature Type", "Window Size", "Step Size", "Components Slow", "Mixtures Slow",  "Components Fast", "Mixtures Fast", "Covariance Type", "Mean Accuracy", "Standard Deviation", "Max Accuracy", "Min Accuracy", "True Positives", "False Positives", "True Negatives", "False Negatives", "Precision", "Recall", "F1 Score"]

# NOTE(review): the original rows had no value for "Feature Type"; "CAE" is a
# placeholder label - align it with whatever other results files use.
feature_type = "CAE"

# The search relies on an accuracy(...) evaluation function that this cell does
# not define. Fail loudly if it is missing or has been overwritten by a
# number, instead of recording every combination as "failed".
evaluate = globals().get("accuracy")
if not callable(evaluate):
    raise NameError(
        "accuracy(driving_observations, takeover_observations, n_slow, n_fast, m_slow, m_fast, cov) "
        "must be defined as a function before running the grid search; "
        "it is missing or shadowed by a non-callable value."
    )

# Load previous results once; None means no results file exists yet
results_df = pd.read_csv(results_path) if os.path.exists(results_path) else None

# itertools.product iterates in the same order as five nested loops
for n_slow, m_slow, n_fast, m_fast, cov in itertools.product(
    n_components_slow, n_mix_slow, n_components_fast, n_mix_fast, covariance_types
):
    # check if the results for these parameters already exist
    if results_df is not None:
        already_done = results_df[
            (results_df["Components Slow"] == n_slow)
            & (results_df["Mixtures Slow"] == m_slow)
            & (results_df["Components Fast"] == n_fast)
            & (results_df["Mixtures Fast"] == m_fast)
            & (results_df["Covariance Type"] == cov)
        ]
        if not already_done.empty:
            print("Results already exist for these parameters.")
            continue

    print("-------------------------------------------------")
    print(f"Slow Components: {n_slow}, Slow Mixtures: {m_slow}, Fast Components: {n_fast}, Fast Mixtures: {m_fast}, Covariance Type: {cov}")
    try:
        accuracies, true_positives_list, false_positives_list, true_negatives_list, false_negatives_list = evaluate(driving_observations, takeover_observations, n_slow, n_fast, m_slow, m_fast, cov)

        # Accuracy
        print(f"Average Accuracy: {np.mean(accuracies)}")
        print(f"Standard Deviation: {np.std(accuracies)}")
        print(f"Max Accuracy: {np.max(accuracies)}")
        print(f"Min Accuracy: {np.min(accuracies)}")

        # Report the confusion counts of the best run (first one on ties)
        max_accuracy_index = int(np.argmax(accuracies))
        tp = true_positives_list[max_accuracy_index]
        print(f"True Positives: {tp}")
        fp = false_positives_list[max_accuracy_index]
        print(f"False Positives: {fp}")
        tn = true_negatives_list[max_accuracy_index]
        print(f"True Negatives: {tn}")
        fn = false_negatives_list[max_accuracy_index]
        print(f"False Negatives: {fn}")

        # Precision, Recall, and F1 Score; every zero denominator yields 0
        precision = tp / (tp + fp) if tp + fp else 0
        recall = tp / (tp + fn) if tp + fn else 0
        f1_score = 2 * precision * recall / (precision + recall) if precision + recall else 0
        print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1_score}')
        print("-------------------------------------------------")
        print()
        metrics = [np.mean(accuracies), np.std(accuracies), np.max(accuracies), np.min(accuracies), tp, fp, tn, fn, precision, recall, f1_score]
    except Exception as error:
        # Best effort: a combination that cannot be fitted is recorded with empty metrics
        print(f"Model Parameters: {n_slow}, {m_slow}, {n_fast}, {m_fast}, {cov} failed.")
        print(f"Reason: {error!r}")
        metrics = [None] * 11

    # One value per entry of results_columns (8 settings + 11 metrics)
    row = [feature_type, segment_size, step_size, n_slow, m_slow, n_fast, m_fast, cov, *metrics]
    row_df = pd.DataFrame([row], columns=results_columns)

    # save the results
    results_df = row_df if results_df is None else pd.concat([results_df, row_df], ignore_index=True)
    results_df.to_csv(results_path, index=False)