In [7]:
import os
import warnings

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim

from hmmlearn import hmm

# Suppress warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convolutional Autoencoder Structure:

In [9]:
# Define the Convolutional Autoencoder (CAE) architecture
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()

        self.ls = 8

        # Define separate encoders for each signal
        self.ecg_encoder = nn.Sequential(
            nn.Conv1d(1, 4, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(4, 8, kernel_size=3, stride=2, padding=1),
        )
        self.rsp_encoder = self.ecg_encoder
        self.eda_tonic_encoder = self.ecg_encoder
        self.eda_phasic_encoder = self.ecg_encoder

        # Fully connected layer to compress the latent space
        self.fc = nn.Linear(32, self.ls)

        # Fully connected layer to decompress the latent space
        self.fc_decoded = nn.Linear(self.ls, 32)

        # Define separate decoders for each signal
        self.ecg_decoder = nn.Sequential(
            nn.ConvTranspose1d(8, 4, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(4, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        )
        self.rsp_decoder = self.ecg_decoder
        self.eda_tonic_decoder = self.ecg_decoder
        self.eda_phasic_decoder = self.ecg_decoder

    def encode(self, x):
        # Split input by channel for independent processing
        ecg = x[:, 0, :].unsqueeze(1)
        rsp = x[:, 1, :].unsqueeze(1)
        eda_tonic = x[:, 2, :].unsqueeze(1)
        eda_phasic = x[:, 3, :].unsqueeze(1)

        # Encode each signal independently
        ecg_encoded = self.ecg_encoder(ecg)
        rsp_encoded = self.rsp_encoder(rsp)
        eda_tonic_encoded = self.eda_tonic_encoder(eda_tonic)
        eda_phasic_encoded = self.eda_phasic_encoder(eda_phasic)

        # Concatenate the latent representations along the last dimension
        latent_space = torch.cat(
            (ecg_encoded, rsp_encoded, eda_tonic_encoded, eda_phasic_encoded), dim=1
        )

        latent_space = latent_space.permute(0, 2, 1)

        # Compress the latent space
        latent_space = self.fc(latent_space)

        return latent_space

    def decode(self, latent_space):
        # Decompress the latent space
        latent_space = self.fc_decoded(latent_space)
        latent_space = latent_space.permute(0, 2, 1)

        # Split latent space back into separate channels
        ecg_latent, rsp_latent, eda_tonic_latent, eda_phasic_latent = torch.split(
            latent_space, self.ls, dim=1
        )
        # Decode each signal independently
        ecg_decoded = self.ecg_decoder(ecg_latent)
        rsp_decoded = self.rsp_decoder(rsp_latent)
        eda_tonic_decoded = self.eda_tonic_decoder(eda_tonic_latent)
        eda_phasic_decoded = self.eda_phasic_decoder(eda_phasic_latent)

        # Concatenate the decoded signals to form the output
        reconstructed = torch.cat(
            (ecg_decoded, rsp_decoded, eda_tonic_decoded, eda_phasic_decoded), dim=1
        )

        return reconstructed

    def forward(self, x):
        latent_space = self.encode(x)
        reconstructed = self.decode(latent_space)
        return reconstructed

In [10]:
learning_rate = 0.001

# Train Autoencoders to Each Participants' Baseline Data

In [11]:
segment_size = "12s"
step_size = "0.001s"

In [12]:
# loop through baseline data
for file in os.listdir("./Physiological Preprocessed/Exp2"):
    participant = file.split("_")[0]
    if "baseline" not in file:
        continue
    elif os.path.exists(f"./Convolutional Autoencoder Models/Presence of Takeovers/{participant}_model.pth"):
        print(f"Model for {file} already exists")
        continue

    print(f"Loading {participant} data")
    print(f"-" * 50)

    # load data
    physiological_data = pd.read_csv(f"./Physiological Preprocessed/Exp2/{file}", usecols=["Time", "ECG_Clean", "RSP_Clean", "EDA_Tonic", "EDA_Phasic"])
    physiological_data["Time"] = pd.to_timedelta(physiological_data["Time"])
    physiological_data.set_index("Time", inplace=True)

    print(f"Processing participant {participant} data")

    # Normalize the data
    scalar = MinMaxScaler()
    data = scalar.fit_transform(physiological_data)
    physiological_data = pd.DataFrame(data, columns=physiological_data.columns, index=physiological_data.index)

    # Split the data into sliding windows
    X = []
    len_segment = pd.Timedelta(segment_size) / pd.Timedelta(step_size)
    while len(physiological_data) > 0:
        start_index = physiological_data.index[0]
        end_index = start_index + pd.Timedelta(segment_size)
        segment = physiological_data[:end_index]
        physiological_data = physiological_data[end_index:]

        if len(segment) > len_segment:
            length = len(segment) - len_segment
            segment = segment.drop(segment.tail(int(length)).index)

        if len(segment) == len_segment:
            X.append(segment.to_numpy())

    X = np.stack(X)

    # Convert to PyTorch tensors
    X = torch.tensor(X, dtype=torch.float32).to(device)

    features = X.shape[2]

    # Initialize the model
    model = ConvAutoencoder().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

    # Training parameters
    batch_size = 4
    num_epochs = 300

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        for i in range(0, len(X), batch_size):

            # Get the batch and reshape it for Conv1d (batch, channels, sequence_length)
            batch = X[i : i + batch_size].permute(0, 2, 1)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(batch)

            # Calculate loss
            loss = criterion(outputs, batch)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(X):.4f}")

    # Save the model
    if not os.path.exists("./Convolutional Autoencoder Models/Presence of Takeovers"):
        os.makedirs("./Convolutional Autoencoder Models/Presence of Takeovers")
    torch.save(model.state_dict(), f"./Convolutional Autoencoder Models/Presence of Takeovers/{participant}_model.pth")

Loading ST44 data
--------------------------------------------------
Processing participant ST44 data
Epoch [1/300], Loss: 0.0099
Epoch [2/300], Loss: 0.0097
Epoch [3/300], Loss: 0.0096
Epoch [4/300], Loss: 0.0095
Epoch [5/300], Loss: 0.0094
Epoch [6/300], Loss: 0.0094
Epoch [7/300], Loss: 0.0093
Epoch [8/300], Loss: 0.0092
Epoch [9/300], Loss: 0.0092
Epoch [10/300], Loss: 0.0091
Epoch [11/300], Loss: 0.0091
Epoch [12/300], Loss: 0.0090
Epoch [13/300], Loss: 0.0089
Epoch [14/300], Loss: 0.0088
Epoch [15/300], Loss: 0.0087
Epoch [16/300], Loss: 0.0086
Epoch [17/300], Loss: 0.0085
Epoch [18/300], Loss: 0.0084
Epoch [19/300], Loss: 0.0083
Epoch [20/300], Loss: 0.0082
Epoch [21/300], Loss: 0.0081
Epoch [22/300], Loss: 0.0079
Epoch [23/300], Loss: 0.0077
Epoch [24/300], Loss: 0.0076
Epoch [25/300], Loss: 0.0073
Epoch [26/300], Loss: 0.0071
Epoch [27/300], Loss: 0.0067
Epoch [28/300], Loss: 0.0063
Epoch [29/300], Loss: 0.0060
Epoch [30/300], Loss: 0.0057
Epoch [31/300], Loss: 0.0053
Epoch [3

---
## Constructing Observations

### Grab the physiological timestamps / takeover times

In [13]:
processed_physio_folder_path = "./Physiological Preprocessed/"

exp2_folder_path = processed_physio_folder_path + "Exp2"

exp2_takeover_times = pd.read_csv(
    "./AdVitam/Exp2/Preprocessed/Physio and Driving/timestamps_obstacles.csv"
)
exp2_takeover_times.iloc[:, 2:] = exp2_takeover_times.iloc[:, 2:].apply(pd.to_timedelta, unit="s")
exp2_takeover_times["subject_id"] = exp2_takeover_times["subject_id"].apply(
    lambda x: x.split("T")[0] + "T" + x.split("T")[1].zfill(2)
)
exp2_takeover_times["subject_id"] = exp2_takeover_times["subject_id"].astype(str)
exp2_takeover_times.drop(columns=["label_st"], inplace=True)
exp2_takeover_times.sort_values(by=["subject_id"], inplace=True)

for column in exp2_takeover_times.columns:
    if "TrigObs" in column:
        exp2_takeover_times = exp2_takeover_times.rename(
            columns={column: column.replace("TrigObs", "") + "TOR"}
        )
    elif "RepObs" in column:
        exp2_takeover_times = exp2_takeover_times.rename(
            columns={column: column.replace("RepObs", "Response")}
        )

exp2_takeover_times

Unnamed: 0,subject_id,DeerTOR,DetObsDeer,ResponseDeer,ConeTOR,DetObsCone,ResponseCone,FrogTOR,DetObsFrog,ResponseFrog,CanTOR,DetObsCan,ResponseCan,FA1TOR,DetObsFA1,ResponseFA1,FA2TOR,DetObsFA2,ResponseFA2
0,NST01,0 days 00:02:56.705100,0 days 00:02:59.093200,0 days 00:03:03.523800,0 days 00:06:56.214000,0 days 00:06:58.510900,0 days 00:06:58.690200,0 days 00:08:34.815700,0 days 00:08:38.845600,NaT,0 days 00:13:06.640800,0 days 00:13:12.859100,NaT,0 days 00:16:23.624000,NaT,NaT,0 days 00:18:02.245000,0 days 00:18:06.363000,NaT
2,NST03,0 days 00:13:35.204000,0 days 00:13:38.469300,0 days 00:13:39.824400,0 days 00:10:19.808800,0 days 00:10:21.816700,0 days 00:10:23.032200,0 days 00:04:19.471200,0 days 00:04:23.789000,NaT,0 days 00:17:07.400900,0 days 00:17:10.785900,NaT,0 days 00:06:18.340900,NaT,NaT,0 days 00:18:35.174900,0 days 00:18:42.818900,NaT
4,NST05,0 days 00:07:08.961300,0 days 00:07:10.742400,0 days 00:07:11.272600,0 days 00:12:23.166400,0 days 00:12:24.461700,0 days 00:12:38.273600,0 days 00:14:59.418600,0 days 00:15:00.724600,0 days 00:15:02.753600,0 days 00:04:28.071600,0 days 00:04:29.987500,0 days 00:04:31.402700,0 days 00:02:23.631400,0 days 00:02:24.742400,NaT,0 days 00:10:29.173600,0 days 00:10:30.421200,NaT
6,NST07,0 days 00:02:36.060300,0 days 00:02:39.121900,0 days 00:02:46.883100,0 days 00:06:11.856300,0 days 00:06:13.610700,0 days 00:06:15.090900,0 days 00:08:28.559300,0 days 00:08:31.772600,0 days 00:08:32.484500,0 days 00:12:00.085800,0 days 00:12:02.153100,0 days 00:12:02.817700,0 days 00:16:34.142400,0 days 00:16:37.435400,NaT,0 days 00:18:05.472400,0 days 00:18:08.195400,NaT
8,NST09,0 days 00:13:11.586070,0 days 00:13:13.970070,0 days 00:13:16.070470,0 days 00:08:18.826970,0 days 00:08:20.710570,0 days 00:08:22.473270,0 days 00:03:16.580170,0 days 00:03:21.515270,0 days 00:03:24.339770,0 days 00:15:36.305270,0 days 00:15:38.094270,0 days 00:15:39.460270,0 days 00:05:37.980670,0 days 00:05:52.104570,NaT,0 days 00:19:03.294270,0 days 00:19:07.322270,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,ST82,0 days 00:16:22.060600,0 days 00:16:25.019600,0 days 00:16:26.115600,0 days 00:11:27.337900,0 days 00:11:30.174000,0 days 00:11:31.206600,0 days 00:05:00.722700,0 days 00:05:05.274800,0 days 00:05:04.673200,0 days 00:02:09.414300,0 days 00:02:12.728300,NaT,0 days 00:08:01.118200,0 days 00:08:23.261900,NaT,0 days 00:13:10.705300,0 days 00:13:37.020600,NaT
83,ST84,0 days 00:13:08.533100,0 days 00:13:10.099000,0 days 00:13:15.842500,0 days 00:16:19.256299999,0 days 00:16:20.686300,0 days 00:16:22.944300,0 days 00:02:01.331000,0 days 00:02:02.753000,0 days 00:02:04.980400,0 days 00:08:02.602800,0 days 00:08:04.146900,0 days 00:05:04.680500,0 days 00:02:01.331000,0 days 00:02:02.753000,0 days 00:02:04.980400,0 days 00:05:00.889300,0 days 00:05:02.173200,0 days 00:05:04.680500
85,ST86,0 days 00:05:00.561600,0 days 00:05:02.025700,0 days 00:05:03.506499999,0 days 00:02:56.399600,0 days 00:02:57.203300,0 days 00:03:04.042600,0 days 00:13:40.438800,0 days 00:13:42.381800,0 days 00:13:45.033800,0 days 00:16:52.421300,0 days 00:16:54.161300,0 days 00:16:54.876300,0 days 00:08:11.486600,0 days 00:08:13.166100,NaT,0 days 00:10:30.702800,0 days 00:10:32.862900,NaT
87,ST88,0 days 00:17:33.644200,0 days 00:17:35.548200,0 days 00:17:38.642200,0 days 00:11:47.429500,0 days 00:11:49.730800,0 days 00:11:52.368200,0 days 00:05:03.127099999,0 days 00:05:07.203900,0 days 00:05:07.837400,0 days 00:02:22.286400,0 days 00:02:23.154300,NaT,0 days 00:08:03.736300,0 days 00:08:08.586700,NaT,0 days 00:13:32.521300,0 days 00:13:34.723000,NaT


---

In [14]:
higher_error = 0

# Exp2
for file in os.listdir(exp2_folder_path):
    # Split the file name into the participant and period
    f = file.split("_")
    participant = f[0]
    period = f[1].split(".")[0]

    if "baseline" in period:
        continue
    elif "driving" in period:
        print(participant)
        print(f"-" * 50)

        # Process the physiological data
        experiment_physio = pd.read_csv(
            exp2_folder_path + "/" + file,
            usecols=["Time", "ECG_Clean", "RSP_Clean", "EDA_Tonic", "EDA_Phasic"],
        )
        experiment_physio["Time"] = pd.to_timedelta(experiment_physio["Time"])
        experiment_physio.set_index("Time", inplace=True)

        # Normalize the data
        scalar = MinMaxScaler()
        data = scalar.fit_transform(experiment_physio)
        experiment_physio = pd.DataFrame(
            data, columns=experiment_physio.columns, index=experiment_physio.index
        )

        # Obstacle Trigger Times
        participant_takeover_times = exp2_takeover_times[
            exp2_takeover_times["subject_id"] == participant
        ].copy()
        participant_takeover_times.iloc[:, 1:] = participant_takeover_times.iloc[:, 1:].apply(
            pd.to_timedelta, args=("s",), errors="coerce"
        )

        obstacles = ["Deer", "Cone", "Frog", "Can"]
        for obstacle in obstacles:
            print(obstacle)

            # Obstacle Trigger Time
            obstacle_trigger_time = pd.to_timedelta(
                participant_takeover_times[f"{obstacle}TOR"].values[0], unit="s"
            )
            minute_before_obstacle = obstacle_trigger_time - pd.Timedelta(seconds=60)

            # If the obstacle trigger time is null, skip the obstacle
            if pd.isnull(obstacle_trigger_time):
                continue
            if pd.isnull(minute_before_obstacle):
                continue

            # Observations 1 minute before and after the obstacle
            driving_observations_before_obstacle = experiment_physio.loc[
                minute_before_obstacle - pd.Timedelta(seconds=4) : minute_before_obstacle
            ].copy()
            driving_observations_after_obstacle = experiment_physio.loc[
                minute_before_obstacle : minute_before_obstacle + pd.Timedelta(seconds=8)
            ].copy()

            # Observations 3 seconds before and after the obstacle
            takeover_observations_before_obstacle = experiment_physio.loc[
                obstacle_trigger_time - pd.Timedelta(seconds=4) : obstacle_trigger_time
            ].copy()
            takeover_observations_after_obstacle = experiment_physio.loc[
                obstacle_trigger_time : obstacle_trigger_time + pd.Timedelta(seconds=8)
            ].copy()

            # Check if the last observation of before obstacle is the same as the first observation of after obstacle
            if (
                len(driving_observations_before_obstacle) > 0
                and len(driving_observations_after_obstacle) > 0
                and driving_observations_before_obstacle.tail(1).index
                == driving_observations_after_obstacle.head(1).index
            ):
                # drop the first observation of after obstacle
                driving_observations_after_obstacle = driving_observations_after_obstacle.iloc[1:]

            if (
                len(takeover_observations_before_obstacle) > 0
                and len(takeover_observations_after_obstacle) > 0
                and takeover_observations_before_obstacle.tail(1).index
                == takeover_observations_after_obstacle.head(1).index
            ):
                # drop the first observation of after obstacle
                takeover_observations_after_obstacle = takeover_observations_after_obstacle.iloc[1:]

            # Check if the length of the observations is 3000
            if len(driving_observations_before_obstacle) > 4000:
                # drop the first n rows
                n = len(driving_observations_before_obstacle) - 4000
                driving_observations_before_obstacle = driving_observations_before_obstacle.iloc[n:]
            elif len(driving_observations_before_obstacle) < 4000:
                continue

            if len(driving_observations_after_obstacle) > 8000:
                # drop the last n rows
                driving_observations_after_obstacle = driving_observations_after_obstacle.iloc[
                    :8000
                ]
            elif len(driving_observations_after_obstacle) < 8000:
                continue

            if len(takeover_observations_before_obstacle) > 4000:
                # drop the first n rows
                n = len(takeover_observations_before_obstacle) - 4000
                takeover_observations_before_obstacle = takeover_observations_before_obstacle.iloc[
                    n:
                ]
            elif len(takeover_observations_before_obstacle) < 4000:
                continue

            if len(takeover_observations_after_obstacle) > 8000:
                # drop the last n rows
                takeover_observations_after_obstacle = takeover_observations_after_obstacle.iloc[
                    :8000
                ]
            elif len(takeover_observations_after_obstacle) < 8000:
                continue

            # Load the participant's model
            model = ConvAutoencoder().to(device)
            model.load_state_dict(
                torch.load(f"./Convolutional Autoencoder Models/Presence of Takeovers/{participant}_model.pth")
            )
            model.eval()

            # Combine the observations
            driving_observations = pd.concat(
                [driving_observations_before_obstacle, driving_observations_after_obstacle]
            )
            takeover_observations = pd.concat(
                [takeover_observations_before_obstacle, takeover_observations_after_obstacle]
            )

            # Encode the observations
            driving_observations = driving_observations.to_numpy()
            input_observations = torch.tensor(driving_observations, dtype=torch.float32).to(device)
            latent_space = model.encode(input_observations.permute(1, 0).unsqueeze(0))
            driving_observations = latent_space.squeeze(0).permute(1, 0)

            # add the reconstruction error to the observations
            output = model.decode(latent_space).squeeze(0).permute(1, 0)
            driving_observations_error = (
                torch.nn.functional.mse_loss(
                    output,
                    input_observations,
                )
                * 1000
            )  # scale the error
            print(f"Driving Observations Error: {driving_observations_error}")
            error_expanded = driving_observations_error.expand(
                driving_observations.shape[1], 1
            ).permute(1, 0)
            driving_observations = torch.cat((driving_observations, error_expanded), 0)

            takeover_observations = takeover_observations.to_numpy()
            input_observations = torch.tensor(takeover_observations, dtype=torch.float32).to(device)
            latent_space = model.encode(input_observations.permute(1, 0).unsqueeze(0))
            takeover_observations = latent_space.squeeze(0).permute(1, 0)

            # add the reconstruction error to the observations
            output = model.decode(latent_space).squeeze(0).permute(1, 0)
            takeover_observations_error = (
                torch.nn.functional.mse_loss(
                    output,
                    input_observations,
                )
                * 1000
            )  # scale the error
            print(f"Takeover Observations Error: {takeover_observations_error}")
            error_expanded = takeover_observations_error.expand(
                takeover_observations.shape[1], 1
            ).permute(1, 0)
            takeover_observations = torch.cat((takeover_observations, error_expanded), 0)

            if driving_observations_error > takeover_observations_error:
                higher_error = higher_error + 1
                print(f"Higher Error: {higher_error}")

            print(f"-" * 50)

print(f"Higher Error: {higher_error}")
print(f"Total: {len(os.listdir(exp2_folder_path)) * 4}")

print(f"{higher_error / (len(os.listdir(exp2_folder_path)) * 4) * 100}%")

ST88
--------------------------------------------------
Deer
Driving Observations Error: 1.2588304281234741
Takeover Observations Error: 4.994279861450195
--------------------------------------------------
Cone
Driving Observations Error: 1.2207791805267334
Takeover Observations Error: 1.0642640590667725
Higher Error: 1
--------------------------------------------------
Frog
Driving Observations Error: 1.9622762203216553
Takeover Observations Error: 1.5799169540405273
Higher Error: 2
--------------------------------------------------
Can
Driving Observations Error: 2.83007550239563
Takeover Observations Error: 1.822409749031067
Higher Error: 3
--------------------------------------------------
NST81
--------------------------------------------------
Deer
Driving Observations Error: 0.07310201227664948
Takeover Observations Error: 0.15942463278770447
--------------------------------------------------
Cone
Driving Observations Error: 0.12442996352910995
Takeover Observations Error: 0.108