In [1]:
import os
import sys
import numpy as np
import pandas as pd
import neurokit2 as nk
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import RandomizedSearchCV


sys.path.append("../")
from useful_functions.physio_data.process_physio_timestamps import process_physio_timestamps
from useful_functions.check_for_missing_data import check_for_missing_data

---

In [2]:
# Raw-data locations: the driving logs and the physio text exports.
driving_data_folder, physio_data_folder = (
    "../../AdVitam/Exp2/Raw/Driving",
    "../../AdVitam/Exp2/Raw/Physio/Txt",
)

In [3]:
# Driver IDs that are excluded by hand, on top of whatever
# check_for_missing_data reports for the two raw-data folders.
# Left commented out in the original (i.e. NOT excluded):
# "ST22", "NST87", "ST14", "ST12", "NST73", "ST10"
manually_excluded_drivers = ["NST77", "NST11"]

drivers_to_exclude = check_for_missing_data(driving_data_folder, physio_data_folder)
drivers_to_exclude.extend(manually_excluded_drivers)

---

### Reading in the saved data

In [4]:
# One-off cleanup, kept disabled: delete the per-driver CSV files under the
# processed physio directory.
#
# NOTE(review): the earlier draft of this snippet re-joined `directory` with
# `filename` inside the inner loop and passed that path (the driver's folder)
# to os.remove, which matches the PermissionError recorded under this cell.
# The path handed to os.remove must be the folder joined with the CSV file
# name, as written below.
# NOTE(review): the next cell reads these CSV files, so running this cleanup
# first would presumably break it - confirm before enabling.

# directory = "../../AdVitam_Processed/Exp2/Processed/Physio"

# for filename in os.listdir(directory):
#     driver_name = filename

#     driver_folder = os.path.join(directory, filename)
#     for file in os.listdir(driver_folder):
#         if file.endswith(".csv"):
#             os.remove(os.path.join(driver_folder, file))

PermissionError: [Errno 1] Operation not permitted: '../../AdVitam_Processed/Exp2/Processed/Physio/ST66'

In [5]:
# Load every driver's processed physio recordings (baseline, training and
# driving) into `physiological_data`, and write a .txt copy of each CSV next
# to the original file.
directory = "../../AdVitam_Processed/Exp2/Processed/Physio"

baseline_file_name = '_baseline'
training_file_name = '_training'
driving_file_name = '_driving'

# driver name -> {'baseline': DataFrame, 'training': DataFrame, 'driving': DataFrame}
physiological_data = {}

# os.listdir returns entries in arbitrary order; sorting keeps runs reproducible
for filename in sorted(os.listdir(directory)):
    # the name of the folder is the driver's name
    driver_name = filename
    file_path = os.path.join(directory, filename)

    # only driver folders hold recordings; skip stray files (e.g. .DS_Store)
    if not os.path.isdir(file_path):
        continue

    # set the file paths
    driver_baseline_file = os.path.join(file_path, driver_name + baseline_file_name + '.csv')
    driver_training_file = os.path.join(file_path, driver_name + training_file_name + '.csv')
    driver_driving_file = os.path.join(file_path, driver_name + driving_file_name + '.csv')

    # read the csv files (a missing file raises FileNotFoundError on purpose,
    # so incomplete driver folders are noticed instead of silently skipped)
    baseline_data = pd.read_csv(driver_baseline_file)
    training_data = pd.read_csv(driver_training_file)
    driving_data = pd.read_csv(driver_driving_file)

    # save the files as txt files (same name, '.csv' replaced by '.txt')
    baseline_data.to_csv(os.path.splitext(driver_baseline_file)[0] + '.txt', index=False)
    training_data.to_csv(os.path.splitext(driver_training_file)[0] + '.txt', index=False)
    driving_data.to_csv(os.path.splitext(driver_driving_file)[0] + '.txt', index=False)

    # keep the frames in memory: later cells index physiological_data[driver]
    driver_data = {
        'baseline': baseline_data,
        'training': training_data,
        'driving': driving_data,
    }
    physiological_data[driver_name] = driver_data

: 

---

Steps:
1. Train the HMM on the baseline data
    - Using a 10-second sliding window
2. Train the HMM on the training data
    - Using a 10-second sliding window
3. Test the HMM on the driving data
    - Using a 10-second sliding window

---

In [None]:
# Pick one driver and pull out their three recordings, each with its first
# column removed (dropped by label).
driver = "NST51"

driver_recordings = physiological_data[driver]
driver_baseline_data, driver_training_data, driver_driving_data = (
    driver_recordings[phase].drop(columns=driver_recordings[phase].columns[0])
    for phase in ("baseline", "training", "driving")
)

In [None]:
# TODO: train the model on the baseline data using 10-second sliding windows (not implemented yet)


In [None]:
# Read the obstacle timestamp table, pass it through
# process_physio_timestamps together with the exclusion list, then keep only
# the rows whose subject_id matches the selected driver.
timestamps_csv = "../../AdVitam/Exp2/Preprocessed/Physio and Driving/timestamps_obstacles.csv"
physio_timestamps = process_physio_timestamps(pd.read_csv(timestamps_csv), drivers_to_exclude)

is_selected_driver = physio_timestamps["subject_id"] == driver
driver_timestamps = physio_timestamps[is_selected_driver]

In [None]:
# Two independent, empty containers for the per-takeover observations.
slow_observations, fast_observations = [], []

In [None]:
# Build the baseline observation matrix: remove the "Time" column, convert
# what is left to a NumPy array and stack its rows.
# NOTE: driver_baseline_data is rebound to that array here, so this cell
# cannot be re-run without first re-running the cell that defines the frame.
driver_baseline_data = driver_baseline_data.drop(columns=["Time"]).to_numpy()
baseline_observations = np.vstack(driver_baseline_data)

In [None]:
# Search space for the Gaussian mixture fitted to the baseline observations.
n_components = np.arange(1, 11)
covariance_type = ["full", "tied", "diag", "spherical"]
# linspace instead of a float-step arange: the number of values (10) is
# explicit and cannot change with floating-point rounding at the end point
tol = np.linspace(0.001, 0.010, 10)
init_params = ["kmeans", "k-means++", "random", "random_from_data"]
# the mixture's own seed is part of the search space (values 0..10)
random_state = np.arange(0, 11)
max_iter = np.linspace(100, 10000, 100).astype(int)

hyperparameters = {
    "n_components": n_components,
    "covariance_type": covariance_type,
    "tol": tol,
    "init_params": init_params,
    "random_state": random_state,
    "max_iter": max_iter,
}
# the original, misspelled name is kept as an alias in case other cells use it
hyperparametes = hyperparameters

# initialize the model
gmm = GaussianMixture()

# Randomized search: 1000 sampled candidates, 5-fold cross-validation, all
# cores. random_state pins which candidates are sampled, so a
# Restart & Run All reproduces the same search.
gmm_random = RandomizedSearchCV(
    estimator=gmm,
    param_distributions=hyperparameters,
    n_iter=1000,
    cv=5,
    verbose=2,
    n_jobs=-1,
    random_state=0,
)

# fit the model
gmm_random.fit(baseline_observations)

In [None]:
# The search was created without a `refit` argument, so scikit-learn's
# default (refit=True) applies: best_estimator_ has already been fitted on
# all of baseline_observations with the winning hyperparameters. Fitting it
# a second time on the same data would only repeat that work.
gmm = gmm_random.best_estimator_

# display the selected model
gmm

In [None]:
# preview the first rows only instead of rendering the whole recording
driver_driving_data.head()

In [None]:
# Parse the "Time" column of the driving recording into datetimes using the
# explicit timestamp layout below, and store the result back in the frame.
time_format = '%Y-%m-%d %H:%M:%S.%f'
parsed_time = pd.to_datetime(driver_driving_data['Time'], format=time_format)
driver_driving_data['Time'] = parsed_time


In [None]:
# Cut out the 10 s of physio data that precede every takeover trigger and
# sort the resulting windows into "slow" and "fast" takeovers.
pre_takeover_window = pd.to_timedelta("10s")
# takeovers whose TOT value exceeds this are counted as slow
slow_takeover_threshold = pd.to_timedelta("3s")

slow_observations = []
fast_observations = []

# Triggers are added to the first timestamp of the driving recording, i.e.
# they are treated as offsets from its start; compute that start only once.
recording_start = driver_driving_data["Time"].min()

# loop through every takeover (one "TOT..." column per obstacle)
for column in driver_timestamps.columns:
    if "TOT" not in column:
        continue

    # get the obstacle suffix shared by the "TOT..." and "Triggered..." columns
    obstacle = column.replace("TOT", "")

    # trigger of this obstacle for the selected driver (first matching row)
    physio_obstacle_trigger = driver_timestamps["Triggered" + obstacle].iloc[0]

    # no trigger recorded for this obstacle: nothing to cut out
    if pd.isnull(physio_obstacle_trigger):
        continue

    # trim the data to the 10s before the takeover: [trigger - 10 s, trigger)
    window_end = recording_start + physio_obstacle_trigger
    window_start = window_end - pre_takeover_window
    in_window = (driver_driving_data["Time"] >= window_start) & (
        driver_driving_data["Time"] < window_end
    )
    physio_data_10_sec = driver_driving_data[in_window].copy()

    # express time relative to the first sample of the window ...
    physio_data_10_sec = physio_data_10_sec.set_index("Time")
    physio_data_10_sec.index = physio_data_10_sec.index - physio_data_10_sec.index.min()

    # ... and move it back into a regular column.
    # (The original called reset_index/to_numpy on an undefined `driver_data`,
    # which raised NameError; the window frame is what is meant here.)
    physio_data_10_sec = physio_data_10_sec.reset_index()

    # NOTE(review): baseline_observations were built without the "Time"
    # column; drop it here as well if these windows go to the same model -
    # confirm against the downstream usage.
    window_observations = physio_data_10_sec.to_numpy()

    # determine if the takeover was slow or fast
    if driver_timestamps[column].iloc[0] > slow_takeover_threshold:
        slow_observations.append(window_observations)
    else:
        fast_observations.append(window_observations)