In [1]:
import numpy as np
import pandas as pd
import neurokit2 as nk
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import (
    train_test_split,
    GridSearchCV,
)

In [2]:
from useful_functions.driving_data.dd_dictionary import create_dd_dictionary
from useful_functions.driving_data.process_driving_data import processing_driving_data

from useful_functions.physio_data.pd_dictionary import create_pd_dictionary
from useful_functions.physio_data.process_physio_timestamps import process_physio_timestamps
from useful_functions.physio_data.process_physio_data import process_physio_data

from useful_functions.demographic_data.process_driver_demographic_data import (
    process_driver_demographic_data,
)

from useful_functions.construct_observations import construct_observations

from useful_functions.takeover_dataframe import create_takeover_timestamps
from useful_functions.check_for_missing_data import check_for_missing_data

Storing the folder paths to raw data


In [3]:
driving_data_folder = "../AdVitam/Exp2/Raw/Driving"
physio_data_folder = "../AdVitam/Exp2/Raw/Physio/Txt"

Storing a list of driver files to exclude

In [4]:
drivers_to_exclude = check_for_missing_data(driving_data_folder, physio_data_folder)
drivers_to_exclude.extend(["NST77", "NST11"])

<br></br>


### Driving Data Dictionary


Creates a dictionary of the raw driving data files.


In [5]:
driving_data_dictionary = create_dd_dictionary(driving_data_folder, drivers_to_exclude)

Processing driving data


In [6]:
# Fitting a Label Encoder to the `Obstacles` column
driver_data = driving_data_dictionary["NST01"]
driver_data = driver_data.fillna("Nothing")
enc = preprocessing.LabelEncoder()
enc.fit(driver_data["Obstacles"])

# Processing the driving data
driving_data_dictionary = processing_driving_data(driving_data_dictionary, enc)

Creating driving data takeover timestamps


In [7]:
driving_timestamps = create_takeover_timestamps(driving_data_dictionary, enc)

Creates a dictionary of the raw physiological data and their markers


In [8]:
phsyiological_data_dictionary = create_pd_dictionary(physio_data_folder, drivers_to_exclude)

Processing the Physiological data


In [9]:
phsyiological_data_dictionary = process_physio_data(phsyiological_data_dictionary)

<br></br>


A dataframe to store the trigger time, takeover time, release time, and TOT for each obstacle, for every driver. Similar to `driver_timestamps`.


In [10]:
physio_timestamps = pd.read_csv(
    "../AdVitam/Exp2/Preprocessed/Physio and Driving/timestamps_obstacles.csv"
)

Processing the Physio Timestamps

In [11]:
physio_timestamps = process_physio_timestamps(physio_timestamps, drivers_to_exclude)

<br></br>


Grabbing the driver demographic data


In [12]:
driver_demographic_data = pd.read_csv(
    "../AdVitam/Exp2/Preprocessed/Questionnaires/Exp2_Database.csv",
    usecols=[
        "code",
        "sex",
        "age",
        "mothertongue",
        "education",
        "driving_license",
        "km_year",
        "accidents",
    ],
)

<br></br>


Processing driver demographic data


In [13]:
driver_demographic_data = process_driver_demographic_data(
    driver_demographic_data, drivers_to_exclude
)

# ECG Data
- Root Mean Square of Successive Difference (RMSSD)​
- Standard Deviation of Normal-to-Normal intervals (SDNN)​
- Proportion of beats that differ by more than 50ms


In [73]:
driver = "ST16"
# data for each driver
driver_driving_data = driving_data_dictionary[driver]
driver_phyio_baseline_data = phsyiological_data_dictionary[driver]["baseline"]
driver_physio_data = phsyiological_data_dictionary[driver]["driving"]

# timestamps
driver_driving_timestamps = driving_timestamps[driving_timestamps["subject_id"] == driver]
driver_physio_timestamps = physio_timestamps[physio_timestamps["subject_id"] == driver]

In [74]:
baseline_hrv = nk.hrv_time(driver_phyio_baseline_data, sampling_rate=100)
# drop columns
baseline_hrv = baseline_hrv.drop(
    columns=["HRV_SDANN1", "HRV_SDNNI1", "HRV_SDANN2", "HRV_SDNNI2", "HRV_SDANN5", "HRV_SDNNI5"]
)

In [75]:
column = "TOTObs1"

# get the obstacle number
obstacle = column.replace("TOT", "")

# store the obstacle triggers for driving and physio
driving_obstacle_trigger = driver_driving_timestamps["Triggered" + obstacle].iloc[0]
physio_obstacle_trigger = driver_physio_timestamps["Triggered" + obstacle].iloc[0]

In [76]:
# trim the data to the 10s before the takeover
driving_data_10_sec = driver_driving_data[
    (driver_driving_data["Time"] >= driving_obstacle_trigger - pd.to_timedelta("10s"))
    & (driver_driving_data["Time"] < driving_obstacle_trigger)
].copy()

physio_data_10_sec = driver_physio_data[
    (
        driver_physio_data["Time"]
        >= (driver_physio_data.Time.min() + physio_obstacle_trigger - pd.to_timedelta("10s"))
    )
    & (driver_physio_data["Time"] < driver_physio_data.Time.min() + physio_obstacle_trigger)
].copy()

In [77]:
takeover_hrv = nk.hrv_time(physio_data_10_sec, sampling_rate=100)
# drop columns
takeover_hrv = takeover_hrv.drop(
    columns=["HRV_SDANN1", "HRV_SDNNI1", "HRV_SDANN2", "HRV_SDNNI2", "HRV_SDANN5", "HRV_SDNNI5"]
)

In [78]:
# Store the Difference between the baseline and the takeover
hrv_difference = takeover_hrv - baseline_hrv

In [79]:
# concatenate the dataframes
hrv = pd.concat([baseline_hrv, takeover_hrv, hrv_difference], axis=1)


In [80]:
# reset the Time index
driving_data_10_sec = driving_data_10_sec.set_index("Time")
physio_data_10_sec = physio_data_10_sec.set_index("Time")

# set the index to 0
driving_data_10_sec.index = (
    driving_data_10_sec.index - driving_data_10_sec.index.min()
)
physio_data_10_sec.index = physio_data_10_sec.index - physio_data_10_sec.index.min()

# merge the data
driver_data = pd.merge(
    driving_data_10_sec,
    physio_data_10_sec,
    left_index=True,
    right_index=True,
)

# reset the index
driver_data.reset_index(inplace=True)

# Remove Time, Position X, Position Y, Position Z, Autonomous Mode (T/F), Obstacles
driver_data = driver_data.drop(
    columns=[
        "Time",
        " Position X",
        "Position Y",
        "Position Z",
        "Autonomous Mode (T/F)",
        "Obstacles",
    ]
)

# grab driver demogrpahic data
demo_data = driver_demographic_data[driver_demographic_data["code"] == driver]

# Broadcast to repeat the static data for each row of the dynamic data
demo_data = pd.concat([demo_data] * len(driver_data), ignore_index=True)

# Broadcast the hrv data
hrv = pd.concat([hrv] * len(driver_data), ignore_index=True)

# merge the data
driver_data = pd.merge(driver_data, demo_data, left_index=True, right_index=True)
driver_data = pd.merge(driver_data, hrv, left_index=True, right_index=True)

# change the code value to the driver id
driver_data["code"] = driver_data["code"].apply(lambda x: x.split("T")[1])
# cast code to int
driver_data["code"] = driver_data["code"].astype(int)

if len(driver_data) != 1000:
    print("Error")



In [71]:
hrv

Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_RMSSD,HRV_SDSD,HRV_CVNN,HRV_CVSD,HRV_MedianNN,HRV_MadNN,HRV_MCVNN,HRV_IQRNN,...,HRV_IQRNN.1,HRV_SDRMSSD,HRV_Prc20NN,HRV_Prc80NN,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN
0,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
1,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
2,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
3,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
4,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
996,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
997,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
998,699.97619,43.242662,22.846379,22.87268,0.061777,0.032639,700.0,44.478,0.06354,60.0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5


In [81]:
driver_data

Unnamed: 0,SteeringWheelAngle,VehicleSpeed,ECG_Raw,ECG_Clean,ECG_Rate,ECG_Quality,ECG_R_Peaks,ECG_P_Peaks,ECG_P_Onsets,ECG_P_Offsets,...,HRV_IQRNN,HRV_SDRMSSD,HRV_Prc20NN,HRV_Prc80NN,HRV_pNN50,HRV_pNN20,HRV_MinNN,HRV_MaxNN,HRV_HTI,HRV_TINN
0,-31.0,53.0615,0.058289,0.057049,88.993547,0.999503,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
1,-31.0,53.0615,0.051880,0.056670,88.950439,0.999502,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
2,-31.0,53.0577,0.053711,0.058199,88.906329,0.999500,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
3,-31.0,53.0537,0.062866,0.063466,88.861287,0.999498,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
4,-31.0,53.0537,0.064087,0.065072,88.815378,0.999495,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.0,52.5230,-0.027771,-0.009399,83.333333,0.996379,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
996,0.0,52.5306,-0.015259,-0.002385,83.333333,0.996406,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
997,0.0,52.5306,-0.017090,-0.003488,83.333333,0.996431,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5
998,0.0,52.5393,-0.029297,-0.009427,83.333333,0.996454,0,0,0,0,...,-20.0,-0.457898,-6.0,-20.0,-1.190476,-10.805861,60.0,-140.0,-6.083333,-62.5


<br>
<br>


**Train/Validate/Test Split**

In [None]:
slow_observations_train, slow_observations_test = train_test_split(
    slow_observations, test_size=0.1, random_state=42
)

fast_observations_train, fast_observations_test = train_test_split(
    fast_observations, test_size=0.1, random_state=42
)

In [None]:
# initializing the hyperparameters
n_components = np.arange(1, 11)
covariance_type = ["full", "tied", "diag", "spherical"]
tol = np.arange(0.001, 0.011, 0.001)
init_params = ["kmeans", " k-means++", "random", "random_from_data"]
random_state = np.arange(0, 11)

hyperparametes = {
    "n_components": n_components,
    "covariance_type": covariance_type,
    "tol": tol,
    "init_params": init_params,
    "random_state": random_state,
}

In [None]:
max_iter = 1000

# initialize the model
slow_model = GaussianMixture(max_iter=max_iter)
fast_model = GaussianMixture(max_iter=max_iter)

In [None]:
# initialize the grid search cv
slow_grid = GridSearchCV(slow_model, hyperparametes, cv=5, n_jobs=-1, verbose=1)
fast_grid = GridSearchCV(fast_model, hyperparametes, cv=5, n_jobs=-1, verbose=1)

# fit the model
s = np.vstack(slow_observations_train)
slow_grid.fit(s)
f = np.vstack(fast_observations_train)
fast_grid.fit(f)

In [None]:
slow_hmm = slow_grid.best_estimator_
fast_hmm = fast_grid.best_estimator_

slow_hmm.fit(s)
fast_hmm.fit(f)


In [None]:
# test the HMM
accuracy = 0

for obs in slow_observations_test:
    if slow_hmm.score(obs) > fast_hmm.score(obs):
        accuracy += 1

for obs in fast_observations_test:
    if fast_hmm.score(obs) > slow_hmm.score(obs):
        accuracy += 1

accuracy = accuracy / (len(slow_observations_test) + len(fast_observations_test))
print("Accuracy: ", accuracy)

<br>
<br>
