Python Libraries


In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

Custom Functions


In [2]:
from useful_functions.dd_dictionary import create_dd_dictionary
from useful_functions.pd_dictionary import create_pd_dictionary
from useful_functions.takeover_dataframe import create_takeover_timestamps
from useful_functions.check_for_missing_data import check_for_missing_data

# Importing Data

---


### Defining the raw data folders

In [3]:
# Locations of the raw recordings, relative to the notebook's working directory.
# Both are handed to check_for_missing_data and to the dictionary-building helpers below.
driving_data_folder = "../AdVitam/Exp2/Raw/Driving"
physio_data_folder = "../AdVitam/Exp2/Raw/Physio/Txt"

### Participants to Exclude

| Participant | Reason |
| --- | --- |
| NST77 | Driving file contains obstacles = "TriggeredObs2TriggeredObs3" and "TriggeredObs3TriggeredObs4" |
| NST91, ST84, ST60 | Does not contain a physio file |

In [4]:
# Cross-check the two raw folders; the displayed result feeds participants_to_exclude.
# NOTE(review): presumably lists participants whose file has no counterpart in the
# other folder - confirm against the helper's implementation.
check_for_missing_data(driving_data_folder, physio_data_folder)

['NST91.txt', 'ST84.txt', 'ST60.txt']

In [5]:
# Participants left out of every dataset (see the table above):
# NST77 has concatenated obstacle labels; the other three have no physio file.
participants_to_exclude = [
    "NST77",
    "NST91",
    "ST84",
    "ST60",
]

### Driving Data

**Metadata:**
| Feature | Description | Notes |
| --- | --- | --- |
| Time | Time elapsed since the software was launched (in seconds) | NA |
| EngineSpeed | Engine speed (in rpm) | Removed |
| GearPosActual | Current gear | Removed |
| GearPosTarget | Next planned gear | Removed |
| AcceleratorPedalPos | Position of gas pedal. | Recording problem, Removed |
| DeceleratorPedalPos | Position of brake pedal. | Recording problem, Removed |
| SteeringWheelAngle | Steering wheel angle (in degrees) | NA |
| VehicleSpeed | Vehicle speed (in mph) | NA |
| Position X | Vehicle position along the x-axis in the simulated driving environment | NA |
| Position Y | Vehicle position along the y-axis in the simulated driving environment | NA |
| Position Z | Vehicle position along the z-axis in the simulated driving environment | NA |
| Autonomous Mode (T/F) | Autonomous pilot status. | True = autonomous pilot activated, False = autonomous pilot deactivated (driver in control of the car) |
| Obstacles | Events that occurred during the driving simulation. | See Below |

- Obstacles: Events that occurred during the driving simulation.
  - TriggeredObsX = Time at which each takeover request was triggered by the experimenter.
  - Obs1 = deer, Obs2 = traffic cone, Obs3 = frog, Obs4 = can, Obs5 = false alarm (x2).
  - Detected = Time at which the driver pressed the steering wheel button to notify he/she understood the situation. The driver is in control of the car when the value of the column "Autonomous Mode (T/F)" is False.


In [6]:
# Load the raw driving recordings, leaving out the excluded participants.
# NOTE(review): presumably a {participant code: DataFrame} mapping - later cells
# index it with codes such as 'NST01'; confirm against create_dd_dictionary.
driving_data_dictionary = create_dd_dictionary(
    driving_data_folder, participants_to_exclude
)
# Number of entries that were loaded.
len(driving_data_dictionary)

87

**Processing the driving data**

Steps Taken
1. Fit a label encoder to the `Obstacles` column
2. Transform the `Obstacles` column for all driver data
3. Resample driver data to 10ms

In [7]:
# Fit ONE label encoder on the obstacle labels of every participant.
# Fitting on a single participant makes `enc.transform` raise ValueError as soon
# as another recording contains a label that participant never produced, and a
# hard-coded participant key raises KeyError if that participant is excluded.
# LabelEncoder sorts its classes, so the integer codes are identical to a
# single-participant fit whenever all participants share the same label set.
all_obstacle_labels = pd.concat(
    [frame["Obstacles"].drop_duplicates() for frame in driving_data_dictionary.values()],
    ignore_index=True,
)

enc = LabelEncoder()
enc.fit(all_obstacle_labels)

# list(...) snapshots the items so entries can be replaced while looping.
for driver, driver_data in list(driving_data_dictionary.items()):
    driver_data = (
        # assign() builds a new frame instead of writing into the loaded one
        driver_data.assign(
            Obstacles=enc.transform(driver_data["Obstacles"]),
            Time=pd.to_timedelta(driver_data["Time"], unit="s"),
        )
        # a unique, time-based index is required before resampling
        .drop_duplicates(subset="Time")
        .set_index("Time")
        # regular 10 ms grid; each grid point takes the most recent sample
        .resample("10ms")
        .ffill()
        .reset_index()
    )

    # replacing the dictionary value
    driving_data_dictionary[driver] = driver_data

### Physiological Signals

**Metadata:**
| Feature | Description | Notes |
| --- | --- | --- |
| min | Time elapsed (in minutes) | **Note:** not sure when recording started |
| ECG | Electrocardiogram | 1000Hz |
| EDA | Electrodermal Activity | 1000Hz |
| RESP | Respiration | 1000Hz |

### Physiological Signal Markers
Contains the timestamps for each period of the experiment.

- Training1 = Baseline phase
- Training2 = Practice phase in the driving simulator
- Driving = Main driving session in conditionally automated driving.

Be careful, the timestamps are here in seconds while they are in minutes in the raw data.


In [8]:
# Load the raw physiological recordings, leaving out the excluded participants.
# NOTE(review): the variable name is misspelled ("phsyiological"); it is kept
# unchanged because other cells may refer to it.
phsyiological_data_dictionary = create_pd_dictionary(
    physio_data_folder, participants_to_exclude
)
# NOTE(review): halved presumably because every participant contributes two
# entries (signals and markers) - confirm against create_pd_dictionary.
len(phsyiological_data_dictionary) / 2

87.0

### Driver Demographic Data


In [9]:
# Preprocessed questionnaire export; participants are identified by the "code"
# column, which the filtering cell below relies on.
driver_demographic_data = pd.read_csv(
    "../AdVitam/Exp2/Preprocessed/Questionnaires/Exp2_Database.csv"
)

**Processing driver demographic data**

In [10]:
# Preview the table as loaded, before any rows or columns are removed.
driver_demographic_data.head()

Unnamed: 0,code,date,time,condition,sex,age,mothertongue,education,driving_license,km_year,...,sart_6_fa,sart_7_fa,sart_8_fa,sart_10_fa,demand_fa,supply_fa,understanding_fa,sart_global_fa,nb_times_remind_counting,notes
0,NST1,51218,90503,1,1,19,1,1,2017,200,...,6,7,5,7,8,27,16,35,,I accidentally triggered the F9 obstacle twice...
1,ST2,51218,100823,2,1,19,1,1,2017,5000,...,1,2,1,7,3,6,12,15,1.0,
2,NST3,51218,161024,1,1,19,1,1,2017,1000,...,3,6,1,2,3,17,7,21,,"Timestamp driving start : Driving End, the 1st..."
3,ST4,61218,144813,2,1,21,3,2,2016,1500,...,2,4,2,2,6,12,8,14,1.0,"In the testing phase 1, we have to remove the ..."
4,NST5,61218,162147,1,1,22,1,1,2017,1500,...,4,2,4,1,3,18,9,24,,


In [11]:
# Scores recorded once per situation; the situation name is the column suffix.
sart_items = [f"sart_{item}" for item in (1, 2, 3, 4, 5, 6, 7, 8, 10)]
per_situation_scores = sart_items + ["demand", "supply", "understanding", "sart_global"]
situation_columns = [
    f"{score}_{situation}"
    for situation in ("deer", "cone", "frog", "can", "fa")
    for score in per_situation_scores
]

# Danger / realism ratings exist for the four obstacles only (not for "fa").
# The frog realism column really is spelled "realisme_frog" in the file.
rating_columns = [
    "danger_deer",
    "realism_deer",
    "danger_cone",
    "realism_cone",
    "danger_frog",
    "realisme_frog",
    "danger_can",
    "realism_can",
]

# nasa_tlx_1 ... nasa_tlx_6, each with its "_corrected" twin, plus two extras.
nasa_tlx_columns = [
    f"nasa_tlx_{item}{suffix}"
    for item in range(1, 7)
    for suffix in ("", "_corrected")
] + ["nasa_tlx_4_corrected_inter", "nasa_tlx_global"]

columns_to_drop = ["date", "time", *nasa_tlx_columns, *rating_columns, *situation_columns]

# Keep only participants that are also present in the driving data,
# then discard the questionnaire columns that are not needed.
is_excluded = driver_demographic_data["code"].isin(participants_to_exclude)
driver_demographic_data = driver_demographic_data.loc[~is_excluded].drop(
    columns=columns_to_drop
)


driver_demographic_data.head()

Unnamed: 0,code,condition,sex,age,mothertongue,education,driving_license,km_year,accidents,nb_times_remind_counting,notes
0,NST1,1,1,19,1,1,2017,200,1,,I accidentally triggered the F9 obstacle twice...
1,ST2,2,1,19,1,1,2017,5000,0,1.0,
2,NST3,1,1,19,1,1,2017,1000,0,,"Timestamp driving start : Driving End, the 1st..."
3,ST4,2,1,21,3,2,2016,1500,0,1.0,"In the testing phase 1, we have to remove the ..."
4,NST5,1,1,22,1,1,2017,1500,0,,


---


# Defining Takeover Quality Quantitatively

- Takeover Time (TOT)
- Sudden Vehicle Deviation
- Response Budget


### Takeover Time


In [12]:
# Build the per-participant takeover table from the resampled driving data.
# NOTE(review): the fitted encoder is passed along presumably so the helper can
# relate the integer codes in "Obstacles" to their labels - confirm in the helper.
takeover_timestamps = create_takeover_timestamps(driving_data_dictionary, enc)
takeover_timestamps.head()

Unnamed: 0,index,TriggeredObs1,TakeoverObs1,ReleaseObs1,TOTObs1,TriggeredObs2,TakeoverObs2,ReleaseObs2,TOTObs2,TriggeredObs3,...,ReleaseObs3,TOTObs3,TriggeredObs4,TakeoverObs4,ReleaseObs4,TOTObs4,TriggeredObs5,TakeoverObs5,ReleaseObs5,TOTObs5
0,NST01,0 days 00:05:11.974200,0 days 00:05:18.804200,0 days 00:05:28.764200,0 days 00:00:06.830000,0 days 00:09:11.494200,0 days 00:09:13.964200,0 days 00:09:23.654200,0 days 00:00:02.470000,0 days 00:10:50.094200,...,0 days 00:10:54.554200,0 days 00:00:04.080000,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
1,ST02,0 days 00:08:03.979300,0 days 00:08:08.999300,0 days 00:08:17.339300,0 days 00:00:05.020000,0 days 00:06:03.149300,0 days 00:06:06.569300,0 days 00:06:09.769300,0 days 00:00:03.420000,0 days 00:14:38.599300,...,0 days 00:14:44.779300,0 days 00:00:04.560000,0 days 00:17:24.939300,0 days 00:17:29.289300,0 days 00:17:33.199300,0 days 00:00:04.350000,NaT,NaT,NaT,NaT
2,NST03,0 days 00:16:04.013200,0 days 00:16:08.633200,0 days 00:16:41.013200,0 days 00:00:04.620000,0 days 00:12:48.623200,0 days 00:12:51.843200,0 days 00:13:24.443200,0 days 00:00:03.220000,NaT,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
3,ST04,0 days 00:19:23.934300,0 days 00:19:36.624300,0 days 00:19:54.174300,0 days 00:00:12.690000,0 days 00:13:29.504300,0 days 00:13:32.174300,0 days 00:13:39.614300,0 days 00:00:02.670000,NaT,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
4,NST05,0 days 00:10:02.164780,0 days 00:10:04.474780,0 days 00:10:06.294780,0 days 00:00:02.310000,0 days 00:15:16.364780,0 days 00:15:31.474780,0 days 00:15:36.064780,0 days 00:00:15.110000,0 days 00:17:52.614780,...,0 days 00:18:00.284780,0 days 00:00:03.340000,0 days 00:07:21.274780,0 days 00:07:24.604780,0 days 00:07:27.654780,0 days 00:00:03.330000,NaT,NaT,NaT,NaT


# !! Here!!

Why are the obstacle trigger times different in the processed data?

Could that have affected my results?

Example:

timestamps_obstacles.csv: Time elapsed (in seconds) between the start of the main driving session and the appearance of the obstacles (TrigObsX), the time when the driver pressed the button to report having understood the situation (DetObsX), and the time when the driver actually took over control (RepObsX). X corresponds to one of the obstacles or a false alarm.

In [13]:
# Preprocessed event times shipped with the dataset, loaded for comparison with
# the takeover table computed above from the raw driving files.
obstacle_timestamps = pd.read_csv(
    "../AdVitam/Exp2/Preprocessed/Physio and Driving/timestamps_obstacles.csv"
)
obstacle_timestamps.head()

Unnamed: 0,subject_id,label_st,TrigObsDeer,DetObsDeer,RepObsDeer,TrigObsCone,DetObsCone,RepObsCone,TrigObsFrog,DetObsFrog,RepObsFrog,TrigObsCan,DetObsCan,RepObsCan,TrigObsFA1,DetObsFA1,RepObsFA1,TrigObsFA2,DetObsFA2,RepObsFA2
0,NST1,0,176.7051,179.0932,183.5238,416.214,418.5109,418.6902,514.8157,518.8456,,786.6408,792.8591,,983.624,,,1082.245,1086.363,
1,ST2,1,230.7565,234.5881,235.778,109.9334,112.2556,113.3516,625.3827,628.3633,629.9416,791.7203,793.7173,796.0673,357.7144,360.2164,,468.5158,470.4113,
2,NST3,0,815.204,818.4693,819.8244,619.8088,621.8167,623.0322,259.4712,263.789,,1027.4009,1030.7859,,378.3409,,,1115.1749,1122.8189,
3,ST4,1,1040.3619,1042.3819,1053.0479,685.9281,,,287.3724,289.5112,,119.9266,120.56,,410.9882,462.7615,,886.9369,889.8139,
4,NST5,0,428.9613,430.7424,431.2726,743.1664,744.4617,758.2736,899.4186,900.7246,902.7536,268.0716,269.9875,271.4027,143.6314,144.7424,,629.1736,630.4212,


---

### Physiological Signals

**Metadata:**
| Feature | Description | Notes |
| --- | --- | --- |
| min | Time elapsed (in minutes) | **Note:** not sure when recording started |
| ECG | Electrocardiogram | 1000Hz |
| EDA | Electrodermal Activity | 1000Hz |
| RESP | Respiration | 1000Hz |

### Physiological Signal Markers
Contains the timestamps for each period of the experiment.

- Training1 = Baseline phase
- Training2 = Practice phase in the driving simulator
- Driving = Main driving session in conditionally automated driving.

Be careful, the timestamps are here in seconds while they are in minutes in the raw data.

In [14]:
# NST01 Raw Physiological Data
nst01_physio_data = pd.read_csv(
    "../AdVitam/Exp2/Raw/Physio/Txt/NST01.txt",
    sep="\t",
    header=9,
    skiprows=[10],
    usecols=[0, 1, 2, 3],
)
nst01_physio_data.head()

Unnamed: 0,min,CH1,CH2,CH3
0,0.0,15.7639,7.09503,-0.310669
1,1.7e-05,15.7639,7.08344,-0.310974
2,3.3e-05,15.7639,7.07001,-0.311584
3,5e-05,15.7623,7.05414,-0.31189
4,6.7e-05,15.7593,7.03644,-0.3125


**Processing the Physiological data**

In [15]:
# converting the time to timedelta
nst01_physio_data["min"] = pd.to_timedelta(nst01_physio_data["min"], unit="min")

# resampling
nst01_physio_data = nst01_physio_data.drop_duplicates(subset="min")
nst01_physio_data = nst01_physio_data.set_index("min")
nst01_physio_data = nst01_physio_data.resample("10ms").ffill()
nst01_physio_data = nst01_physio_data.reset_index()

nst01_physio_data

Unnamed: 0,min,CH1,CH2,CH3
0,0 days 00:00:00,15.763900,7.09503,-0.310669
1,0 days 00:00:00.010000,15.748600,6.93176,-0.313721
2,0 days 00:00:00.020000,15.762300,6.65833,-0.314636
3,0 days 00:00:00.030000,15.748600,6.44409,-0.315552
4,0 days 00:00:00.040000,15.762300,6.30493,-0.316467
...,...,...,...,...
221417,0 days 00:36:54.170000,-0.299072,-4.97040,2.055360
221418,0 days 00:36:54.180000,-0.299072,-4.72748,2.050480
221419,0 days 00:36:54.190000,-0.300598,-4.57916,2.048340
221420,0 days 00:36:54.200000,-0.299072,-4.60876,2.041930


### Physiological Markers
**Metadata:**

| Feature | Description | Notes |
| --- | --- | --- |
| Marker index: | Lists the event number | Removed |
| Time(sec.): | Timestamps corresponding to experiment phases | NA |
| Label | Label of the phase | See below |

Label:
- Training 1 = Baseline,
- Training 2 = Practice with Driving Simulator,
- Driving = Main Driving Session

In [16]:
# Phase markers recorded alongside the NST01 signals. The path is derived from
# physio_data_folder so the raw-data location is defined in exactly one place.
nst01_physio_markers = pd.read_csv(
    os.path.join(physio_data_folder, "NST01-markers.txt"),
    header=2,  # row holding the column names
    sep="\t",
)
nst01_physio_markers

Unnamed: 0,Marker Index:,Time(sec.):,Label:
0,Event 1:,4.095,"Training 1 Start, 08:13:59"
1,Event 2:,305.32,"Training 1 End, 08:19:00"
2,Event 3:,534.26,"Training 2 Start, 08:22:49"
3,Event 4:,794.405,"Training 2 End, 08:27:09"
4,Event 5:,973.685,"Driving Start, 08:30:08"
5,Event 6:,2174.65,"Driving End, 08:50:09"


### Physiological Timestamps

Time elapsed (in seconds) since the start of the main driving session. X corresponds to one of the obstacles or a false alarm.
- TrigObsX: the time when the obstacle appeared
- DetObsX: the time when the driver pressed the button to report having understood the situation
- RepObsX: the time when the driver actually took over control

In [17]:
# Same CSV that was loaded earlier as obstacle_timestamps, read again here.
physio_timestamps = pd.read_csv(
    "../AdVitam/Exp2/Preprocessed/Physio and Driving/timestamps_obstacles.csv"
)
# Row 0 is selected by position; in this file it is subject NST1.
nst01_physio_timestamps = physio_timestamps.iloc[0]
nst01_physio_timestamps

subject_id         NST1
label_st              0
TrigObsDeer    176.7051
DetObsDeer     179.0932
RepObsDeer     183.5238
TrigObsCone     416.214
DetObsCone     418.5109
RepObsCone     418.6902
TrigObsFrog    514.8157
DetObsFrog     518.8456
RepObsFrog          NaN
TrigObsCan     786.6408
DetObsCan      792.8591
RepObsCan           NaN
TrigObsFA1      983.624
DetObsFA1           NaN
RepObsFA1           NaN
TrigObsFA2     1082.245
DetObsFA2      1086.363
RepObsFA2           NaN
Name: 0, dtype: object

**Processing the Physiological data**

Steps:
1. Trim the data down to each experimental phase

In [88]:
# Baseline
baseline_start = pd.to_timedelta(nst01_physio_markers["Time(sec.):"][0], unit="s")
baseline_end = pd.to_timedelta(nst01_physio_markers["Time(sec.):"][1], unit="s")

nst01_physio_baseline_data = nst01_physio_data[
    (nst01_physio_data["min"] >= baseline_start)
    & (nst01_physio_data["min"] <= baseline_end)
].copy()

# Training
training_start = pd.to_timedelta(nst01_physio_markers["Time(sec.):"][2], unit="s")
training_end = pd.to_timedelta(nst01_physio_markers["Time(sec.):"][3], unit="s")

nst01_physio_training_data = nst01_physio_data[
    (nst01_physio_data["min"] >= training_start)
    & (nst01_physio_data["min"] <= training_end)
].copy()

# Driving
driving_start = pd.to_timedelta(nst01_physio_markers["Time(sec.):"][4], unit="s")
driving_end = pd.to_timedelta(nst01_physio_markers["Time(sec.):"][5], unit="s")

nst01_physio_driving_data = nst01_physio_data[
    (nst01_physio_data["min"] >= driving_start)
    & (nst01_physio_data["min"] <= driving_end)
].copy()


In [107]:
# Adding an 'Obstacles' column
nst01_physio_driving_data["Obstacles"] = "Nothing"

# Match the timestamps with the obstacles
obstacles = nst01_physio_timestamps.index.values
obstacles = obstacles[2:]
obstacles = obstacles[:-1]

for obstacle in obstacles:
    # Time when the obstacle appears
    obstacle_appears = driving_start + pd.to_timedelta(
        nst01_physio_timestamps[obstacle], unit="s"
    )

    # Add this marker to the Obstacles column
    if not pd.isna(obstacle_appears):
        mask = nst01_physio_driving_data["min"] >= obstacle_appears
        first_index = mask.idxmax()
        nst01_physio_driving_data.at[first_index, "Obstacles"] = obstacle

nst01_physio_driving_data

Unnamed: 0,min,CH1,CH2,CH3,Obstacles
97369,0 days 00:16:13.690000,25.3754,-0.100708,-0.489807,Nothing
97370,0 days 00:16:13.700000,25.4150,-0.161743,-0.427856,Nothing
97371,0 days 00:16:13.710000,25.4517,-0.092468,-0.368347,Nothing
97372,0 days 00:16:13.720000,25.4730,-0.051880,-0.341187,Nothing
97373,0 days 00:16:13.730000,25.5081,0.003052,-0.295410,Nothing
...,...,...,...,...,...
217461,0 days 00:36:14.610000,23.8007,0.025635,-1.226810,Nothing
217462,0 days 00:36:14.620000,23.8022,0.053711,-1.225280,Nothing
217463,0 days 00:36:14.630000,23.7976,0.018616,-1.223140,Nothing
217464,0 days 00:36:14.640000,23.7976,-0.035706,-1.219790,Nothing


In [108]:
# Sanity check: list which event labels were actually written into the column.
nst01_physio_driving_data['Obstacles'].unique()

array(['Nothing', 'TrigObsDeer', 'DetObsDeer', 'RepObsDeer',
       'TrigObsCone', 'DetObsCone', 'RepObsCone', 'TrigObsFrog',
       'DetObsFrog', 'TrigObsCan', 'DetObsCan', 'TrigObsFA1',
       'TrigObsFA2', 'DetObsFA2'], dtype=object)