# Imports and setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from DDDS.drive import Drive
import pandas as pd

In [3]:
# Instantiate the project's Drive helper (the saved output shows it prints a connection message).
drive = Drive()

Connected successfully!


In [4]:
# List the Drive entries of type 'folder'; per the saved output each entry is a dict with 'id' and 'name'.
folders = drive.list('folder')
folders

[{'id': '1-AspS9GBqIj-pzA5KM6F03o-9fviuBq2', 'name': 'échantillon'},
 {'id': '1-yIXpLgb_gMzgnaT2Wxl0ckHhRmp47KU', 'name': 'Video validations'},
 {'id': '17UaFCmIylltkftISr0rthJoSS66bJYlt', 'name': 'Validations'},
 {'id': '1i22RM6oJNZr8opAhLK8BrRZTfjh5caOC', 'name': 'Video annotations'},
 {'id': '15nrMZnD_YgMchc43zIFoyWxEMiXYVIP9', 'name': 'data non utilisés'},
 {'id': '15lelEwAZMbPUeUhFrjiwo-hN6d1up8H8',
  'name': 'Videos archives non utilsés'},
 {'id': '1i-BUr1yQTLSXl2dk5US7ZNtrdZHXhQ0n', 'name': 'non utilisé'},
 {'id': '13hPk0MqNH_yNnRGRNTIMUfyT_4HfxJuG', 'name': 'auto evaluation'},
 {'id': '1-IHttqHaNoz0nKEh5TPPGUh7xeKFhBnr', 'name': 'capture ecrans'},
 {'id': '1-2v2QM1S74j789YGMkUInShLyQVu_gjL', 'name': 'annotations'}]

## Look for validated annotation files

In [5]:
# ID of the Drive folder named "Video validations" (holds the validated annotation files).
# Raises IndexError when no folder with that name is listed.
validations_folder = list(
    entry['id'] for entry in folders if entry['name'] == 'Video validations'
)[0]
validations_folder

'1-yIXpLgb_gMzgnaT2Wxl0ckHhRmp47KU'

In [6]:
# CSV files whose parent is the "Video validations" folder, i.e. the validated annotation files
validated_videos = drive.list('csv', add_query=f"'{validations_folder}' in parents")
validated_videos

[{'id': '11D-Lanp1cp03piT0y7FcvdTP3b27LP9O',
  'name': 'Christophe-2021-10-28 09-28-56 844.flv.csv'},
 {'id': '11mnlOUXETTsZO6w0R5jCxKkPPDcosNpL',
  'name': 'validateur-2021-11-22 15-40-50 eb0.flv.csv'},
 {'id': '1HEjJJt7lX77D2dO8c9p6hR5KYYtK3FSC',
  'name': 'validateur-2021-11-25 13-45-12 ed2.flv.csv'},
 {'id': '11_6AdcMIdoajeJ3PqToNWxpYmvZszP5o',
  'name': 'Christophe-2021-11-17 10-58-38 e99.flv.csv'},
 {'id': '11YSruFD5BIvon23xF0wu1GYX0mzSGNx9',
  'name': 'Christophe-2021-10-25 10-31-58 c27.flv.csv'},
 {'id': '1FQIYDGcHM4o7Olhgg0DQr5aU2UGxtYGr',
  'name': 'validateur-2021-11-04 13-51-55 ed2.flv.csv'},
 {'id': '18o3ZjAVuQoqkLlYfTd_Skvv7u34GuqSV',
  'name': 'Zakaria-2021-12-01 13-20-00 e99.flv.csv'},
 {'id': '18kM5KV4h3riTCRUxG1lcSuBuGOr5yTgC',
  'name': 'Zakaria-2021-11-24 15-36-04 e99.flv.csv'},
 {'id': '18i0DQAsu9hAV17Cj3FtawH0grIKpO9Bs',
  'name': 'Zakaria-2021-12-07 14-35-11 ea9.flv.csv'},
 {'id': '11YLe6ccK6uf751T0KwG_n-fJx_2OwEU3',
  'name': 'Christophe-2021-11-30 16-16-03 ece.

## Extract the date, time and driver id from each file name to locate its sync file and HRV recording

In [7]:
def get_date_time_id(file_name):
    """
    Parse the name of a validated annotation file (files in 'Video validations' folder).

    Expected layout: "<annotator>-<year>-<month>-<day> <hour>-<minute>-<second> <driver_id>.<ext...>",
    e.g. "Christophe-2021-10-28 09-28-56 844.flv.csv".

    The name is parsed from the right so that an annotator prefix containing
    hyphens or spaces does not shift the date fields.

    Args:
        file_name: annotation file name as a string.

    Returns:
        Tuple of strings (year, month, day, hour, minute, driver_id).

    Raises:
        ValueError: if the name does not follow the expected layout.
    """
    try:
        # The last two space-separated chunks are the time and "<driver_id>.<ext>"
        prefix, time_part, tail = file_name.rsplit(' ', 2)
        # The last three hyphen-separated fields of the prefix are the date
        _, year, month, day = prefix.rsplit('-', 3)
        # Seconds (and anything after) are ignored
        hour, minute = time_part.split('-')[:2]
    except ValueError:
        raise ValueError(f"Unexpected annotation file name: {file_name!r}") from None
    driver_id = tail.split('.')[0]
    # year, month, day, hour, minute, driver_id
    return (year, month, day, hour, minute, driver_id)

In [8]:
# Parse every validated annotation file name into its (year, month, day, hour, minute, driver_id) tuple
date_time_ids = [get_date_time_id(entry['name']) for entry in validated_videos]

In [9]:
def get_hrv_format_date_id(date_time_id):
    """
    Build the HRV-style date string and the driver id from a parsed file name.

    Args:
        date_time_id: result of get_date_time_id, i.e.
            (year, month, day, hour, minute, driver_id).

    Returns:
        Tuple (date, driver_id) where date is formatted as "day_month_year".
    """
    year, month, day = date_time_id[0], date_time_id[1], date_time_id[2]
    driver_id = date_time_id[5]
    hrv_date = f"{day}_{month}_{year}"
    return hrv_date, driver_id

In [10]:
# Convert each parsed tuple into the (HRV-formatted date, driver id) pair
dates_drivers = [get_hrv_format_date_id(parsed) for parsed in date_time_ids]

## Get sync files

In [11]:
def get_sync_file(date, driver, connection=None):
    """
    Search for the sync file(s) holding the timestamps of the experiment start.

    Args:
        date: date string as it appears in sync file names after "annotation_",
            e.g. '28_10_2021'.
        driver: driver id expected in the file name, e.g. '844'.
        connection: object exposing ``list(file_type, add_query=...)``. When omitted,
            the notebook-level ``drive`` is looked up at call time, so a ``drive``
            that is re-created later in the notebook is picked up.

    Returns:
        Whatever ``connection.list`` returns for CSV files whose name contains both
        "annotation_<date>" and the driver id.
    """
    if connection is None:
        # A `connection=drive` default would be frozen when the function is defined
        connection = drive
    query = f"(name contains 'annotation_{date}' and name contains '{driver}')"
    return connection.list('csv', add_query=query)

In [12]:
# Collect the sync-file entries for every (date, driver) pair into one flat list.
# NOTE(review): a lookup can contribute zero or several entries, so this list is not
# guaranteed to line up one-to-one with `dates_drivers` (the saved outputs show 35 pairs
# but 36 experiment-start timestamps) — verify before pairing anything by position.
sync_files = []
for hrv_date, driver_id in dates_drivers:
    sync_files.extend(get_sync_file(hrv_date, driver_id))

In [13]:
# Show the sync-file entries collected for all (date, driver) pairs
sync_files

[{'id': '11KmJHEHeZC1WSx_zRbcW27AaAPxrajtK',
  'name': 'annotation_28_10_2021_09_22 844.csv'},
 {'id': '141ck31WFIXrxcNOQLUbXx6YP6YRoupMv',
  'name': 'annotation_22_11_2021_15_38 eb0.csv'},
 {'id': '15PQ-lCo-CIXOnO9qNRw_1zOmkwzTXGOn',
  'name': 'annotation_25_11_2021_13_42 ed2.csv'},
 {'id': '133iDeUWQYFTQ9YxGqCU2M327_tPv04Nh',
  'name': 'annotation_17_11_2021_10_57 e99.csv'},
 {'id': '10G5_JbxA6LaG9C1reqtw-e1PI0umXWBH',
  'name': 'annotation_25_10_2021_10_26 c27.csv'},
 {'id': '12BYPDLSQXiyYWb-5a-lMluVarAoHkS6z',
  'name': 'annotation_04_11_2021_13_43 ed2.csv'},
 {'id': '16A9SKfL1OiGbriakm3n8CP9RG_5CTtcE',
  'name': 'annotation_01_12_2021_13_17 e99.csv'},
 {'id': '14aLzPjOHicUeoV4hwsj0Dns16l0r-Gs8',
  'name': 'annotation_24_11_2021_15_34 e99.csv'},
 {'id': '16SEKAkMs9SyzeX9CrQfcmnJeWSxZtZA6',
  'name': 'annotation_07_12_2021_14_35 ea9.csv'},
 {'id': '166GqPGOCosTC1PTLsM_ZqpYbo3iqYQCL',
  'name': 'annotation_30_11_2021_16_12 ece.csv'},
 {'id': '15vNSh2CcaV75FzGO2s-dhEBC_2UdIT1s',
  'na

# Download HRV file (using DDDS.hrv)

In [14]:
from DDDS.hrv import HRV
# NOTE(review): `drive` was already created near the top of the notebook; this rebinds
# the name to a fresh Drive instance.
drive = Drive()

Connected successfully!


In [15]:
# Instantiate the project's HRV helper (the saved output shows it prints a connection message).
hrv = HRV()

Connected successfully!


In [16]:
# Fetch the HRV recordings; the result is used below as a mapping from recording name to dataframe.
dataframes = hrv.get_dataframes()

Progress: |██████████████████████████████████████████████████| 100.0% Complete


In [19]:
def get_hrv_id(date, driver):
    """
    Find the key of the HRV recording for a given date and driver.

    Scans the keys of the notebook-level ``dataframes`` mapping in order and returns
    the first key that contains both ``date`` and ``driver`` as substrings.
    Returns None when no key matches.
    """
    matching_keys = (name for name in dataframes if date in name and driver in name)
    return next(matching_keys, None)

In [20]:
# Show the (date, driver id) pairs derived from the validated annotation file names
dates_drivers

[('28_10_2021', '844'),
 ('22_11_2021', 'eb0'),
 ('25_11_2021', 'ed2'),
 ('17_11_2021', 'e99'),
 ('25_10_2021', 'c27'),
 ('04_11_2021', 'ed2'),
 ('01_12_2021', 'e99'),
 ('24_11_2021', 'e99'),
 ('07_12_2021', 'ea9'),
 ('30_11_2021', 'ece'),
 ('30_11_2021', 'eb0'),
 ('05_11_2021', 'ed2'),
 ('22_11_2021', 'ea9'),
 ('24_11_2021', '038'),
 ('02_11_2021', '1c3'),
 ('29_11_2021', 'eca'),
 ('05_11_2021', 'ea9'),
 ('28_10_2021', '1c3'),
 ('26_10_2021', 'c04'),
 ('02_11_2021', 'eb0'),
 ('26_10_2021', 'c27'),
 ('19_11_2021', 'ece'),
 ('28_10_2021', '982'),
 ('24_11_2021', 'eca'),
 ('26_11_2021', '1c3'),
 ('25_11_2021', '038'),
 ('26_11_2021', 'ea9'),
 ('26_11_2021', 'ed2'),
 ('27_10_2021', '844'),
 ('04_11_2021', 'eca'),
 ('18_11_2021', 'e99'),
 ('25_10_2021', 'c04'),
 ('29_10_2021', '87e'),
 ('27_10_2021', '87e'),
 ('29_10_2021', '982')]

In [21]:
# Look up the HRV dataframe key for the first (date, driver) pair
key = get_hrv_id(*dates_drivers[0])
key

'28_10_2021_09_22 844'

In [22]:
dataframes[key]
# TODO (planned processing of this dataframe):
# - round timestamps to the second
# - group rows by second
# - average the heart-rate values within each group
# - add KSS values per one-second group
# - impute with the first value of the group, or with the average? (undecided)

Unnamed: 0,Timestamp_Google,Device_id,Heart_Rate,RR_rate,Garmin
0,2021-10-28 07:25:39.949,E3:C6:1E:14:37:C8,90,[],1
1,2021-10-28 07:25:39.981,E3:C6:1E:14:37:C8,90,[],1
2,2021-10-28 07:25:40.231,CB:42:7F:0A:7E:0C,84,[1276],1
3,2021-10-28 07:25:40.262,E8:AC:14:80:83:29,92,[688],1
4,2021-10-28 07:25:40.434,E3:C6:1E:14:37:C8,90,[],1
...,...,...,...,...,...
26402,2021-10-28 08:38:46.142,E3:C6:1E:14:37:C8,88,[573],1
26403,2021-10-28 08:38:46.174,E3:C6:1E:14:37:C8,88,[],1
26404,2021-10-28 08:38:46.502,CB:42:7F:0A:7E:0C,98,"[841, 665]",1
26405,2021-10-28 08:38:46.689,E3:C6:1E:14:37:C8,0,[],1


# Download sync files

In [23]:
# Download the content of every sync file found above, by Drive file ID
sync_files_content = drive.download([file['id'] for file in sync_files])

Progress: |██████████████████████████████████████████████████| 100.0% Complete


In [24]:
# Read the experiment-start timestamp out of every downloaded sync file
exp_start_timestamps = []
for sync_csv in sync_files_content:
    sync_df = pd.read_csv(sync_csv, index_col=[0])
    # Take the 'exp_start' row and read its first column; per the original note that
    # column is usually named timestamp_google but sometimes timestamp
    exp_start_row = sync_df.loc['exp_start']
    first_column = sync_df.columns[0]
    exp_start_timestamps.append(exp_start_row[first_column])

In [25]:
# Show the collected experiment-start timestamps (one per downloaded sync file)
exp_start_timestamps

[1635406301149,
 1637592073636.0,
 1637844329071.0,
 1637143133345,
 1635150767939,
 1636030325621,
 1638361218488,
 1637764600414,
 1638884143409,
 1638285387835,
 1638272172006.0,
 1636105938693.0,
 1637576812727,
 1637758026197,
 1635863051616,
 1638190109130,
 1636117193360,
 1635426282196,
 1635238766304,
 1635238766304,
 1635858260425,
 1635248018146,
 1637325536836,
 1635411216417,
 1637748099738,
 1637926486597.0,
 1637833449252,
 1637936234646,
 1637920268951.0,
 1635340092992,
 1636020074260.0,
 1637238112621.0,
 1635166948028,
 1635500213838,
 1635335425399,
 1635507210158]

# Download annotation files

In [26]:
# Download the content of every validated annotation file, by Drive file ID
annotation_content = drive.download([video['id'] for video in validated_videos])

Progress: |██████████████████████████████████████████████████| 100.0% Complete


In [27]:
# Build one cleaned dataframe per downloaded annotation file
annotation_csvs = []
for annotation_file in annotation_content:
    # Read CSV and drop useless columns
    raw_annotations = pd.read_csv(annotation_file).drop(columns=['timestamp_lena', 'Unnamed: 0'])
    # Select only confirmed events; copy so the new column below is written to an
    # independent frame instead of a slice of `raw_annotations` (avoids SettingWithCopyWarning)
    confirmed = raw_annotations[raw_annotations['validation'] == 1].copy()
    # Express every instant relative to the first confirmed event of the file.
    # NOTE(review): raises IndexError when a file has no confirmed event.
    confirmed['aligned_instant'] = confirmed['Instant'] - confirmed['Instant'].iloc[0]
    annotation_csvs.append(confirmed)

In [28]:
# Inspect one processed annotation table (third file) to check the aligned_instant column
annotation_csvs[2]

Unnamed: 0,Instant,duree,evenement,validation,aligned_instant
0,16388,1.000000,Experience start,1,0
2,374301,0.341190,Debut conduite,1,357913
4,535656,0.934461,Grattez\frottez,1,519268
5,535934,1.496705,Grattez\frottez,1,519546
6,568722,2.303948,Grattez\frottez,1,552334
...,...,...,...,...,...
313,3768965,4.822157,C.Position,1,3752577
314,3769101,2.321809,C.Position,1,3752713
315,3888199,7.406835,Baillements,1,3871811
316,3892496,2.618438,Baillements,1,3876108


## Not finished here — this part was completed inside the module file (.py)

In [None]:
# Load a single validated annotation file by its hard-coded Drive ID (the ID listed above for
# 'validateur-2021-11-25 13-45-12 ed2.flv.csv') and drop the 'timestamp_lena' column.
# NOTE(review): download() is given a single ID string here but a list of IDs elsewhere — confirm both are supported.
annotations_df = pd.read_csv(drive.download('1HEjJJt7lX77D2dO8c9p6hR5KYYtK3FSC'), index_col=0).drop(columns='timestamp_lena')

In [None]:
# Express every instant relative to the first row of the file (adds a column in place)
annotations_df['aligned_instant'] = annotations_df['Instant'] - annotations_df.iloc[0]['Instant']
annotations_df

In [None]:
# Turn the aligned instants (treated as milliseconds) into timestamps by adding the experiment start.
# NOTE(review): `exp_start` is not defined in any visible cell of this notebook, so this cell
# raises NameError as written — define it from the matching sync file before running.
annotations_df['Timestamp_Google'] = pd.to_timedelta(annotations_df['aligned_instant'], unit='ms') + exp_start['timestamp_google']

In [None]:
# Show the annotation table after the Timestamp_Google column was added
annotations_df

# Test

In [None]:
pip install opencv-python

In [None]:
from DDDS.annotations import Annotations

In [None]:
# Instantiate the project's Annotations helper (the module-based version of the steps above, per the heading earlier)
annots = Annotations()

In [None]:
# Inspect the first entry of the annotations collection
annots.annotations[0]

# Aligning annotations to HRV per file

In [29]:
# NOTE(review): Drive is already imported and instantiated earlier in the notebook;
# this repeats both and rebinds `drive` to a fresh instance.
from DDDS.drive import Drive

drive = Drive()

Connected successfully!


In [30]:
# NOTE(review): HRV is already imported and instantiated earlier in the notebook;
# this repeats both and rebinds `hrv` to a fresh instance.
from DDDS.hrv import HRV
hrv = HRV()

Connected successfully!


In [31]:
# Fetch the HRV recordings again.
# NOTE(review): the saved output of this cell records a ValueError ("could not convert string
# to Timestamp"); if the call fails, `dataframes` is not (re)assigned by this cell.
dataframes = hrv.get_dataframes()

Progress: |██████████████████████████████████████████████████| 100.0% Complete


ValueError: could not convert string to Timestamp

In [32]:
# Resolve the HRV dataframe key for every (date, driver) pair.
# NOTE(review): get_hrv_id returns None when nothing matches, which makes the lookup below fail.
keys = [get_hrv_id(*date_driver) for date_driver in dates_drivers]

hrv_dataframes = [dataframes[key] for key in keys]

annotation_dataframes = [annots.annotations[i] for i in range(len(annots.annotations))]

combined_dfs = []

# The loop variables are deliberately not named `hrv`: that name holds the HRV() instance
# created above, and rebinding it to a dataframe would break any later `hrv.get_dataframes()`.
# NOTE(review): zip pairs the two lists by position and silently stops at the shorter one —
# confirm that `annots.annotations` is in the same order as `dates_drivers`.
for hrv_df, annotation_df in zip(hrv_dataframes, annotation_dataframes):
    combined = (
        # Stack HRV readings and annotation events into one table
        pd.concat([hrv_df, annotation_df], ignore_index=True)
        # Interleave them chronologically
        .sort_values('Timestamp_Google')
        # Renumber rows without keeping the old index as a column
        .reset_index(drop=True)
        .drop(columns=['index', 'Timestamp_Device'], errors='ignore')
    )
    combined_dfs.append(combined)
    

NameError: name 'dataframes' is not defined

In [None]:
# Inspect the first combined HRV + annotation table
combined_dfs[0]

In [None]:
# Row positions in the second combined table where an annotation event is present
# (i.e. where the 'evenement' column is not null)
import numpy as np
np.flatnonzero(combined_dfs[1]['evenement'].notna())

In [None]:
# Example showing how annotation rows are interleaved with HRV rows in a combined table.
# NOTE(review): 436 is a hard-coded row position — presumably taken from the index list
# computed in the previous cell; confirm it still applies after re-running.
combined_dfs[1].iloc[436:]