# Analysis of the hazard data

## Import and helper functions

In [1]:
import os
import codecs
import csv
from datetime import datetime
import numpy as np
import seaborn as sns
import matplotlib.cm as cm
import matplotlib.mlab as ml
from scipy.stats import kendalltau
import pandas as pd
from pylab import savefig


# Read UTF-16 encoded unicode '.txt'. Usefull for cross-platform encoding.
def load_csv(location):
    d = []
    f=codecs.open(location,"rb")
    csvread=csv.reader(f, delimiter=';')
    csvread.next()
    for row in csvread:
        d.append(row)
    
    # Filtering the data where Eye Tracking is not working
    return [line for line in d]  

# Read UTF-16 encoded unicode '.txt'. Usefull for cross-platform encoding.
def load_tsv(location):
    data = []
    f=codecs.open(location,"rb","utf-16")
    csvread=csv.reader(f,delimiter='\t')
    csvread.next()
    for row in csvread:
        data.append(row)
    
    # Filtering the data where Eye Tracking is not working
    return data 

def convert_time(time):
    return datetime.strptime(time, '%M:%S,%f')

# Small script to load all the data from the folder
data = []
directory = os.path.join("c:\\","/media/sf_EyeTracking/results/hazards")
for root,dirs,files in os.walk(directory):
    print(str(len(files)) + " files found.") 
    for file in files:
        if file.endswith(".csv"):
            data.extend(load_csv("/media/sf_EyeTracking/results/hazards/"+file))
data = [line for line in data if line[0] != '']
print(str(len(data)) + " lines of file.")

def per_hazard(tmp, value):
    per_hazard = [line for line in tmp if line[4] == value]
    return per_hazard

def per_participant(tmp, participant):
    per_participant = [line for line in tmp if line[0] == participant]
    return per_participant

63 files found.
1992 lines of file.


## Applying corrections and validating to the collected CSV data.

In [2]:
# Correcting an error made in the file template. 
def correct_line(line):
    # For tryout1, 2nd hazard is 3CH and not 2CH.
    new_line = line
    if line[2] == '1' and line[3] == '2' and line[4] == '2CH':
        new_line[4] = '3CH'
    if line[16] == '7+':
        new_line[16] = '8'
    return new_line

print("Correction done.")

data = [correct_line(line) for line in data]

Correction done.


In [3]:
# Validate the data by checking occurence order, or consistency between colums.
def validate_line(line):
    # Verify that cross event is after first fixation, if one.
    seconds = convert_time(line[14]) - convert_time(line[6])
    if seconds.total_seconds() < 0:
        return False

    # Verify that there is no typo in cross event.
    seconds = convert_time(line[14]) - convert_time("5:00,0")
    if seconds.total_seconds() > 0:
        return False
    
    # Verify that there is no typo in first fixation.
    seconds = convert_time(line[6]) - convert_time("05:00,0")
    if seconds.total_seconds() > 0:
        return False
    
    # Verify that there fixation number is higher than detection number.
    if int(line[16]) < int(line[15]):
        return False
    
    # Verify that the number of fixations is the good one.
    return validate_fixations(line)


def validate_fixations(line):
    nb = int(line[16]) + 7
    for i in range(7, nb):
        if line[i] == '0':
            return False
    return True

participant_values = list(set([line[0] for line in data if line[0] != '']))
hazard_values = list(set([line[4] for line in data if line[4] != '']))

def validate_data(tmp):
    for line in tmp:
        if validate_line(line) is False:
            print(line)
            
    # Verifying the number of distinct hazards.
    if len(hazard_values) != 24:
        return False
    
    # Verifying the number of distinct participants.
    if len(participant_values) != 63:
        return False
    
    # Success case
    return "Data has been validated."
            
validate_data(data)

'Data has been validated.'

## Pre-analysis of the hazards

In [4]:
# Computing the fixation time percentage and time difference between first fixation and hazard.

results = []

for line in data:
    new_line = line
    time_advance = 0
    total_fixation = 0
    
    
    if (line[6] != '' and line[14] != ''):
        time_advance = convert_time(line[14]) - convert_time(line[6])
        for i in range(7,14):
            if isinstance(line[i], basestring):
                line[i] = line[i].replace(',','.')
        total_fixation = float(line[7]) + float(line[8]) + float(line[9]) + float(line[10]) + float(line[11])+ float(line[12]) + float(line[13])
        time_advance = time_advance.total_seconds()
    
    percentage = 0
    if time_advance != 0:
        percentage = round(total_fixation / time_advance * 100,3)
    # Sometimes percentage can be over 100 due to the roundings, correct it.
    if percentage > 100:
        percentage = 100
    
    new_line.append(time_advance)
    new_line.append(percentage)
    results.append(new_line)
    new_line = []

In [5]:
# Verifying if some strange results inside the new computations.
for l in results:
    if l[18] < 0 or l[19] > 100 or l[19] < 0:
        print(l)

## Results Generation

In [6]:
def average(_list, index):
    average = 0
    for l in _list:
        average += float(l[index])
    return round(average / len(_list), 3)  

def std(_list, index):
    new_list = [l[index] for l in _list]
    arr = np.array(new_list).astype(np.float)
    return round(np.std(arr), 3)

def stops(_list):
    return len([l for l in _list if l[15] < l[16]]) * 100 / len(_list)

def analyse(tmp):
    yes = [line for line in tmp if line[5] == 'YES']
    no = [line for line in tmp if line[5] == 'NO']
    accuracy = len(yes) * 100 / (len(yes) + len(no))
    """
    print("Analysis made on " + str(len(data)) + " lines")
    print("Number of hazards seen by the participants: " + str(len(yes)))
    print("Mean values:")
    print("1) Accuracy: " + str(accuracy) + " %")
    print("2) Average first fixation duration: " + str(average(yes, 7))) 
    print("3) Average time of first fixation before hazard: " + str(average(yes, 18)))
    print("4) Average fixation percentage during hazard: " + str(average(yes, 19)) +" %")
    print("5) Average fixation number on hazard: " + str(average(yes, 16)))
    print("6) Average fixation number on hazard as detection: " + str(average(yes, 15)))
    print("7) Average percentage of stops on hazards while driving: " + str(stops(yes)) + " %")
    print("Deviation values:")
    print("2) Standard deviation of first fixation duration: " + str(std(yes, 7)))
    print("3) Standard deviation of time of first fixation: " + str(std(yes, 18)))
    print("4) Standard deviation of fixation percentage during hazard: " + str(std(yes, 19)) +" %")
    print("5) Standard deviation for fixation number on hazard: " + str(std(yes, 16)))
    print("6) Standard deviation for fixation number on hazard as detection: " + str(std(yes, 15)))
    print("----------------------------------------")
    """
    result = [accuracy, average(yes, 7), std(yes, 7), average(yes, 18), std(yes, 18), average(yes, 19), std(yes, 19)]
    result.extend([average(yes, 16), std(yes, 16), average(yes, 15), std(yes, 15), stops(yes)])
    return result

def analyse_participant(ID):
    tmp = [line for line in results if line[0] == ID]
    print("Participant " + str(ID) + " - " + str(len(tmp)) + " lines")
    analyse(tmp)    
    
for participant in participant_values:
    analyse_participant(participant)


Participant 56 - 32 lines
Participant 54 - 32 lines
Participant 42 - 32 lines
Participant 43 - 24 lines
Participant 60 - 32 lines
Participant 61 - 32 lines
Participant 62 - 32 lines
Participant 63 - 32 lines
Participant 64 - 32 lines
Participant 49 - 32 lines
Participant 66 - 32 lines
Participant 67 - 32 lines
Participant 68 - 32 lines
Participant 69 - 32 lines
Participant 52 - 32 lines
Participant 24 - 32 lines
Participant 25 - 32 lines
Participant 27 - 32 lines
Participant 20 - 32 lines
Participant 21 - 32 lines
Participant 22 - 32 lines
Participant 23 - 32 lines
Participant 46 - 24 lines
Participant 47 - 24 lines
Participant 44 - 32 lines
Participant 45 - 32 lines
Participant 28 - 32 lines
Participant 29 - 32 lines
Participant 40 - 32 lines
Participant 41 - 32 lines
Participant 2 - 32 lines
Participant 5 - 32 lines
Participant 4 - 32 lines
Participant 7 - 32 lines
Participant 6 - 32 lines
Participant 9 - 32 lines
Participant 8 - 32 lines
Participant 39 - 32 lines
Participant 65 - 32

In [7]:
analyse(results)
analyse_participant('30')

Participant 30 - 32 lines


In [8]:
# To generate the corrected raw tables.
def generate_data_per_participant(save=False):
    
    a = [["Participant", "Record_Name", "Track", "Hazard_number", "Hazard_type", "Seen", "First fixation time", 
          "First fixation", "Second fixation", "Third fixation", "Fouth fixation", "Fifth fixation", 
          "Sixth fixation", "Seventh fixation", "Cross event", "Detections", "Fixations", "Remarks",
          "Detection laps", "Percentage of fixation during hazard"]]
    
    for value in participant_values:
        if save == True:
            a = [["Participant", "Record_Name", "Track", "Hazard_number", "Hazard_type", "Seen", "First fixation time", 
              "First fixation", "Second fixation", "Third fixation", "Fouth fixation", "Fifth fixation", 
              "Sixth fixation", "Seventh fixation", "Cross event", "Detections", "Fixations", "Remarks",
              "Detection laps", "Percentage of fixation during hazard"]]
        
        a.extend(per_participant(data, value))
        
        if save == True:
            with open("data/data_per_participant/participant_data_"+value+".csv", "wb") as f:
                writer = csv.writer(f, delimiter = ';')
                writer.writerows(a)
    return a

generate_data_per_participant(save = True)

[['Participant',
  'Record_Name',
  'Track',
  'Hazard_number',
  'Hazard_type',
  'Seen',
  'First fixation time',
  'First fixation',
  'Second fixation',
  'Third fixation',
  'Fouth fixation',
  'Fifth fixation',
  'Sixth fixation',
  'Seventh fixation',
  'Cross event',
  'Detections',
  'Fixations',
  'Remarks',
  'Detection laps',
  'Percentage of fixation during hazard'],
 ['50',
  'Recording206',
  '1',
  '1',
  '1AH',
  'YES',
  '00:12,2',
  '0.68',
  '2.36',
  '0',
  '0',
  '0',
  '0',
  '0',
  '00:16,8',
  '2',
  '2',
  'offset',
  4.6,
  66.087],
 ['50',
  'Recording206',
  '1',
  '2',
  '3CH',
  'YES',
  '00:34,2',
  '0.4',
  '0.24',
  '0.2',
  '0.68',
  '0',
  '0',
  '0',
  '00:40,6',
  '4',
  '4',
  '',
  6.4,
  23.75],
 ['50',
  'Recording206',
  '1',
  '3',
  '2AV',
  'YES',
  '00:55,7',
  '2.14',
  '0.24',
  '1.7',
  '0',
  '0',
  '0',
  '0',
  '01:01,2',
  '3',
  '3',
  '',
  5.5,
  74.182],
 ['50',
  'Recording206',
  '1',
  '4',
  '5CV',
  'YES',
  '01:20,0',
  '0

In [9]:
# To generate tables by hazard
def generate_data_per_hazard(save=False):
    a = [["Participant", "Record_Name", "Track", "Hazard_number", "Hazard_type", "Seen", "First fixation time", 
          "First fixation", "Second fixation", "Third fixation", "Fouth fixation", "Fifth fixation", 
          "Sixth fixation", "Seventh fixation", "Cross event", "Detections", "Fixations", "Remarks",
          "Detection laps", "Percentage of fixation during hazard"]]
    for value in hazard_values:
        if save ==True:
            a = [["Participant", "Record_Name", "Track", "Hazard_number", "Hazard_type", "Seen", "First fixation time", 
              "First fixation", "Second fixation", "Third fixation", "Fouth fixation", "Fifth fixation", 
              "Sixth fixation", "Seventh fixation", "Cross event", "Detections", "Fixations", "Remarks",
              "Detection laps", "Percentage of fixation during hazard"]]
        
        a.extend(per_hazard(results, value))
        
        if save == True:
            with open("data/data_per_hazard/hazard_"+value+".csv", "wb") as f:
                writer = csv.writer(f, delimiter = ';')
                writer.writerows(a)
    return a

generate_data_per_hazard(save=True)

[['Participant',
  'Record_Name',
  'Track',
  'Hazard_number',
  'Hazard_type',
  'Seen',
  'First fixation time',
  'First fixation',
  'Second fixation',
  'Third fixation',
  'Fouth fixation',
  'Fifth fixation',
  'Sixth fixation',
  'Seventh fixation',
  'Cross event',
  'Detections',
  'Fixations',
  'Remarks',
  'Detection laps',
  'Percentage of fixation during hazard'],
 ['2',
  'Recording008',
  '1',
  '1',
  '1AH',
  'YES',
  '00:35,2',
  '0.46',
  '2.48',
  '0',
  '0',
  '0',
  '0',
  '0',
  '00:40,6',
  '2',
  '2',
  'left offset',
  5.4,
  54.444],
 ['4',
  'Recording012',
  '1',
  '1',
  '1AH',
  'YES',
  '00:23,6',
  '0.6',
  '2.04',
  '0.66',
  '0.68',
  '0.46',
  '0',
  '0',
  '00:32,5',
  '4',
  '5',
  '',
  8.9,
  49.888],
 ['5',
  'Recording017',
  '1',
  '1',
  '1AH',
  'YES',
  '00:22,5',
  '0.54',
  '2.62',
  '0.38',
  '0',
  '0',
  '0',
  '0',
  '00:28,3',
  '3',
  '3',
  '',
  5.8,
  61.034],
 ['6',
  'Recording032',
  '1',
  '1',
  '1AH',
  'YES',
  '00:26,7

In [10]:
# To generate tables by hazard
def generate_results_participant(save=False):
    a = [["Participant", "Accuracy", "Average first fixation time", "Std first dixation time",
             "Average laps time between first fixation and hazard", "Std time between first fixation and hazard",
             "Percentage of fixation during hazard", "Std of fixation during hazard",
             "Average number of fixations", "Std of number of fixations",
             "Average number of detections", "Std of number of detections",
             "Percentage of stops"]]
    
    # Adding the average results first
    new_line = ["0"]
    new_line.extend(analyse(results))
    a.append(new_line)
    
    # Adding results participant per participant
    for value in participant_values:
        new_line = [value]
        new_line.extend(analyse(per_participant(results, value)))
        a.append(new_line)
    
    # Writting the results as CSV
    if save == True:
        with open("all_participants_results.csv", "wb") as f:
            writer = csv.writer(f, delimiter = ';')
            writer.writerows(a)
        
    return a

# generate_results_participant()       

In [11]:
# To generate tables by hazard
def generate_results_hazard(save = False):
    a = [["Participant", "Accuracy", "Average first fixation time", "Std first dixation time",
             "Average laps time between first fixation and hazard", "Std time between first fixation and hazard",
             "Percentage of fixation during hazard", "Std of fixation during hazard",
             "Average number of fixations", "Std of number of fixations",
             "Average number of detections", "Std of number of detections",
             "Percentage of stops"]]
    
    # Adding the average results first
    new_line = ["0"]
    new_line.extend(analyse(results))
    a.append(new_line)
    
    # Adding results participant per participant
    for value in hazard_values:
        new_line = [value]
        new_line.extend(analyse(per_hazard(results, value)))
        a.append(new_line)
    
    # Writting the results as CSV
    if save == True:
        with open("all_hazards_results.csv", "wb") as f:
            writer = csv.writer(f, delimiter = ';')
            writer.writerows(a)
    return a

# generate_results_hazard() 

## Getting a table showing recording and type

In [12]:
# This table has also been used in another script when working with events.

def getKey0(item):
    return item[0]
    
a = [x[1:3] for x in results]
b_set = set(tuple(x) for x in a)
b = [ list(x) for x in b_set ]
b = sorted(b, key=getKey0)

# Generating heatmaps

In [13]:
# Reading the eye tracking data from 3 different files
if True:
    tmp = load_tsv("/media/sf_EyeTracking/data/short_1_30_unicode.txt")
    tmp.extend(load_tsv("/media/sf_EyeTracking/data/short_31_60_unicode.txt"))
    tmp.extend(load_tsv("/media/sf_EyeTracking/data/short_61_71_unicode.txt"))

print "Data contains " + str(len(tmp)) + " rows."

# Also contains the data when eyetracking fails because live events can be added at the same moment.

Data contains 2163736 rows.


In [225]:
def ms2mins(millis):
    millis = int(millis)
    seconds=(millis/1000)%60
    seconds = int(seconds)
    minutes=(millis/(1000*60))%60
    minutes = int(minutes)
    return ("%dm%ds" % (minutes, seconds))

def mins2ms(minutes, seconds, millis=0):
    return minutes * 60 * 1000 + seconds * 1000 + millis

In [318]:
def heatmap(usr, rec, start=-1, end=-1, title_ext="", save = False, hazard=""):
    
    participant = 0             # current participant ID
    recording_name = 1          # name of the recording
    recording_duration = 2      # recording duration 
    time_column = 3             # time indication
    gaze_x_column = 4           # x-position of the gaze point 
    gaze_y_column = 5           # y-position of the gaze point 
    pupil_diam_left = 6         # diameter of left pupil over time
    pupil_diam_right = 7        # diameter of right pupil over time       
    mt_column = 8               # movement type column
    md_column = 9               # movement duration column
    mi_column = 10              # movement index column
    event_column = 11           # Event type
   
    def get_time(usr, rec):
        coord = [int(line[time_column]) for line in tmp 
                 if line[participant] == usr and line[recording_name] == rec]
        if len(coord) == 0:
            return -1, -1
        return min(coord), max(coord) 
        
    if start == -1 and end == -1:
        start, end = get_time(usr, rec)
    
    if start == -1 and end == -1:
        return 
    
    # Selecting the values for one user and one recording.
    coord = [[line[gaze_x_column], line[gaze_y_column]] for line in tmp 
             if line[participant] == usr and line[recording_name] == rec
             and int(line[time_column]) < end 
             and int(line[time_column]) > start]
    
    
    # Converting to integers 
    def is_number(str):
        try:
            float(str)
            return True
        except:
            return False
    
    def toInt(l):
        return [int(i) for i in l if is_number(i)]
       
    coord = [toInt(l) for l in coord if len(toInt(l)) == 2]
 
    # Extracting the data
    x = [l[0] for l in coord]
    y = [l[1] for l in coord]
    x = np.asarray(x).astype(np.int)
    y = np.asarray(y).astype(np.int)
    
    # Printing the plot 
    title = "Participant " + usr + " (" + rec + ") from " + ms2mins(start) + " to " + ms2mins(end) + title_ext  
    print(title)
    
    if len(x) == 0 or len(y) == 0:
        print("No data to analyse")
        print(" ")
        return
    
    sns.plt.clf()
    sns.set(style="ticks")
    sns.jointplot(x, 1080-y, stat_func=kendalltau, kind="kde",
                     xlim=(0,1920), ylim=(0,1080))
    
    if save ==  True:
        output_dir = "/home/quentin/heatmap/" + hazard # + usr
        savefig( output_dir + '/' + title + '.png', bbox_inches='tight')
    
    # sns.plt.show()


In [319]:
heatmap(usr='42', rec='Recording172')

Participant 42 (Recording172) from 0m0s to 2m32s


In [320]:
def get_time_event(recording, hazard):
    info = [x for x in table if x[0] == hazard]
    lines = [x for x in tmp if x[1] == recording and x[11] == 'Logged live Event']
    
    if len(lines) not in [16, 17, 18]:
        print("Should have been a crash during recording.")
        return [-1, -1]
    
    if len(info) == 0:
        return [-1, -1]
    
    start = info[0][2] - 1
    end = info[0][3] - 1
    
    try:
        return [int(lines[start][3]), int(lines[end][3])]
    except:
        return [-1, -1]

    
def pick_times(from_seen, from_hazard):
    
    if from_seen == [0, 0]:
        return from_hazard
    
    if from_hazard == [-1,-1]:
        return from_seen
    
    result = []
    if from_seen[0] < from_hazard[0]:
        result.append(from_seen[0])
    else:
        result.append(from_hazard[0])
    if from_seen[1] > from_hazard[1]:
        result.append(from_seen[1])
    else:
        result.append(from_hazard[1])    
    return [int(r) for r in result]
    

In [322]:
from datetime import timedelta

participant_data = generate_data_per_participant()
hazard_data = generate_data_per_hazard()

def create_heatmap(data, usr, rec, hazard, seen, save = False):
    # Select correct information
    heatmap_data = [l for l in data if l[0] == usr and l[1] == rec and l[4] == hazard]
    
    def convert(time):
        r = (convert_time(time) - datetime(1900, 1, 1))
        triple = [(r.seconds//60)%60, (r.seconds)%60, r.microseconds//1000]
        return mins2ms(triple[0], triple[1], triple[2])
    
    if hazard in ['4AH', '4CH', '4AV', '4CV']:
        start, end = int(convert(heatmap_data[0][6])), int(convert(heatmap_data[0][14]))
    else:
        start, end = pick_times([int(convert(heatmap_data[0][6])), int(convert(heatmap_data[0][14]))], 
                             get_time_event(rec, hazard))
    
    # Goal heatmap creator function
    heatmap(usr, rec, start=start, end=end, title_ext= " for the hazard " + hazard + seen, hazard=hazard, save=save)

def heatmap_participant(p_data, usr, save= False):
    hazards_records = [[l[1], l[4], l[5]]  for l in p_data if l[0] == usr]
    for r in hazards_records:
        if r[2] == 'YES':
            create_heatmap(p_data, usr, r[0], r[1], ' seen', save=save)
        if r[2] == 'NO':
            create_heatmap(p_data, usr, r[0], r[1], ' not seen', save=save)
    print("User " + str(usr) + " done.")

def heatmap_hazard(h_data, hazard, save = False):
    hazards_records = [[l[0], l[1], l[5]]  for l in h_data if l[4] == hazard]

    for r in hazards_records:
        if r[2] == 'YES':
            create_heatmap(h_data, r[0], r[1], hazard, ' seen.', save=save)
        if r[2] == 'NO':
            create_heatmap(h_data, r[0], r[1], hazard, ' not seen.', save=save)
    print("Hazard " + str(hazard) + " done.")    


In [323]:
# Equivalence between hazard and position of the logged live event
table = [['1AH', 1, 1, 2], ['3CH', 1, 3, 5], ['2AV', 1, 6, 8],
         ['5CV', 1, 9, 10], ['1CV', 1, 11, 12], ['6AV', 1, 13, 14],
         ['4AH', 1, 15, 15], ['6CH', 1, 16, 17], ['1CH', 2, 1, 2], 
         ['2AH', 2, 3, 5], ['4CV', 2, 6, 6], ['5AV', 2, 7, 8], 
         ['3AV', 2, 9, 11], ['3AH', 2, 12, 14], ['2CV', 2, 15, 17], 
         ['6AH', 2 ,18, 19], ['6CV', 3, 1, 2], ['4CH', 3, 3, 3], 
         ['5AH', 3, 4, 5], ['4AV', 3, 6, 6], ['5CH', 3, 7, 8], 
         ['2CH', 3, 9,11], ['3CV', 3, 12, 14], ['1AV', 3, 15, 16]]

In [324]:
# get_time_event("Recording007", '2CH') # -1, -1
start_ex, end_ex = get_time_event("Recording005", '1AH') 
heatmap('2', 'Recording005', start=start_ex, end=end_ex, title_ext="")

Participant 2 (Recording005) from 0m26s to 0m37s


In [326]:
# heatmap_participant(participant_data, '38', True)
# heatmap_hazard(hazard_data, '1CH', save=True)
# create_heatmap(participant_data, '30', 'Recording118', '2AV', ' seen')

In [329]:
hazard_values.sort()
for hazard in hazard_values:
    heatmap_hazard(hazard_data, hazard, save=True)

Participant 2 (Recording008) from 0m34s to 0m40s for the hazard 1AH seen.
Participant 4 (Recording012) from 0m22s to 0m32s for the hazard 1AH seen.
Participant 5 (Recording017) from 0m22s to 0m28s for the hazard 1AH seen.
Participant 6 (Recording032) from 0m26s to 0m38s for the hazard 1AH seen.
Participant 7 (Recording037) from 0m29s to 0m35s for the hazard 1AH seen.
Participant 7 (Recording034) from 0m32s to 0m38s for the hazard 1AH seen.
Participant 8 (Recording041) from 0m21s to 0m27s for the hazard 1AH seen.
Participant 8 (Recording038) from 0m19s to 0m27s for the hazard 1AH seen.
Participant 9 (Recording043) from 0m27s to 0m32s for the hazard 1AH seen.
Participant 11 (Recording049) from 0m30s to 0m41s for the hazard 1AH seen.
Participant 12 (Recording053) from 0m21s to 0m28s for the hazard 1AH seen.
Participant 13 (Recording058) from 0m20s to 0m30s for the hazard 1AH seen.
Participant 13 (Recording055) from 0m20s to 0m26s for the hazard 1AH seen.
Participant 14 (Recording063) from

In [287]:
participant_values.sort()

# done except 38, 56, 57
for participant in participant_values:
    heatmap_participant(participant_data, participant, save=True)

Participant 38 (Recording154) from 0m25s to 0m33s for the hazard 6CV seen
Participant 38 (Recording154) from 0m34s to 0m42s for the hazard 4CH seen
Participant 38 (Recording154) from 0m49s to 1m0s for the hazard 5AH not seen
Participant 38 (Recording154) from 1m16s to 1m23s for the hazard 4AV seen
Participant 38 (Recording154) from 1m24s to 1m34s for the hazard 5CH seen
Participant 38 (Recording154) from 1m49s to 1m58s for the hazard 2CH seen
Participant 38 (Recording154) from 2m13s to 2m23s for the hazard 3CV seen
Participant 38 (Recording154) from 2m45s to 2m54s for the hazard 1AV seen
Should have been a crash during recording.
Participant 38 (Recordnig153) from 0m31s to 0m37s for the hazard 1AH seen
No data to analyse
 
Should have been a crash during recording.
Participant 38 (Recordnig153) from 0m57s to 1m7s for the hazard 2CH seen
No data to analyse
 
Should have been a crash during recording.
Participant 38 (Recordnig153) from 1m26s to 1m34s for the hazard 2AV seen
No data to an

ValueError: min() arg is an empty sequence

## Clustering

In [25]:
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler

In [26]:
X = [[x[0], x[1], x[3], x[5], x[7]] for x in generate_results_participant()[1:]]

In [27]:
def getKey(item):
    try: 
        return int(item[0])
    except:
        return 0
    
X = sorted(X, key=getKey)
X

[['0', 85, 1.083, 5.994, 21.998],
 ['2', 81, 1.183, 3.259, 21.641],
 ['4', 78, 1.033, 2.595, 22.162],
 ['5', 93, 1.522, 2.937, 25.363],
 ['6', 96, 1.416, 3.267, 24.992],
 ['7', 84, 0.649, 2.817, 18.709],
 ['8', 84, 0.638, 2.573, 19.164],
 ['9', 78, 0.79, 1.738, 19.745],
 ['11', 79, 1.448, 2.809, 17.612],
 ['12', 90, 0.516, 11.599, 21.239],
 ['13', 90, 0.804, 22.378, 17.96],
 ['14', 78, 0.749, 1.752, 17.353],
 ['15', 100, 1.41, 2.126, 20.646],
 ['16', 87, 1.31, 3.022, 16.795],
 ['17', 81, 0.947, 1.39, 25.407],
 ['18', 81, 0.596, 5.888, 21.419],
 ['20', 65, 0.742, 25.576, 19.992],
 ['21', 80, 0.657, 2.2, 18.819],
 ['22', 92, 1.237, 2.404, 23.946],
 ['23', 68, 0.906, 1.59, 19.077],
 ['24', 81, 0.801, 2.179, 21.176],
 ['25', 71, 0.487, 24.553, 18.604],
 ['27', 84, 0.602, 2.428, 21.959],
 ['28', 81, 1.079, 2.14, 20.66],
 ['29', 78, 0.975, 2.27, 19.217],
 ['30', 93, 1.021, 3.569, 21.004],
 ['31', 84, 1.763, 2.721, 22.343],
 ['33', 87, 1.062, 2.252, 23.494],
 ['34', 90, 0.723, 3.399, 17.434],

In [86]:
from sklearn.cluster import KMeans
X_bis = [x[1:] for x in X]
X_bis = np.array(X_bis)
kmeans = KMeans(n_clusters=2).fit(X_bis)
clusters = kmeans.predict(X_bis)

cluster_1 = []
cluster_2 = []
for i in range(0, len(clusters)):
    if clusters[i] == 1: 
        cluster_1.append(X[i])
    if clusters[i] == 0:
        cluster_2.append(X[i])

header = ['ID', 'Accuracy', 'First fixation', 'Time laps', 'Percentage fixation']
cluster_1 = pd.DataFrame(cluster_1, columns = header)
cluster_2 = pd.DataFrame(cluster_2, columns = header)

In [115]:
print cluster_1['ID'].values.tolist()
print cluster_1['Accuracy'].mean()
print cluster_1['First fixation'].mean()
print cluster_1['Time laps'].mean()
print cluster_1['Percentage fixation'].mean()

['0', '2', '4', '7', '8', '9', '11', '14', '17', '18', '20', '21', '23', '24', '25', '27', '28', '29', '31', '38', '41', '43', '45', '48', '50', '52', '53', '55', '60', '61', '69', '70']
79.4375
0.949
3.93728125
20.586125


In [116]:
print cluster_2['ID'].values.tolist()
print cluster_2['Accuracy'].mean()
print cluster_2['First fixation'].mean()
print cluster_2['Time laps'].mean()
print cluster_2['Percentage fixation'].mean()

['5', '6', '12', '13', '15', '16', '22', '30', '33', '34', '36', '39', '40', '42', '44', '46', '47', '49', '51', '54', '56', '57', '58', '59', '62', '63', '64', '65', '66', '67', '68', '71']
91.875
1.0136875
3.56815625
21.21
