In [2]:
import pandas as pd
import numpy as np
import os
from tqdm.notebook import tqdm

In [3]:
# Lookup table read from an Excel sheet whose first row is the header.
# It is handed to apply_distance() further down, which joins it onto the
# digraph features through its 'Keys' column.
# NOTE(review): presumably it also supplies the 'Distance' and 'Hands' columns
# named in the feature-file header -- confirm against the spreadsheet.
key_distance = pd.read_excel('C:/Research Activities/Dataset/Keyboard_distance.xlsx', header=0)

In [4]:
# Folder that holds the per-device free-text annotation files.
annotations_dir = 'C:/Research Activities/Dataset/BB-MAS_Dataset/'

# One annotation table per device; the first row of every file is the header.
desktop_free = pd.read_csv(annotations_dir + 'Desktop_Freetext.csv', header=0)
phone_free = pd.read_csv(annotations_dir + 'Phone_Freetext.csv', header=0)
tablet_free = pd.read_csv(annotations_dir + 'Tablet_Freetext.csv', header=0)

# Parse the time column of each table (the desktop file names it differently).
desktop_free['timestamp'] = pd.to_datetime(desktop_free['timestamp'])
phone_free['FreeText_Start'] = pd.to_datetime(phone_free['FreeText_Start'])
tablet_free['FreeText_Start'] = pd.to_datetime(tablet_free['FreeText_Start'])

# Raw row arrays, kept as separate names for later cells.
desktop_annotations = desktop_free.values
phone_annotations = phone_free.values
tablet_annotations = tablet_free.values

# Lookup tables: int(first field of a row) -> second field of that row.
# The desktop table is later indexed by the user number parsed from a file name.
desktop_calibrated_annotations = {int(row[0]): row[1] for row in desktop_annotations}
phone_calibrated_annotations = {int(row[0]): row[1] for row in phone_annotations}
tablet_calibrated_annotations = {int(row[0]): row[1] for row in tablet_annotations}

In [5]:
def return_target_csv(data, target_time):
    """Keep the rows of ``data`` whose fourth field is strictly before ``target_time``.

    Only the first four fields of each kept row are copied; any further
    fields are dropped. Row order is preserved.

    Args:
        data: iterable of indexable rows with at least four fields.
        target_time: cut-off compared against field 3 of every row.

    Returns:
        ``np.ndarray`` built from the kept rows (an empty array when no row
        qualifies).
    """
    kept_rows = [
        [row[0], row[1], row[2], row[3]]
        for row in data
        if row[3] < target_time
    ]
    return np.asarray(kept_rows)

In [6]:
# Trim every raw desktop keystroke log to the events that precede the user's
# annotated timestamp and save the result as User_<id>.csv in target_dir.
data_dir = 'C:/Research Activities/Dataset/BB-MAS_Dataset/Desktop/'
target_dir = 'C:/Research Activities/Dataset/BB-MAS_Dataset/Desktop_fixed_text/'

# Only CSV files are processed, in a deterministic order; stray files or
# folders in data_dir would otherwise abort the loop.
user_files = sorted(name for name in os.listdir(data_dir) if name.lower().endswith('.csv'))
for user_file in tqdm(user_files):
    data_frame = pd.read_csv(os.path.join(data_dir, user_file), header=0)
    data_frame['time'] = pd.to_datetime(data_frame['time'])
    user_data = data_frame.values
    # The user number is the part of the file name before the first underscore.
    curr_user_ind = int(user_file[:user_file.find('_')])

    # Users without an annotation keep all of their events. A membership test
    # is used so that a KeyError raised anywhere else is not silently swallowed.
    if curr_user_ind in desktop_calibrated_annotations:
        target_time_stamp = desktop_calibrated_annotations[curr_user_ind]
        fixed_rows = return_target_csv(user_data, target_time_stamp)
    else:
        fixed_rows = user_data

    # The context manager closes the file even if a write fails part-way.
    with open(os.path.join(target_dir, 'User_'+str(curr_user_ind)+'.csv'), 'w') as f:
        f.write('"EID","key","direction","time"\n')
        for line in fixed_rows:
            f.write('"' + '","'.join(str(line[k]) for k in range(4)) + '"\n')

  0%|          | 0/109 [00:00<?, ?it/s]

In [152]:
def get_timings_KIT(keys_in_pipeline, search_key, search_key_timing):
    """Match ``search_key`` against the pending (key, timing) pairs.

    The pairs are scanned in order and the first one whose key equals
    ``search_key`` is removed.

    Args:
        keys_in_pipeline: sequence of ``[key, timing]`` pairs still waiting
            for a match.
        search_key: key to look for.
        search_key_timing: timing handed back unchanged as the third result
            when a match is found.

    Returns:
        ``(remaining, timing, search_key_timing)`` on a match, where
        ``remaining`` is an object-dtype array of the pairs left over and
        ``timing`` is the stored timing of the matched pair.
        ``(pairs, None, None)`` when no pair matches.
    """
    # dtype=object keeps every key and timing as the object that was passed in.
    # Letting NumPy infer the dtype turns numeric timings into strings as soon
    # as the keys are strings.
    pending = np.asarray(keys_in_pipeline, dtype=object)
    for position, (key, timing) in enumerate(pending):
        if search_key == key:
            # Drop only the matched pair; later duplicates stay pending.
            remaining = np.delete(pending, position, axis=0)
            return remaining, timing, search_key_timing
    return pending, None, None

# Build the KIT table (key, press_time, release_time) for one user's event rows
def get_dataframe_KIT(data):
    """Pair direction-0 and direction-1 events into one row per keystroke.

    Rows of ``data`` are read positionally: field 1 is the key, field 2 the
    direction and field 3 the event time. A direction-0 event is queued as
    pending; a direction-1 event is matched against the pending events through
    ``get_timings_KIT`` and, when a match exists, produces one output row.
    Direction-1 events without a pending match are ignored, and output rows
    appear in the order their direction-1 events are encountered.

    Returns:
        DataFrame with columns ``Key``, ``Press_Time`` and ``Release_Time``.
    """
    pending = []
    keystrokes = []
    for event in data:
        key, direction, when = event[1], event[2], event[3]

        if direction == 0:
            pending.append([key, when])
        elif direction == 1:
            pending, pressed_at, released_at = get_timings_KIT(pending, key, when)
            # The helper may hand back an array; keep a list so append() works.
            pending = list(pending)
            if pressed_at is not None:
                keystrokes.append((key, pressed_at, released_at))

    return pd.DataFrame(keystrokes, columns=['Key', 'Press_Time', 'Release_Time'])

In [153]:
def _elapsed_ms(start, end):
    """Signed duration from ``start`` to ``end`` in milliseconds, as a float."""
    return (end - start) / pd.Timedelta(milliseconds=1)


def get_DIG_features(data):
    """Compute timing features for every pair of consecutive rows.

    Each row of ``data`` is read positionally as (key, press_time,
    release_time). For rows ``i`` and ``i + 1`` the output contains:

    * ``Keys``      - the two keys joined by a comma
    * ``Holdtime1`` - release1 - press1
    * ``Holdtime2`` - release2 - press2
    * ``F1``        - press2 - release1 (negative when the keys overlap)
    * ``F2``        - press2 - press1
    * ``F3``        - release2 - release1
    * ``F4``        - release2 - press1

    All durations are full, signed millisecond values. Reading the
    ``.microseconds`` attribute of a timedelta would return only its
    sub-second component, which wraps intervals of one second or more and
    turns negative intervals into large positive ones.

    Returns:
        DataFrame with the seven columns above; it has those columns even
        when ``data`` holds fewer than two rows.
    """
    columns = ['Keys', 'Holdtime1', 'Holdtime2', 'F1', 'F2', 'F3', 'F4']
    result = []
    for row_idx in range(len(data) - 1):
        first, second = data[row_idx], data[row_idx + 1]
        key1, press1, release1 = first[0], first[1], first[2]
        key2, press2, release2 = second[0], second[1], second[2]
        result.append({
            'Keys': str(key1) + ',' + str(key2),
            'Holdtime1': _elapsed_ms(press1, release1),
            'Holdtime2': _elapsed_ms(press2, release2),
            'F1': _elapsed_ms(release1, press2),
            'F2': _elapsed_ms(press1, press2),
            'F3': _elapsed_ms(release1, release2),
            'F4': _elapsed_ms(press1, release2),
        })

    # Explicit columns keep the schema stable for empty results, so a later
    # join on 'Keys' still finds the column.
    return pd.DataFrame(result, columns=columns)

In [1]:
def apply_distance(data1, data2):
    """Left-join ``data2`` onto ``data1`` by 'Keys' and return the rows as an array.

    Every row of ``data1`` is kept; the columns of ``data2`` (other than
    'Keys') are appended and are missing where ``data2`` has no matching key.
    Rows are ordered by the index of ``data1``.

    Args:
        data1: DataFrame with a 'Keys' column.
        data2: DataFrame with a 'Keys' column used as the lookup key.

    Returns:
        ``np.ndarray`` holding the joined rows.
    """
    lookup = data2.set_index('Keys')
    joined = data1.join(lookup, on='Keys', how='left')
    ordered = joined.sort_index()
    return np.asarray(ordered)

In [197]:
# Turn every trimmed keystroke log into digraph timing features, attach the
# key-distance lookup, and save one User_<id>.csv per input file.
data_dir = 'C:/Research Activities/Dataset/BB-MAS_Dataset/Desktop_fixed_text/'
target_dir = 'C:/Research Activities/Dataset/BB-MAS_Dataset/Desktop_features_fixed/'

# Only CSV files are processed, in a deterministic order; stray files or
# folders in data_dir would otherwise abort the loop.
user_files = sorted(name for name in os.listdir(data_dir) if name.lower().endswith('.csv'))
for user_file in tqdm(user_files):
    data_frame = pd.read_csv(os.path.join(data_dir, user_file), header=0)
    # Replace the fourth column by label. Assigning through .iloc can write the
    # parsed values into the existing column instead of swapping the column out.
    time_column = data_frame.columns[3]
    data_frame[time_column] = pd.to_datetime(data_frame[time_column])
    user_data = data_frame.values
    # The user number sits between the first underscore and the first dot.
    curr_user_ind = int(user_file[user_file.find('_')+1:user_file.find('.')])
    user_timing = get_dataframe_KIT(user_data).values
    get_features = get_DIG_features(user_timing)
    feature_rows = apply_distance(get_features, key_distance)

    # The context manager closes the file even if a write fails part-way.
    with open(os.path.join(target_dir, 'User_'+str(curr_user_ind)+'.csv'), 'w') as f:
        f.write('"Keys","Holdtime1","Holdtime2","F1","F2","F3","F4","Distance","Hands"\n')
        for line in feature_rows:
            # Nine fields per row: seven timing features plus the two joined columns.
            f.write('"' + '","'.join(str(line[k]) for k in range(9)) + '"\n')

  0%|          | 0/115 [00:00<?, ?it/s]