In [1]:
#!/usr/bin/python3
'''*.csv Data Format: ['Case', 'RunTime', 'Object', 'EnterTime', 'Velocity', 'Acceleration']'''
import csv, os, numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt

In [2]:
def get_data(fullpath) -> pd.DataFrame:
  df = pd.read_csv(fullpath)
  return df

In [3]:
def find_local_maximums(df: pd.DataFrame, var_name1:'Case', var_name2: 'Velocity') -> pd.DataFrame:
  df_grp = df.groupby(var_name1)
  idx = []
  for key, key_df in df_grp:
    pre_pre_idx = None
    pre_pre_var = None
    pre_idx = None
    pre_var = None
    pre_left = False
    pre_right = False
    for index, row in key_df.iterrows():
      if pre_pre_idx is None:
        pre_pre_idx = index
        pre_pre_var = row[var_name2]
      elif pre_idx is None:
        pre_idx = index
        pre_var = row[var_name2]
      else:
        cur_var = row[var_name2]
        # previous
        pre_left = (pre_pre_var <= pre_var)
        pre_right = (cur_var <= pre_var)
        if pre_left and pre_right:
          idx.append(pre_idx)
        # update
        pre_pre_idx = pre_idx
        pre_pre_var = pre_var
        pre_idx = index
        pre_var = cur_var
  return idx

In [4]:
def find_local_minimums(df: pd.DataFrame, var_name1:'Case', var_name2: 'fastcity') -> pd.DataFrame:
  df_grp = df.groupby(var_name1)
  idx = []
  for key, key_df in df_grp:
    pre_pre_idx = None
    pre_pre_var = None
    pre_idx = None
    pre_var = None
    pre_left = False
    pre_right = False
    for index, row in key_df.iterrows():
      if pre_pre_idx is None:
        pre_pre_idx = index
        pre_pre_var = row[var_name2]
      elif pre_idx is None:
        pre_idx = index
        pre_var = row[var_name2]
      else:
        cur_var = row[var_name2]
        # previous
        pre_left = (pre_pre_var >= pre_var)
        pre_right = (cur_var >= pre_var)
        if pre_left and pre_right:
          idx.append(pre_idx)
        # update
        pre_pre_idx = pre_idx
        pre_pre_var = pre_var
        pre_idx = index
        pre_var = cur_var
  return idx

In [5]:
def get_quantile_and_IQR(df: pd.DataFrame) -> list:
  # shape (2,1) -> squeeze() -> shape (2,)
  [q1, q3] = df.quantile([.25, .75]).squeeze()
  IQR = q3-q1
  return [q1, q3, IQR]

In [6]:
def remove_outliers(df: pd.DataFrame, col_name, by_case) -> pd.DataFrame:
  '''Input: df, target variable's column name, if the removement is by case
     Output: df without outlier data
     Outliers Calculation: data points < (q1-1.5*IQR) or data points > (q3 + 1.5*IQR)
  '''
  if by_case:
   df_by_case = df.groupby('Case')
   df_out = pd.DataFrame(columns=df.columns)
   for key, key_df in df_by_case:
      [q1, q3, IQR] = get_quantile_and_IQR(key_df[col_name])
      temp = key_df[(key_df[col_name] >= q1-1.5*IQR) & (key_df[col_name] <= q3+1.5*IQR)]
      df_out = pd.concat([df_out, temp])
   return df_out
  else:
   [q1, q3, IQR] = get_quantile_and_IQR(df[col_name])
   return df[(df[col_name] >= q1-1.5*IQR) & (df[col_name] <= q3+1.5*IQR)]

In [7]:
'''
1. Get the data of the inner button's velocity or acceleration
2. Delete the outliers and perhaps the positive acceleration
3. Calculate the mean
'''
def get_no_outlier_for_var2(file_path, object_name, var_name1:'RunTime', var_name2: 'Velocity or Acceleration') -> pd.DataFrame:
  # Read one csv and extract data of a specific object
  df_one = get_data(file_path)
  if (var_name2 == "Acceleration"):
    df_one = df_one[(df_one[var_name2] < 0)]
  df_vars = df_one[[var_name1,var_name2]]
  df_no_outliers = remove_outliers(df_vars)
  return df_no_outliers
  

In [8]:
def get_biggest_EnterTime(file_path, var_name1:'Case', var_name2: 'EnterTime'):
  df_one = get_data(file_path)
  df_vars = df_one[[var_name1,var_name2]]
  idx = df_vars.groupby(var_name1)[var_name2].idxmax()
  return df_vars.loc[idx,[var_name1,var_name2]]

In [79]:
def get_crest_for_var2(file_path, var_name1:'Case', var_name2: 'Velocity or Acceleration', AddRunTime, Remove_outlier = True)->pd.DataFrame:
  df = get_data(file_path)
  df_one = df[[var_name1, var_name2]]
  if var_name2 == "Acceleration":
    df_one = df_one[df_one['Acceleration']<0]
  #print(df_one.shape)
  if Remove_outlier == True:
    print(var_name2 + " Outlier Removed")
    df_new = remove_outliers(df_one, var_name2, True)
    print(df_new[var_name2])
  else:
    print(var_name2 + " Outlier Included")
    df_new = df_one
    print(df_new[var_name2])
  #print(df_one_no_out.shape)
  idx = find_local_minimums(df_new, var_name1, var_name2)
  if AddRunTime:
    df_one = df[[var_name1, 'RunTime', var_name2]]
    df_crest = df_one.loc[idx, [var_name1, 'RunTime', var_name2]]
  else:
    df_one = df[[var_name1, var_name2]]
    df_crest = df_one.loc[idx, [var_name1, var_name2]]
  return df_crest

In [78]:
def get_peak_for_var2(file_path, var_name1:'Case', var_name2: 'Velocity or Acceleration', AddRunTime, Remove_outlier = True)->pd.DataFrame:
  df_one = get_data(file_path)
  if var_name2 == "Acceleration":
    df_one = df_one[df_one['Acceleration']<0]
  if Remove_outlier == True:
    print(var_name2 + " Outlier Removed")
    df_new = remove_outliers(df_one, var_name2, True)
    print(df_new)
  else:
    print(var_name2 + " Outlier Included")
    df_new = df_one
    print(df_new)
  idx = find_local_maximums(df_new, var_name1, var_name2)
  if AddRunTime:
    df_one = df_one[[var_name1, 'RunTime', var_name2]]
    df_crest = df_one.loc[idx, [var_name1, 'RunTime', var_name2]]
  else:
    df_one = df_one[[var_name1, var_name2]]
    df_crest = df_one.loc[idx, [var_name1, var_name2]]
  return df_crest

In [10]:
def lineplot_all_and_crest(x:str, y:str, df_all: pd.DataFrame, df_crest: pd.DataFrame, save_path):
  df_all.insert(df_all.shape[1], 'Class', 'all')
  df_crest.insert(df_crest.shape[1], 'Class', 'crest')
  df_concat = pd.concat([df_all,df_crest])
  fig = sns.lineplot(x=x, y=y, hue='Class', data=df_concat)
  figure = fig.get_figure()
  figure.savefig(save_path, dpi=400)
  

In [11]:
def describe_by_Case(df: pd.DataFrame, col_name) -> pd.DataFrame:
  df_case = pd.DataFrame(columns=['Number','Case','Min','Mean','Max'])
  for key, key_df in df:
    df_case.loc[len(df_case.index)] = [key[0],key[1],key_df[col_name].min(),key_df[col_name].mean(),key_df[col_name].max()]
  return df_case

In [60]:
# File Paths
root_dir = r"C:\Users\angel\Desktop\S1DA\Separated - Backtrace - 230422 - study1\Cancel"
analyze_date = "221212"
out_dir = root_dir + " - out"
execute_dir = []
cancel_dir = []
if not os.path.exists(out_dir):
  os.makedirs(out_dir)
for dir in os.listdir(root_dir):
  print(dir)
  if dir.endswith("Cancel"):
    cancel_dir.append(dir)
    #print(cancel_dir)
  elif dir.endswith("Execute"):
    execute_dir.append(dir)
long_in_csvs = []
fast_in_csvs = []
velo_in_csvs = []
acce_in_csvs = []
for dir in cancel_dir:
  temp = root_dir+"/"+dir
  for csv in os.listdir(temp):
    temp_csv = temp + "/" + csv
    if csv.endswith("LongTapIn.csv"):
      long_in_csvs.append(temp_csv)
    elif csv.endswith("FastTapIn.csv"):
      fast_in_csvs.append(temp_csv)
    elif csv.endswith("VelocityIn.csv"):
      velo_in_csvs.append(temp_csv)
    elif csv.endswith("AccelerationIn.csv"):
      acce_in_csvs.append(temp_csv)
print("LongTapInExecute:",long_in_csvs)
print("FastTapInExecute:",fast_in_csvs)
print("VelocityInExecute:",velo_in_csvs)
print("AcceInExecute:",acce_in_csvs)

230426 - 1 - Cancel
230426 - 10 - Cancel
230426 - 2 - Cancel
230426 - 3 - Cancel
230426 - 4 - Cancel
230426 - 5 - Cancel
230426 - 6 - Cancel
230426 - 7 - Cancel
230426 - 8 - Cancel
230426 - 9 - Cancel
LongTapInExecute: []
FastTapInExecute: []
VelocityInExecute: ['C:\\Users\\angel\\Desktop\\S1DA\\Separated - Backtrace - 230422 - study1\\Cancel/230426 - 1 - Cancel/020221105-06-53-56-VelocityIn.csv', 'C:\\Users\\angel\\Desktop\\S1DA\\Separated - Backtrace - 230422 - study1\\Cancel/230426 - 1 - Cancel/20221105-06-53-56-VelocityIn.csv', 'C:\\Users\\angel\\Desktop\\S1DA\\Separated - Backtrace - 230422 - study1\\Cancel/230426 - 10 - Cancel/020230422-11-28-31-VelocityIn.csv', 'C:\\Users\\angel\\Desktop\\S1DA\\Separated - Backtrace - 230422 - study1\\Cancel/230426 - 10 - Cancel/20230422-11-28-31-VelocityIn.csv', 'C:\\Users\\angel\\Desktop\\S1DA\\Separated - Backtrace - 230422 - study1\\Cancel/230426 - 2 - Cancel/020221105-06-59-41-VelocityIn.csv', 'C:\\Users\\angel\\Desktop\\S1DA\\Separated - B

In [61]:
# LongTapTime
'''Output: Number (Subject Number) | Case (Enter Case Number) | EnterTime (Selected Biggest EnterTime)'''
long_save_name = analyze_date + ' - LongTapTime.csv'
long_df = pd.DataFrame(columns=['Number','Case','EnterTime'])
for csv in long_in_csvs:
  index = csv.split('/')[1].split(' - ')[1]
  df = get_biggest_EnterTime(csv, 'Case', 'EnterTime')
  df.insert(df.shape[1], 'Number', index)
  long_df = pd.concat([long_df,df])
long_df = remove_outliers(long_df, 'EnterTime', False)
long_df.to_csv(out_dir + '/' + long_save_name, index=False)
print("Average Biggest Enter Time for Long Tap:",long_df['EnterTime'].mean())

Average Biggest Enter Time for Long Tap: nan


In [62]:
# FastTapTime
'''Output: Number (Subject Number) | Case (Enter Case Number) | EnterTime (Selected Biggest EnterTime)'''
fast_save_name = analyze_date + ' - FastTapTime.csv'
fast_df = pd.DataFrame(columns=['Number','Case','EnterTime'])
for csv in fast_in_csvs:
  index = csv.split('/')[1].split(' - ')[1]
  df = get_biggest_EnterTime(csv, 'Case', 'EnterTime')
  df.insert(df.shape[1], 'Number', index)
  fast_df = pd.concat([fast_df,df])
fast_df = remove_outliers(fast_df, 'EnterTime', False)
fast_df.to_csv(out_dir + '/' + fast_save_name, index=False)
print("Average Biggest Enter Time for Fast Tap:",fast_df['EnterTime'].mean())

Average Biggest Enter Time for Fast Tap: nan


In [66]:
# Velocity
velo_df = pd.DataFrame(columns=['Number','Case','Velocity'])
for csv in velo_in_csvs:
  index = csv.split('/')[1].split(' - ')[1]
  df = pd.read_csv(csv)
  df = df[['Case','Velocity']]
  df = remove_outliers(df, 'Velocity', True)
  df.insert(df.shape[1], 'Number', index)
  velo_df = pd.concat([velo_df,df])
velo_df_grp = velo_df.groupby(['Number','Case'])
velo_by_case = describe_by_Case(velo_df_grp,'Velocity')
velo_min_mean = velo_by_case['Min'].mean()
velo_mean_mean = velo_by_case['Mean'].mean()
velo_max_mean = velo_by_case['Max'].mean()
velo_df.to_csv(out_dir + '/' + analyze_date + ' - Velo_no_outliers.csv', index=False)
velo_by_case.to_csv(out_dir + '/' + analyze_date + ' - Velo_by_case.csv', index=False)
velo_dist = (velo_mean_mean - velo_min_mean)/3
print("Velocity for Button Number 4's condition A:",velo_mean_mean)
print("Velocity for condition C:", velo_max_mean)
print("Velocity distance:", velo_dist)
print("The Velocity of condition A for the seven models:")
for i in range(-3,4):
  print(velo_mean_mean + velo_dist * i)

Velocity for Button Number 4's condition A: 0.5062329337137572
Velocity for condition C: 0.8062087410859728
Velocity distance: 0.07541025730609403
The Velocity of condition A for the seven models:
0.2800021617954751
0.3554124191015692
0.4308226764076632
0.5062329337137572
0.5816431910198513
0.6570534483259453
0.7324637056320393


In [67]:
# Acceleration
acce_df = pd.DataFrame(columns=['Number','Case','Acceleration'])
for csv in acce_in_csvs:
  #rint(csv)
  index = csv.split('/')[1].split(' - ')[1]
  #rint(index)
  df = pd.read_csv(csv)
  df = df[['Case','Acceleration']]
  df = df[df['Acceleration']<-0.5841011435740535]
  df = remove_outliers(df, 'Acceleration', True)
  df.insert(df.shape[1], 'Number', index)
  acce_df = pd.concat([acce_df,df])
  #print(acce_df)
acce_df_grp = acce_df.groupby(['Number','Case'])
acce_by_case = describe_by_Case(acce_df_grp,'Acceleration')
acce_min_mean = acce_by_case['Min'].mean()
acce_mean_mean = acce_by_case['Mean'].mean()
acce_max_mean = acce_by_case['Max'].mean()
acce_df.to_csv(out_dir + '/' + analyze_date + ' - Acce_no_outliers.csv', index=False)
acce_by_case.to_csv(out_dir + '/' + analyze_date + ' - Acce_by_case.csv', index=False)
acce_dist = (acce_mean_mean - acce_max_mean)/3
print("Acceleration for Button Number 4's condition B:",acce_mean_mean)
print("Acceleration distance:",acce_dist)
print("The Acceleration for the seven models:")
for i in range(-3,4):
  print(acce_mean_mean + acce_dist * i)

Acceleration for Button Number 4's condition B: -21.109126356543015
Acceleration distance: -3.5893254010289315
The Acceleration for the seven models:
-10.341150153456221
-13.930475554485152
-17.519800955514086
-21.109126356543015
-24.698451757571945
-28.28777715860088
-31.87710255962981


In [68]:
# Acceleration
acce_df = pd.DataFrame(columns=['Number','Case','Acceleration'])
print(acce_df)
for csv in acce_in_csvs:
  #print(csv)
  csv_name = csv.split('/')[2].split(".")[0]
  #print(csv_name)
  index = csv.split('/')[1].split(' - ')[1]
  #print(index)
  df = pd.read_csv(csv)
  df = df[['Case','Acceleration']]
  df = df[df['Acceleration']<-0.5841011435740535]
  #df = remove_outliers(df, 'Acceleration', True)
  df.insert(df.shape[1], 'Number', index)
  acce_df = pd.concat([acce_df,df])
  #acce_df.to_csv(r"C:\Users\angel\Desktop\S1DA\Separated - Backtrace - 230422 - study1\raw" + "\\" + csv_name + " - raw.csv" , index = False)
acce_df_grp = acce_df.groupby(['Number','Case'])
acce_by_case = describe_by_Case(acce_df_grp,'Acceleration')
acce_min_mean = acce_by_case['Min'].mean()
acce_mean_mean = acce_by_case['Mean'].mean()
acce_max_mean = acce_by_case['Max'].mean()
acce_df.to_csv(out_dir + '/' + analyze_date + ' - Acce_no_outliers.csv', index=False)
acce_by_case.to_csv(out_dir + '/' + analyze_date + ' - Acce_by_case.csv', index=False)
acce_dist = (acce_mean_mean - acce_max_mean)/3
print("Acceleration for Button Number 4's condition B:",acce_mean_mean)
print("Acceleration distance:",acce_dist)
print("The Acceleration for the seven models:")
for i in range(-3,4):
  print(acce_mean_mean + acce_dist * i)

Empty DataFrame
Columns: [Number, Case, Acceleration]
Index: []
Acceleration for Button Number 4's condition B: -21.602520829986943
Acceleration distance: -3.8217620210555556
The Acceleration for the seven models:
-10.137234766820276
-13.958996787875833
-17.780758808931388
-21.602520829986943
-25.4242828510425
-29.246044872098054
-33.06780689315361


In [21]:
acce_df

Unnamed: 0,Number,Case,Acceleration
0,1,11,-13.988710
2,1,11,-2.201226
4,1,11,-3.037988
8,1,12,-2.623502
10,1,12,-13.774800
...,...,...,...
199,9,29,-36.134250
201,9,29,-42.748990
203,9,30,-19.209770
204,9,30,-1.276043


No outlier

Acceleration for Button Number 4's condition B: -4.749872072703843
Acceleration distance: -1.5004264088416142
The Acceleration for the seven models:
-0.24859284617900013
-1.7490192550206145
-3.249445663862229
-4.749872072703843
-6.250298481545457
-7.750724890387071
-9.251151299228685

Original

Acceleration for Button Number 4's condition B: -3.101236399826085
Acceleration distance: -0.9508811845490283
The Acceleration for the seven models:
-0.24859284617900013
-1.1994740307280283
-2.1503552152770564
-3.101236399826085
-4.052117584375113
-5.002998768924141
-5.9538799534731695

No outlier and under -0.5841011435740535

Acceleration for Button Number 4's condition B: -6.000815429471779
Acceleration distance: -1.614556699490593
The Acceleration for the seven models:
-1.1571453309999997
-2.771702030490593
-4.386258729981186
-6.000815429471779
-7.615372128962372
-9.229928828452966
-10.844485527943558

In [59]:
-0.5841011435740535

-0.5841011435740535

Four - removed Outlier

Acceleration for Button Number 4's condition B: -2.7191131912123083
Acceleration distance: -0.8772874907632695
The Acceleration for the seven models:
-0.0872507189224998
-0.9645382096857693
-1.8418257004490388
-2.7191131912123083
-3.596400681975578
-4.473688172738847
-5.350975663502117

Four - With Outlier

Acceleration for Button Number 4's condition B: -5.417435556383105
Acceleration distance: -1.776728279153535
The Acceleration for the seven models:
-0.08725071892250025
-1.863978998076035
-3.64070727722957
-5.417435556383105
-7.19416383553664
-8.970892114690175
-10.747620393843711

Four - under stay stats and with outlier 

Acceleration for Button Number 4's condition B: -7.05007819496449
Acceleration distance: -2.07625984915483
The Acceleration for the seven models:
-0.8212986475000008
-2.8975584966548302
-4.97381834580966
-7.05007819496449
-9.12633804411932
-11.202597893274149
-13.278857742428979

In [54]:
# Get Peak for execute

crest_dfs = []
peak_dfs = []
fPath = r"C:\Users\angel\Desktop\S1DA\Separated - Backtrace - 230422 - study1\Execute"

for f in os.listdir(fPath):
    New_fPath = os.path.join(fPath, f)
    for csvs in os.listdir(New_fPath):
        csvs_full_path = os.path.join(New_fPath, csvs)
        #print(csvs_full_path)
        df_peak = get_peak_for_var2(csvs_full_path, "Case", "Velocity", False)
        df_crest = get_crest_for_var2(csvs_full_path, "Case", "Velocity", False)
        peak_dfs.append(df_peak)
        crest_dfs.append(df_crest)

combined_peak_dfs = pd.concat(peak_dfs, ignore_index=True)
combined_crest_dfs = pd.concat(crest_dfs, ignore_index=True)



(433, 2)
(400, 2)
(394, 2)
(367, 2)
(205, 2)
(199, 2)
(287, 2)
(277, 2)
(446, 2)
(408, 2)
(1140, 2)
(997, 2)
(639, 2)
(589, 2)
(333, 2)
(315, 2)
(558, 2)
(488, 2)
(598, 2)
(534, 2)


In [80]:
# Get Peak for execute

crest_dfs_wt_outlier = []
peak_dfs_wt_outlier = []
fPath = r"C:\Users\angel\Desktop\S1DA\Separated - Backtrace - 230422 - study1\Execute"

for f in os.listdir(fPath):
    New_fPath = os.path.join(fPath, f)
    for csvs in os.listdir(New_fPath):
        csvs_full_path = os.path.join(New_fPath, csvs)
        #print(csvs_full_path)
        df_peak = get_peak_for_var2(csvs_full_path, "Case", "Acceleration", False, True)
        df_crest = get_crest_for_var2(csvs_full_path, "Case", "Acceleration", False, True)
        peak_dfs.append(df_peak)
        crest_dfs.append(df_crest)

combined_peak_dfs = pd.concat(peak_dfs, ignore_index=True)
combined_crest_dfs = pd.concat(crest_dfs, ignore_index=True)



Acceleration Outlier Removed
    Case  RunTime     Object  EnterTime  Velocity  Acceleration Condition  \
1      1     6.62  FastTapIn       0.02  0.074973     -3.664041             
3      1     6.66  FastTapIn       0.06  0.029872     -2.734504             
5      1     6.70  FastTapIn       0.10  0.041240     -0.901695             
7      1     6.74  FastTapIn       0.14  0.045114     -2.036603             
8      1     6.76  FastTapIn       0.16  0.036381     -0.436655             
..   ...      ...        ...        ...       ...           ...       ...   
422   29    62.58  FastTapIn       0.02  0.372218    -16.551530             
424   29    62.62  FastTapIn       0.06  0.412758     -8.044860             
427   30    64.22  FastTapIn       0.02  0.442050    -24.497850             
430   30    64.28  FastTapIn       0.08  0.289721    -24.856850             
432   30    64.32  FastTapIn       0.12  0.218583    -12.510240             

    PrepTime  
1          0  
3          0  
5

In [76]:
combined_peak_dfs["Acceleration"].mean()

-1.426401721284159

In [77]:
combined_crest_dfs["Acceleration"].mean()

-6.617647153018868