In [154]:
import pandas as pd
import glob
import numpy as np
from __future__ import division
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt

In [16]:
# Collect every participant's training CSV.
# glob.glob returns matches in arbitrary (OS-dependent) order, so sort the list:
# later cells index into it (training_data[0]) and concatenate files in list order.
training_data = sorted(glob.glob('training/*.csv'))
training_data

['training\\110_training_2016_Aug_16_1429.csv',
 'training\\111_training_2016_Jul_08_1050.csv',
 'training\\112_training_2016_Jul_15_1138.csv',
 'training\\113_training_2016_Jul_15_1448.csv',
 'training\\114_training_2016_Jul_15_1542.csv',
 'training\\115_training_2016_Jul_21_1239.csv',
 'training\\116_training_2016_Jul_21_1530.csv',
 'training\\117_training_2016_Jul_27_1439.csv',
 'training\\118_training_2016_Aug_04_1343.csv']

In [17]:
# Load the first file in the list to inspect its structure before processing all files.
df = pd.read_csv(filepath_or_buffer=training_data[0])
df.head(n=5)

Unnamed: 0,dot_values,colour_values,participant,.thisRepN,.thisTrialN,.thisN,.thisIndex,response,response_time,date,Unnamed: 10
0,,Yellow,110,0,0,0,0,,,2016_Aug_16_1429,
1,1.0,LightPink,110,0,1,1,1,m,0.207572,2016_Aug_16_1429,
2,8.0,Brown,110,0,2,2,2,,,2016_Aug_16_1429,
3,6.0,Purple,110,0,3,3,3,m,0.055839,2016_Aug_16_1429,
4,5.0,Orange,110,0,4,4,4,m,0.015771,2016_Aug_16_1429,


In [18]:
# Expected key for each trial: 'z' only when both 'dot_values' and
# 'colour_values' are missing, otherwise 'm'.
df['correct_response'] = (
    df[['dot_values', 'colour_values']]
    .isnull()
    .all(axis=1)
    .map({True: 'z', False: 'm'})
)

In [19]:
# Preview the first rows, which now include the 'correct_response' column.
df.head(n=5)

Unnamed: 0,dot_values,colour_values,participant,.thisRepN,.thisTrialN,.thisN,.thisIndex,response,response_time,date,Unnamed: 10,correct_response
0,,Yellow,110,0,0,0,0,,,2016_Aug_16_1429,,m
1,1.0,LightPink,110,0,1,1,1,m,0.207572,2016_Aug_16_1429,,m
2,8.0,Brown,110,0,2,2,2,,,2016_Aug_16_1429,,m
3,6.0,Purple,110,0,3,3,3,m,0.055839,2016_Aug_16_1429,,m
4,5.0,Orange,110,0,4,4,4,m,0.015771,2016_Aug_16_1429,,m


In [22]:
# A trial is correct when the recorded key equals the expected key.
# A missing response never equals a key, so it is flagged False.
answered_correctly = df['response'].eq(df['correct_response'])
answered_correctly.head(n=5)

0    False
1     True
2    False
3     True
4     True
dtype: bool

In [23]:
# Number of correct trials (each True counts as 1).
num_corr = answered_correctly.sum()
num_corr

375

In [24]:
# Every trial that was not answered correctly counts as a mistake.
num_mistakes = df.shape[0] - num_corr
num_mistakes

58

In [25]:
# Proportion of all trials in this file that were answered correctly.
accuracy = num_corr / df.shape[0]
accuracy

0.86605080831408776

In [26]:
# Response times restricted to the correctly answered trials.
react_time = df.loc[answered_correctly, "response_time"]
react_time.head(n=5)

1    0.207572
3    0.055839
4    0.015771
5    0.360040
6    0.263624
Name: response_time, dtype: float64

In [27]:
# Mean response time over the correct trials (missing values are skipped).
react_time.mean(skipna=True)

0.23162321980794362

In [78]:
# Score every participant's training file the same way and stack the results
# into one table, which is also written to combined_training_data.csv.

# Columns kept in the combined table.
needed_cols = ['dot_values', 'colour_values', 'participant', 'response', 'response_time', 'date', 'correct_response',
              'answered_correctly']


def load_training_file(path):
    """Read one training CSV and score each trial.

    Adds two columns:
      * 'correct_response': 'z' when both 'dot_values' and 'colour_values'
        are missing, otherwise 'm'.
      * 'answered_correctly': 1 when 'response' equals 'correct_response',
        else 0 (a missing response therefore scores 0).

    Returns a DataFrame restricted to the columns in ``needed_cols``.
    """
    trials = pd.read_csv(path)
    no_stimulus = trials[['dot_values', 'colour_values']].isnull().all(axis=1)
    trials['correct_response'] = no_stimulus.replace({False: 'm', True: 'z'})
    trials['answered_correctly'] = (trials['response'] == trials['correct_response']).astype(int)
    return trials[needed_cols]


# Going through the helper keeps the global `df` from being overwritten per file.
df_list = [load_training_file(path) for path in training_data]

# ignore_index=True gives every row a unique label; without it each file's
# 0..n-1 labels repeat, which makes label-based selection ambiguous.
combined_df = pd.concat(df_list, ignore_index=True)
combined_df.to_csv("combined_training_data.csv", index=False)

In [79]:
# Preview the first ten rows of the combined table.
combined_df.head(n=10)

Unnamed: 0,dot_values,colour_values,participant,response,response_time,date,correct_response,answered_correctly
0,,Yellow,110,,,2016_Aug_16_1429,m,0
1,1.0,LightPink,110,m,0.207572,2016_Aug_16_1429,m,1
2,8.0,Brown,110,,,2016_Aug_16_1429,m,0
3,6.0,Purple,110,m,0.055839,2016_Aug_16_1429,m,1
4,5.0,Orange,110,m,0.015771,2016_Aug_16_1429,m,1
5,7.0,DarkGray,110,m,0.36004,2016_Aug_16_1429,m,1
6,3.0,Blue,110,m,0.263624,2016_Aug_16_1429,m,1
7,9.0,Red,110,m,0.239785,2016_Aug_16_1429,m,1
8,2.0,Green,110,m,0.047875,2016_Aug_16_1429,m,1
9,4.0,Purple,110,m,0.271757,2016_Aug_16_1429,m,1


In [147]:
# Per-participant summary: number of correct responses, number of mistakes,
# and accuracy (proportion of trials answered correctly).
grouped_df = combined_df.groupby('participant')

# Aggregate only the 0/1 'answered_correctly' flag. Summing the whole frame
# relies on pandas silently dropping the string columns, which recent pandas
# versions no longer do.
correct_counts = grouped_df['answered_correctly'].sum()
trial_counts = grouped_df.size()

df = pd.DataFrame(
    {'num_corr': correct_counts, 'num_mistakes': trial_counts - correct_counts},
    columns=['num_corr', 'num_mistakes']
)
df['accuracy'] = df['num_corr'] / trial_counts

# Turn the 'participant' index back into a regular column.
df = df.reset_index()

In [148]:
# Show the per-participant summary (up to ten rows).
df.head(n=10)

Unnamed: 0,participant,num_corr,num_mistakes,accuracy
0,110,375,58,0.866051
1,111,395,52,0.883669
2,112,420,29,0.935412
3,113,393,63,0.861842
4,114,394,58,0.871681
5,115,415,35,0.922222
6,116,426,17,0.961625
7,117,443,19,0.958874
8,118,101,346,0.225951


In [149]:
# Keep only the trials where the recorded key matches the expected key,
# so reaction times can be summarised for correct responses only.
corr_resp_df = combined_df[combined_df['correct_response'] == combined_df['response']]
corr_resp_df.head(n=5)

Unnamed: 0,dot_values,colour_values,participant,response,response_time,date,correct_response,answered_correctly
1,1.0,LightPink,110,m,0.207572,2016_Aug_16_1429,m,1
3,6.0,Purple,110,m,0.055839,2016_Aug_16_1429,m,1
4,5.0,Orange,110,m,0.015771,2016_Aug_16_1429,m,1
5,7.0,DarkGray,110,m,0.36004,2016_Aug_16_1429,m,1
6,3.0,Blue,110,m,0.263624,2016_Aug_16_1429,m,1


In [150]:
# Mean reaction time on correct trials, one row per participant.
# Select 'response_time' before aggregating: averaging the whole frame would
# also try to average the string columns, which raises a TypeError on recent
# pandas versions.
resp_group_df = (
    corr_resp_df
    .groupby('participant')['response_time']
    .mean()
    .reset_index()
    .rename(columns={'response_time': 'corr_resp_rt'})
)
resp_group_df.head()

Unnamed: 0,participant,corr_resp_rt
0,110,0.231623
1,111,0.274088
2,112,0.332942
3,113,0.300568
4,114,0.245778


In [152]:
# Join the accuracy summary with the mean correct-response reaction time.
# This is an inner join on 'participant', so only participants present in
# both tables are kept.
final_df = df.merge(resp_group_df, on='participant', how='inner')
final_df

Unnamed: 0,participant,num_corr,num_mistakes,accuracy,corr_resp_rt
0,110,375,58,0.866051,0.231623
1,111,395,52,0.883669,0.274088
2,112,420,29,0.935412,0.332942
3,113,393,63,0.861842,0.300568
4,114,394,58,0.871681,0.245778
5,115,415,35,0.922222,0.329818
6,116,426,17,0.961625,0.290328
7,117,443,19,0.958874,0.314148
8,118,101,346,0.225951,0.330066


In [170]:
# Average accuracy: unweighted mean of the per-participant accuracies.
# NOTE(review): participant 118's accuracy (0.226 in the table above) is far
# below everyone else's and pulls this mean down; confirm it should be included.
final_df.loc[:, 'accuracy'].mean()

0.831925334033014

In [171]:
# Average correct-response reaction time: unweighted mean of the
# per-participant means.
final_df.loc[:, 'corr_resp_rt'].mean()

0.29437300621786266