In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn import tree
import matplotlib.pyplot as plt
# import graphviz
from sklearn.metrics import r2_score

In [2]:
def read_dir(path):
    """Load every ``.txt`` file directly inside ``path`` into one DataFrame.

    Each file is parsed with ``pd.read_csv`` as tab-separated text after
    skipping its first 6 lines. Sub-directories are not descended into, and
    entries that are not regular files or do not end in ``.txt`` are ignored.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to scan.

    Returns
    -------
    pandas.DataFrame or None
        Row-wise concatenation of all matching files (each file keeps its own
        row index), or ``None`` when the directory has no matching file.
    """
    # Sort the paths so the row order does not depend on the OS-specific
    # order in which ``iterdir`` happens to yield directory entries.
    txt_files = sorted(
        f for f in Path(path).iterdir()
        if f.is_file() and f.name.endswith('.txt')
    )
    if not txt_files:
        return None
    # ``DataFrame.append`` was removed in pandas 2.0; a single ``pd.concat``
    # over all frames is the supported replacement and avoids re-copying the
    # accumulated data once per file.
    return pd.concat([pd.read_csv(f, skiprows=6, sep='\t') for f in txt_files])

In [3]:
# Mapping from the raw rectangle-dimension column names to the short aliases
# applied with ``DataFrame.rename`` in the cells below.
rename = {
    'Rect1Height': 'A1', 'Rect1Width': 'A2',
    'Rect2Height': 'B1', 'Rect2Width': 'B2',
    'Rect3Height': 'C1', 'Rect3Width': 'C2',
}

# Raw column names that the cells below log-transform; these are exactly the
# keys of ``rename``, in insertion order.
features = list(rename)

## E1

In [4]:
# Clean each E1 sub-experiment (E1a, E1b, E1c), write one CSV per
# sub-experiment, then write the combined table to data/E1.csv.
experiments = ['a', 'b', 'c']
sort_cols = ['username', 'blkNum', 'trlNum']
frames = []
for exp in experiments:
    d = read_dir(f'data/E1{exp}/')
    if d is None:
        # read_dir returns None when the directory holds no .txt files; fail
        # with a clear message instead of an opaque error on the next line.
        raise FileNotFoundError(f'no .txt files found in data/E1{exp}/')
    # Filters are applied one after another (not as one combined mask) so
    # each condition is only evaluated on rows that passed the previous one.
    d = d[d.blkNum > 0]
    d = d[~d.Effect.str.startswith('Fill')]
    d = d[d.RT < 7000]
    # Take an explicit copy before assigning columns so the writes below do
    # not target a slice of the unfiltered frame (SettingWithCopyWarning).
    d = d[d.response > 0].copy()
    d['RT'] = d['RT'] / 1000  # presumably ms -> s; confirm the unit of RT
    d[features] = np.log(d[features])
    d = d.sort_values(by=sort_cols).rename(columns=rename)
    d.to_csv(f'data/E1{exp}.csv', index=False)
    frames.append(d)

# The previous version called ``data.append(d, sort=True)`` and discarded the
# result, so only experiment 'a' ever reached data/E1.csv (and
# ``DataFrame.append`` no longer exists in pandas 2.0). Concatenate all
# sub-experiments in one call instead.
data = pd.concat(frames, sort=True).sort_values(by=sort_cols)
data.to_csv('data/E1.csv', index=False)

## E2

In [5]:
# Clean the E2 trials and write them to data/E2.csv.
data = read_dir('data/E2/')
if data is None:
    # read_dir returns None when the directory holds no .txt files; fail
    # with a clear message instead of an opaque error on the next line.
    raise FileNotFoundError('no .txt files found in data/E2/')
# Filters are applied one after another (not as one combined mask) so each
# condition is only evaluated on rows that passed the previous one.
data = data[data.blkNum > 0]
data = data[~data.Effect.str.startswith('Fill')]
data = data[(data.RT > 100) & (data.RT < 8000)]
# Take an explicit copy before assigning columns so the writes below do not
# target a slice of the unfiltered frame (SettingWithCopyWarning).
data = data[data.response > 0].copy()
data['RT'] = data['RT'] / 1000  # presumably ms -> s; confirm the unit of RT
data[features] = np.log(data[features])
data = data.sort_values(by=['username', 'blkNum', 'trlNum']).rename(columns=rename)
data.to_csv('data/E2.csv', index=False)