In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import json
from matplotlib import cm
from matplotlib.colors import ListedColormap
from scipy.stats import ttest_1samp
# Two 80-colour palettes used to give every subject its own line colour.
# Each base colormap is resampled to 100 levels and then evaluated at 80
# evenly spaced positions in [0, 1].
# `plt.get_cmap(name, lut)` is used instead of `matplotlib.cm.get_cmap`,
# which was deprecated in Matplotlib 3.7 and removed in 3.9.
clist = [
    ListedColormap(plt.get_cmap(cname, 100)(np.linspace(0, 1, 80)))
    for cname in ["winter", "spring"]
]

# Importing the data

In [2]:
# define paths
p = os.getcwd()
parent = os.path.dirname(p)
relative = "/Exp 10/data/"
path = parent+relative
fileExt = ".txt"
startStr1 = "jatos"
startStr2 = "study"
startStr3 = "comp"


def _subdirs(folder, prefix):
    """Return the sub-directories of `folder` whose names start with `prefix`.

    The result is sorted by name so that the loading order (and therefore the
    subject index used by every later cell) is the same on every run and on
    every machine; `os.listdir` alone gives no ordering guarantee.
    Plain files that happen to share the prefix are ignored.
    """
    candidates = (os.path.join(folder, name)
                  for name in sorted(os.listdir(folder))
                  if name.startswith(prefix))
    return [c for c in candidates if os.path.isdir(c)]


# Walk <path>/jatos*/study*/comp*/ and load the first file of every comp* folder.
# DATA gets one entry per loaded file: a list with one DataFrame per top-level
# key of that file's JSON content.
DATA = []
for resultDir in _subdirs(path, startStr1):
    for studyDir in _subdirs(resultDir, startStr2):
        for compDir in _subdirs(studyDir, startStr3):
            fileName = sorted(os.listdir(compDir))
            if not fileName:
                # make the gap visible instead of failing with an IndexError
                print("no result file found in", compDir, "- skipped")
                continue
            # Load the JSON data from the text file (JSON text is UTF-8)
            with open(os.path.join(compDir, fileName[0]), "r", encoding="utf-8") as f:
                json_data = json.load(f)
            # Create one DataFrame per top-level key of the JSON data
            DATA.append([pd.DataFrame(json_data[cName]) for cName in json_data.keys()])
print(DATA[0][0].columns)

Index(['ID_name', 'block', 'color', 'condition', 'correct_keyboard_response',
       'number', 'practice', 'response_keyboard_response',
       'response_time_keyboard_response', 'subject_nr'],
      dtype='object')


In [3]:
# Echo the loaded data: DATA holds one entry per loaded result file, each entry
# being a list of DataFrames (one per top-level key of that file's JSON).
DATA

[[        ID_name  block color condition  correct_keyboard_response  number  \
  0     janka1003      0   red      arab                          1       7   
  1     janka1003      0   red      arab                          1       1   
  2     janka1003      0  blue      arab                          1       4   
  3     janka1003      0  blue      arab                          1       2   
  4     janka1003      0  blue      arab                          1       3   
  ...         ...    ...   ...       ...                        ...     ...   
  1227  janka1003      3  blue   classic                          1       9   
  1228  janka1003      3  blue   classic                          1       7   
  1229  janka1003      3  blue   classic                          1       3   
  1230  janka1003      3  blue   classic                          1       1   
  1231  janka1003      3  blue   classic                          1       9   
  
       practice response_keyboard_response  respo

In [None]:
# ID name of every loaded participant, read from the first row (label 0) of the
# participant's first table; the order follows DATA.
my_list = [subject_tables[0]["ID_name"][0] for subject_tables in DATA]

In [None]:
def write_list_to_file(lst, filename):
    """Write the items of `lst` to a text file, one item per line.

    Parameters
    ----------
    lst : iterable
        Items to write. Each item is converted with ``str()``, so numeric
        identifiers are accepted as well as strings.
    filename : str or path-like
        Target file; it is created, or truncated if it already exists.

    Every item is followed by a newline, so an empty `lst` produces an
    empty file.
    """
    with open(filename, 'w') as file:
        file.writelines(f"{item}\n" for item in lst)

# Example usage: store the collected ID names, one per line, in 'jeligek.txt'.
# The path is relative, so the file is created in the current working directory.

file_name = 'jeligek.txt'
write_list_to_file(my_list, file_name)
print(f'Successfully written the list to {file_name}.')


# Looking at the training trials

In [None]:
# Condition and response labels are read once from the first participant and
# reused for everybody, so that column `ci` of every result array always refers
# to the same condition (they are not re-derived per subject).
condType = np.unique(DATA[0][0]['condition'])
respType = np.unique(DATA[0][0]['response_keyboard_response'])
n, m = len(DATA), len(condType)
# practice_results[subject, condition, 0] -> median RT of the correct practice trials
# practice_results[subject, condition, 1] -> error rate over all practice trials
practice_results = np.zeros((n, m, 2))
# One RT histogram per subject x condition. squeeze=False keeps a 2-D grid of
# axes even when there is a single subject or a single condition.
figRT, axRT = plt.subplots(n, m, figsize=(m*5, n*5), squeeze=False)
axRT = axRT.flatten()
# looping over the subjects
for di, data in enumerate(DATA):
    data = data[0]
    practice_data = data[data['practice'] == "yes"]
    for ci, c in enumerate(condType):
        cdata = practice_data[practice_data['condition'] == c]
        # RTs are summarised over correct trials only, errors over all trials
        RT = cdata[cdata['correct_keyboard_response'] == 1]
        rts = RT['response_time_keyboard_response']
        # an empty selection is stored as NaN instead of triggering a
        # mean/median-of-empty warning
        practice_results[di, ci, 0] = np.median(rts) if len(rts) else np.nan
        practice_results[di, ci, 1] = 1 - np.mean(cdata['correct_keyboard_response']) if len(cdata) else np.nan
        # plot the RT distribution of this subject / condition
        panel = axRT[di*m + ci]
        panel.hist(rts, bins=20)
        panel.set_title("Subject: "+str(di+1)+", Condition: "+c)
        panel.set_xlabel("RT")
        panel.set_ylabel("Frequency")
plt.show()

# Per-subject summary: top row median RT, bottom row error rate,
# one column per condition.
fig, ax = plt.subplots(2, m, figsize=(m*5, 2*5), squeeze=False)
ax = ax.flatten()
subject_numbers = np.arange(n) + 1
for ci, c in enumerate(condType):
    for panel, measure, ylabel in ((ax[ci], 0, "RT"), (ax[ci+m], 1, "Error")):
        panel.bar(subject_numbers, practice_results[:, ci, measure])
        panel.set_title("Condition: "+c)
        panel.set_xlabel("Subject")
        panel.set_ylabel(ylabel)
plt.show()

        


# Excluding subjects with high error rates

In [None]:
# A subject is dropped when the error rate on the main (non-practice) trials
# exceeds this value in at least one condition.
MAX_ERROR_RATE = 0.15

# ERR[subject, condition] = error rate on the main trials
ERR = np.zeros((len(DATA), len(condType)))
# `data` stays bound after this loop; the name is kept because later cells may
# rely on it.
for di, data in enumerate(DATA):
    data = data[0]
    data = data[data['practice'] == "no"]
    for ci, c in enumerate(condType):
        cdata = data[data['condition'] == c]
        # summary stat on Errors
        ERR[di, ci] = 1 - np.mean(cdata['correct_keyboard_response'])
print(ERR)

# one marker series per subject, conditions along the x axis
fig, ax = plt.subplots()
ax.plot(ERR.T, "o")
ax.axhline(MAX_ERROR_RATE, linestyle="--", color="k", alpha=0.5)
ax.set_xticks(np.arange(len(condType)))
ax.set_xticklabels(condType)
ax.set_xlabel("Condition")
ax.set_ylabel("Error rate")
plt.show()

# rows of ERR (= positions in DATA) with at least one condition above threshold
excluded = np.unique(np.where(ERR > MAX_ERROR_RATE)[0])
print("subjects excluded: ", excluded)
# remaining subjects
ix = np.setdiff1d(np.arange(len(DATA)), excluded)
# NOTE: DATA is replaced by the filtered list, so from here on positions in
# DATA no longer correspond to rows of ERR.
DATA = [DATA[i] for i in ix]
print("subjects included: ", ix)
N = len(DATA)
print("number of subjects: ", N)


In [None]:
# ID names of the subjects whose error rate exceeds 0.45 in at least one condition.
# Rows of ERR refer to the subject order *before* the exclusion step, whereas
# DATA has been filtered since, so indexing DATA with these rows would return
# the wrong subjects (or run past the end). `my_list` was built from the
# unfiltered DATA in that same order, so the names are looked up there.
assert len(my_list) == ERR.shape[0], "my_list and ERR must describe the same (pre-exclusion) subjects"
[my_list[i] for i in np.unique(np.where(ERR>0.45)[0])]

# Analyzing the SNARC effects in the three conditions

In [None]:
# ---------------------------------------------------------------------------
# Regression design
# ---------------------------------------------------------------------------
# Predictors for the regression: the presented numbers, collected from the main
# (non-practice) trials of the included subjects rather than from whatever
# `data` variable an earlier cell happened to leave behind.
Xs = np.unique(np.concatenate(
    [subj[0].loc[subj[0]['practice'] == "no", 'number'].to_numpy() for subj in DATA]))
# design matrix: intercept column + number
xT = np.stack((np.ones(len(Xs)), Xs), axis=-1)
# ordinary-least-squares projector (X'X)^-1 X'; it does not depend on the
# subject or condition, so it is computed once
ols = np.linalg.inv(xT.T @ xT) @ xT.T

# The analysis contrasts exactly two response alternatives.
if len(respType) != 2:
    raise ValueError("expected exactly two response types, got: " + str(list(respType)))
# maps each response to the other one (used to recover the correct key on error trials)
other_response = {respType[0]: respType[1], respType[1]: respType[0]}


def _side_difference(trials, value_col, side_col, numbers, sides, aggfunc):
    """Per-number difference of an aggregated measure between the two responses.

    `value_col` is aggregated with `aggfunc` for every (number, `side_col`)
    cell; the result is `sides[1]` minus `sides[0]` for each entry of
    `numbers`, returned as a 1-D float array aligned with `numbers`.
    Cells are matched by label, so a number/response combination without any
    trial yields NaN for that number instead of shifting the other columns.
    """
    if trials.empty:
        return np.full(len(numbers), np.nan)
    cells = (trials.groupby(['number', side_col])[value_col]
                   .agg(aggfunc)
                   .unstack(side_col)
                   .reindex(index=numbers, columns=list(sides)))
    return (cells[sides[1]] - cells[sides[0]]).to_numpy(dtype=float)


# ---------------------------------------------------------------------------
# Per subject / condition statistics
# ---------------------------------------------------------------------------
n_subjects, n_cond = len(DATA), len(condType)
# main_results[subject, condition, :] =
#   [median RT, mean error, RT intercept, RT slope, error intercept, error slope]
main_results = np.zeros((n_subjects, n_cond, 6))
# main_results_diff[subject, condition, 0, :] = RT difference per number
# main_results_diff[subject, condition, 1, :] = error difference per number
main_results_diff = np.zeros((n_subjects, n_cond, 2, len(Xs)))
# looping over the subjects
for di, data in enumerate(DATA):
    data = data[0]
    # removing the training trials
    data = data[data['practice'] == "no"]
    # deleting very long and very short responses
    data = data[(data['response_time_keyboard_response'] > 200) & (data['response_time_keyboard_response'] < 2000)]
    # splitting the data into the conditions
    for ci, c in enumerate(condType):
        cdata = data[data['condition'] == c]
        was_correct = cdata['correct_keyboard_response'] == 1
        pressed = cdata['response_keyboard_response']
        # Error table: all trials, with the key that would have been correct
        # (the pressed key on correct trials, the other key on error trials).
        Err = cdata.assign(
            correct_response=np.where(was_correct, pressed, pressed.map(other_response)),
            error=1 - cdata['correct_keyboard_response'])
        # RT table: correct trials only
        RT = cdata[was_correct]
        # per-number difference between the two responses
        rt_diff = _side_difference(RT, 'response_time_keyboard_response',
                                   'response_keyboard_response', Xs, respType, 'median')
        err_diff = _side_difference(Err, 'error', 'correct_response', Xs, respType, 'mean')
        main_results[di, ci, 0] = RT['response_time_keyboard_response'].median()
        main_results[di, ci, 1] = Err['error'].mean()
        # linear regression of the differences on the number: [intercept, slope]
        # (a NaN difference propagates to NaN coefficients)
        main_results[di, ci, 2:4] = ols @ rt_diff
        main_results[di, ci, 4:6] = ols @ err_diff
        main_results_diff[di, ci, 0, :] = rt_diff
        main_results_diff[di, ci, 1, :] = err_diff

# ---------------------------------------------------------------------------
# Figures
# ---------------------------------------------------------------------------
X = np.arange(n_subjects)


def _plot_subject_bars(result_col, ylabel):
    """Bar plot of `main_results[:, condition, result_col]`, one panel per condition."""
    fig, axes = plt.subplots(1, n_cond, figsize=(5*n_cond, 5), squeeze=False)
    for ci, c in enumerate(condType):
        panel = axes[0, ci]
        panel.bar(X, main_results[:, ci, result_col])
        panel.set_title("Condition: "+c)
        panel.set_xlabel("Subject Number")
        panel.set_ylabel(ylabel)
    plt.show()


def _plot_differences(measure, intercept_col, slope_col, ylabel):
    """Per-number differences (dots) and their fitted line, per subject and condition."""
    fig, axes = plt.subplots(1, n_cond, figsize=(5*n_cond, 5), squeeze=False)
    for ci, c in enumerate(condType):
        panel = axes[0, ci]
        for s in range(n_subjects):
            colour = clist[0](s)
            panel.plot(Xs, main_results_diff[s, ci, measure, :], "o", color=colour, alpha=0.5)
            panel.plot(Xs, Xs*main_results[s, ci, slope_col] + main_results[s, ci, intercept_col],
                       "-", color=colour, label="Subject "+str(s+1))
        panel.set_title("Condition: "+c)
        panel.set_xlabel("Number")
        panel.set_ylabel(ylabel)
    plt.show()


# plot the median RTs
_plot_subject_bars(0, "RT")
# plot the mean Errors
_plot_subject_bars(1, "Error")
# plot the between-response RT differences with their regression lines
_plot_differences(0, 2, 3, "median RT diff.")
# plot the between-response error differences with their regression lines
_plot_differences(1, 4, 5, "mean ERR diff.")

In [None]:

# Distribution of the per-subject RT slopes (column 3 of main_results) in the
# three conditions, followed by a one-sample t-test of each against zero.
rt_slopes = [main_results[:, cond_idx, 3] for cond_idx in range(3)]

fig, ax = plt.subplots(1, 1, figsize=(15, 5))
ax.violinplot(rt_slopes, showmedians=True, showextrema=True)
# dashed reference line at slope = 0
zero_line_x = [0, 1, 2, 3, 4]
ax.plot(zero_line_x, [0] * len(zero_line_x), "--", color="k", alpha=0.5)
ax.set_xticks([1, 2, 3])
ax.set_xticklabels(condType)
ax.set_xlim(0.5, 3.5)
ax.set_xlabel("Conditions")
ax.set_ylabel("Slopes on the median RT diff.")
plt.show()
print(*[ttest_1samp(slopes, 0) for slopes in rt_slopes])

In [None]:
# Number of per-subject RT slopes available for the first condition
main_results[:, 0, 3].shape[0]