## Import packages

In [1]:
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.getcwd())))

import numpy as np
import pandas as pd
import random
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg
import statsmodels.api as sm

from statsmodels.graphics.gofplots import qqplot_2samples
from utils.utils import get_rt_quantiles, remove_outliers

In [2]:
# Use seaborn's white-grid style and make 8x6 in at 100 dpi the default for
# every figure created afterwards. The defaults are set through rcParams;
# calling plt.figure() here would only open an empty figure with no axes.
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"] = (8, 6)
plt.rcParams["figure.dpi"] = 100

<Figure size 800x600 with 0 Axes>

In [3]:
# Locations used by the notebook. `root` is the prefix for the dataset paths.
root = "../"
# Directory whose files are read one by one as raw participant data
behavioural_data_root = f"{root}Datasets/behavioral_data/raw_data/"
# CSV holding the selected participants' trials (read further down)
data_output_path = f"{root}Datasets/behavioral_data/selected_data/LDT_data.csv"
# NOTE(review): unlike the two paths above, this one is not prefixed with `root`,
# so it resolves against the current working directory — confirm this is intended.
qqplots_dir = "Results/Plots/qqplots/"

In [4]:
# Build one trial-level table from every raw participant file.
# NOTE(review): os.listdir() returns entries in arbitrary order, so the
# participant numbers assigned below depend on the filesystem. The order is left
# untouched because a later cell matches these numbers against the IDs stored in
# the selected-data CSV — confirm how that file was numbered before sorting here.
raw_columns = ["trial", "string_id", "string_type", "accuracy", "rt", "string"]
# Target dtypes for the columns that are read in as text (loop-invariant)
convert_dict = {"string_type": "int16",
                "accuracy": "int16",
                "rt": float
               }

dataframes = []
counter = 1  # participant number; only incremented for files that are kept
for filename in os.listdir(behavioural_data_root):
    df = pd.read_csv(os.path.join(behavioural_data_root, filename), names=raw_columns)
    # Skip files whose second-to-last row does not read "English" in the fifth column.
    # NOTE(review): presumably that cell holds the participant's language — confirm
    # against the raw file layout.
    if df.iloc[-2, 4] != "English":
        continue
    # Drop incomplete rows, the unused string_id column and the rows labelled 0 and 1
    df = df.dropna().drop("string_id", axis=1).drop([0, 1])
    # Drop the last two remaining rows
    df = df.iloc[:-2, :]
    # Drop the first row whose trial field is "Univ" and the row labelled right after it
    ind = df.loc[df["trial"] == "Univ"].index[0]
    df = df.drop([ind, ind+1], axis=0)
    # Dropping rows with wrong accuracies (anything other than "0" or "1")
    df = df.loc[df["accuracy"].isin(["0", "1"])]
    # Converting columns to suitable data types
    df = df.astype(convert_dict)
    # Convert RTs to seconds (vectorized division instead of a per-row lambda)
    df["rt"] = df["rt"] / 1000
    # Removing outliers with the project helper.
    # NOTE(review): the meaning of the positional arguments (3, .2, 2.5) is defined
    # in utils.utils.remove_outliers — check there before changing them.
    df = remove_outliers(df, 3, .2, 2.5)
    # Extracting the participant's response from accuracy:
    # 1 where string_type and accuracy agree (NOT XOR), 0 otherwise
    df["response"] = np.logical_not(np.logical_xor(df["string_type"], df["accuracy"])).astype("int")
    df = df.reset_index(drop=True)
    # Participant number
    df["participant"] = counter
    # Empty placeholder column; it is not filled in this cell
    df["quantile"] = ""
    # Fastest retained RT of this participant, repeated on every row
    df["minRT"] = df["rt"].min()
    dataframes.append(df)
    counter += 1
behavioural_df = pd.concat(dataframes)

In [33]:
# Display the combined trial-level table built from all retained participant files
behavioural_df

Unnamed: 0,trial,string_type,accuracy,rt,string,response,participant,quantile,minRT
0,1,0,0,0.548,pracker,1,1,,0.378
1,2,1,1,0.646,nearside,1,1,,0.378
2,3,1,1,0.511,jets,1,1,,0.378
3,4,0,1,0.815,vates,0,1,,0.378
4,5,1,1,0.680,onward,1,1,,0.378
...,...,...,...,...,...,...,...,...,...
3263,3370,1,1,0.589,welt,1,806,,0.423
3264,3371,0,1,0.708,clameworthy,0,806,,0.423
3265,3372,0,1,0.817,esperience,0,806,,0.423
3266,3373,0,1,0.736,Ebe,0,806,,0.423


In [35]:
# Read the selected-data CSV (it has no header row, so column names are supplied)
# and keep only the trials of participants whose number appears in it.
selected_data_columns = ["accuracy", "rt", "string", "response",
                         "participant", "minRT", "participant_id"]
selected_data = pd.read_csv(data_output_path, header=None, names=selected_data_columns)
selected_particpants_id = selected_data['participant'].to_numpy()
is_selected = behavioural_df['participant'].isin(selected_particpants_id)
selected_participants = behavioural_df.loc[is_selected]

In [36]:
# Per-participant mean RT, accuracy and response for the selected subset.
# The trial rows are re-selected from behavioural_df instead of being read from
# `selected_participants`, so re-running this cell never aggregates a frame
# that has already been aggregated.
selected_participants = (
    behavioural_df.loc[behavioural_df["participant"].isin(selected_particpants_id)]
    .groupby(["participant"])
    .agg({"rt": ["mean"], "accuracy": ["mean"], "response": ["mean"]})
    .reset_index()
)

In [38]:
# Per-participant mean RT, accuracy and response across every participant.
# The list-valued spec keeps the two-level (column, "mean") header.
participant_mean_spec = {"rt": ["mean"], "accuracy": ["mean"], "response": ["mean"]}
all_participants = (
    behavioural_df
    .groupby(["participant"])
    .agg(participant_mean_spec)
    .reset_index()
)

In [39]:
# One-sample t-test: do the selected participants' mean RTs differ from the
# average per-participant mean RT over all participants?
# The ("rt", "mean") column is selected explicitly so that the sample is a 1-D
# array and the reference value a scalar, rather than an (n, 1) array and a
# one-element Series.
pg.ttest(selected_participants[("rt", "mean")].to_numpy(), all_participants[("rt", "mean")].mean())

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.385318,99,two-sided,0.700828,"[0.77, 0.84]",0.038532,0.119,0.066842


In [46]:
# One-sample t-test: do the selected participants' mean accuracies differ from
# the average per-participant mean accuracy over all participants?
# The ("accuracy", "mean") column is selected explicitly so that the sample is a
# 1-D array and the reference value a scalar, rather than an (n, 1) array and a
# one-element Series.
pg.ttest(selected_participants[("accuracy", "mean")].to_numpy(), all_participants[("accuracy", "mean")].mean())

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.187763,99,two-sided,0.851447,"[0.85, 0.87]",0.018776,0.113,0.05397


In [40]:
# Average of the per-participant means within the selected subset.
# The "participant" entry is just the mean of the ID numbers and carries no meaning.
selected_participants.mean()

participant          412.480000
rt           mean      0.806357
accuracy     mean      0.860331
response     mean      0.480645
dtype: float64

In [43]:
# Between-participant standard deviation of the per-participant means (selected subset).
# The "participant" entry is the spread of the ID numbers and carries no meaning.
selected_participants.std()

participant          222.596953
rt           mean      0.166791
accuracy     mean      0.070514
response     mean      0.028958
dtype: float64

In [41]:
# Average of the per-participant means over all participants.
# The "participant" entry is just the mean of the ID numbers and carries no meaning.
all_participants.mean()

participant          403.500000
rt           mean      0.812783
accuracy     mean      0.861655
response     mean      0.484101
dtype: float64

In [44]:
# Between-participant standard deviation of the per-participant means (all participants).
# The "participant" entry is the spread of the ID numbers and carries no meaning.
all_participants.std()

participant          232.816451
rt           mean      0.181040
accuracy     mean      0.066124
response     mean      0.033373
dtype: float64