In [1]:
import sys
import os
# Put the parent of the current working directory on the import path;
# presumably this is what lets `utils.utils` be imported below — TODO confirm.
parent_of_cwd = os.path.dirname(os.path.abspath(os.getcwd()))
sys.path.append(parent_of_cwd)

import numpy as np
import pandas as pd
import cmdstanpy
from utils.utils import get_parameters_range

In [2]:
# Base locations; all paths are plain strings relative to the working directory.
root = "../"
datasets_root = f"{root}Datasets/"

# Input data: behavioural trials and the model predictions for each string.
behavioural_data_root = f"{datasets_root}behavioral_data/selected_data/"
dataset_path = f"{datasets_root}AI Models Results/fastText_FC.csv"

# Previously saved Stan results for the hierarchical fit.
path_to_stan_output = f"{root}Estimations/Results/hierarchical/stan_results/ANN-RDM_full_FT"

# Plot output folder (note: not prefixed with `root`, unlike the paths above).
plots_root = "Results/hierarchical/Plots/"

## Prepare data

Load the words and non-words together with their Zipf frequencies and the predicted word / non-word probabilities.

In [3]:
# The file is read without a header row (header=None), so the column names
# are supplied explicitly here.
word_nword_columns = [
    "string", "freq", "label", "zipf",
    "category", "word_prob", "non_word_prob",
]
word_nword_df = pd.read_csv(dataset_path, names=word_nword_columns, header=None)
word_nword_df

Unnamed: 0,string,freq,label,zipf,category,word_prob,non_word_prob
0,Ipe's,0,0,0.000000,NW,6.484266e-08,9.999998e-01
1,toothcamb,0,0,0.000000,NW,4.641062e-05,9.999536e-01
2,flicks,702,1,2.935287,LF,9.993498e-01,6.502719e-04
3,Samoar,0,0,0.000000,NW,1.966545e-06,9.999980e-01
4,lastened,0,0,0.000000,NW,1.107923e-04,9.998892e-01
...,...,...,...,...,...,...,...
74312,drased,0,0,0.000000,NW,6.662523e-05,9.999334e-01
74313,exorcism,238,1,3.274105,HF,9.999999e-01,7.543648e-08
74314,pobs,0,0,0.000000,NW,1.563252e-07,9.999999e-01
74315,undemonstrative,3,1,1.592864,LF,9.999452e-01,5.486609e-05


In [4]:
# Read the LDT trials; the file is read without a header row, so column
# names are given explicitly.
ldt_columns = ["accuracy", "rt", "string", "response",
               "participant", "minRT", "participant_id"]
ldt_raw = pd.read_csv(behavioural_data_root + "LDT_data.csv",
                      names=ldt_columns, header=None)

# Attach the word / non-word information to every trial via a left join on
# "string"; rows left with any missing value (e.g. strings that found no
# match) are then dropped, the index is renumbered, and the "freq" and
# "participant" columns are removed.
behavioural_df = (
    ldt_raw
    .merge(word_nword_df, on="string", how="left")
    .dropna()
    .reset_index(drop=True)
    .drop(columns=["freq", "participant"])
)

In [5]:
# Preview the first rows of the merged frame to check the join result.
behavioural_df.head()

Unnamed: 0,accuracy,rt,string,response,minRT,participant_id,label,zipf,category,word_prob,non_word_prob
0,1,0.378,bodule,0,0.378,1,0.0,0.0,NW,1.3e-05,0.999987
1,1,0.415,remember,1,0.378,1,1.0,5.733796,HF,0.999947,5.3e-05
2,1,0.425,mellow,1,0.378,1,1.0,3.479355,HF,0.999825,0.000175
3,1,0.43,gluff,0,0.378,1,0.0,0.0,NW,1e-06,0.999999
4,1,0.435,imversion,0,0.378,1,0.0,0.0,NW,0.00075,0.99925


In [6]:
# Shrink the frame to a handful of participants for testing purposes.
# NOTE(review): np.arange(3) yields ids 0, 1 and 2, while the rows displayed
# by behavioural_df.head() carry participant_id 1 — confirm whether id 0
# exists and whether this subset is the intended one.
is_test_participant = behavioural_df["participant_id"].isin(np.arange(3))
behavioural_df = behavioural_df.loc[is_test_participant]

### Get parameter ranges

In [7]:
# Build the parameter table from the saved Stan output and the behavioural data.
# NOTE(review): get_parameters_range is a project helper not shown here; the
# rendered output has one row per parameter with `mean` and `std` columns —
# confirm how it uses behavioural_df.
ranges = get_parameters_range(path_to_stan_output, behavioural_df)
ranges

	Chain 1 had 3 divergent transitions (0.6%)
	Chain 2 had 2 divergent transitions (0.4%)
	Use function "diagnose()" to see further information.


Unnamed: 0,mean,std
alpha_sbj,1.852985,0.407143
b_sbj,0.33714,0.132949
k_1_sbj,2.452307,0.491251
k_2_sbj,3.467335,0.371054
threshold_sbj_word,1.202835,0.126076
threshold_sbj_nonword,1.655854,0.143301
g_sbj,0.067546,0.053861
m_sbj,1.494549,0.218995


In [8]:
# Shorten the row labels. Renaming by label (instead of overwriting the index
# positionally) keeps every row paired with the right name even if the rows
# come back in a different order.
index_renames = {
    "alpha_sbj": "alpha",
    "b_sbj": "b",
    "k_1_sbj": "k_1",
    "k_2_sbj": "k_2",
    "threshold_sbj_word": "threshold_word",
    "threshold_sbj_nonword": "threshold_nonword",
    "g_sbj": "g",
    "m_sbj": "m",
}
ranges = ranges.rename(index=index_renames)

# Fail loudly on a missing or unexpected parameter rather than carrying a
# mislabelled table forward. Labels that are already short pass through the
# rename untouched, so re-running this cell is safe.
unexpected_labels = set(ranges.index) ^ set(index_renames.values())
if unexpected_labels:
    raise ValueError(
        f"Unexpected or missing parameter labels in ranges: {sorted(unexpected_labels)}"
    )

In [9]:
# Save the parameter table. pandas does not create missing parent directories,
# so make sure the output folder exists before writing.
params_range_path = "Data/params_range.csv"
os.makedirs(os.path.dirname(params_range_path), exist_ok=True)
ranges.to_csv(params_range_path)