<a href="https://colab.research.google.com/github/ExCaLBBR/ExCaLBBR_Projects/blob/main/RaciallyBiasedDecisions/RaciallyBiasedDecisions_APF/behavioral/code/RBD_IAT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Import libraries
import pandas as pd #for dealing with csv import
import os # for joining paths and filenames sensibly
import numpy as np #for the population std
import glob # for finding csv data files
import platform # paths use different dividers on linux vs windows, so we need to test for this
import string

In [36]:
#@title Define Utility functions
#Compute adjusted mean
def adjustedmean(RTs,corrs,penalty):
    n=len(corrs) #trials
    n_errors=int(n-sum(corrs)) #errors
    # print("Number of correctness: ", int(sum(corrs)))
    # print("Number of errors: ", n_errors)
    cor_RTs=np.array(corrs)*RTs #sum of correct RTs
    cor_mean=sum(cor_RTs)/sum(corrs)

    #mean with errors replaced with penalty value
    return cor_mean+(n_errors*penalty)/n


#Remove timed out trials
def exclude_slows(RTs,corrs,slowRT_limit):
    new_rt=[] #holding variables
    new_cr=[]
    for i in range(len(RTs)): #iterate over every item
        if RTs[i] < slowRT_limit: #if it isn't too fast, include RT and corr values
            new_rt.append(RTs[i])
            new_cr.append(corrs[i])

    return (new_rt, new_cr)

#Compute IAT bias rating
def iat_analyze(congr_rts_raw, congr_corr_raw, incon_rts_raw, incon_corr_raw, df_name):
    #1 discard subject if too many fast responses
    if sum(np.array(congr_rts_raw + incon_rts_raw)<fastRT_limit)>len(congr_rts_raw + incon_rts_raw)*fast_prop_limit:
        print ("excluding subject for BM STR because too many fast responses")
    else:
        #2 Eliminate scores over 10,000 ms

        congr_rts,congr_corr=exclude_slows(congr_rts_raw,congr_corr_raw,slowRT_limit)
        incon_rts,incon_corr=exclude_slows(incon_rts_raw,incon_corr_raw,slowRT_limit)

        #3 Calculate pooled std
        #pooled_std=pooled.std(0) #n-1 std sample std
        #(Use N not N-1 because this is the whole sample).
        #numpy.std is population std
        pooled=congr_rts + incon_rts #all RTs from both blocks, correct and incorrect
        pooled_std=np.std(pooled)

        #4 Calculated adjusted means, including the penalty
        congr_adjmean=adjustedmean(congr_rts,congr_corr,penalty)
        incon_adjmean=adjustedmean(incon_rts,incon_corr,penalty)

        #5 Calculate the IAT, so that pro-stereotype RTs are a -ve score
        IAT=(congr_adjmean-incon_adjmean)/pooled_std

        simpleIAT=sum(congr_rts)/len(congr_rts)-sum(incon_rts)/len(incon_rts)

        print("IAT for " + df_name + " is : {:+.3f}".format(IAT))
        print("Mean difference (uncorrected) " + df_name + " is {:+.3f}".format(simpleIAT)+" seconds")

In [3]:
#@title Load data
df_IAT_BM = pd.read_csv ('https://github.com/ExCaLBBR/ExCaLBBR_Projects/raw/main/RaciallyBiasedDecisions/RaciallyBiasedDecisions-Intersectionality_SURG/data/IAT_BM.csv', header=0)
df_IAT_WM = pd.read_csv ('https://github.com/ExCaLBBR/ExCaLBBR_Projects/raw/main/RaciallyBiasedDecisions/RaciallyBiasedDecisions-Intersectionality_SURG/data/IAT_WM.csv', header=0)

In [4]:
#@title Specify thresholds
penalty=0.600 #penalty - in seconds - for incorrect responses
slowRT_limit=1200 #threshold at which slow RTs are discarded
fastRT_limit=300 #threshold which defines responses which are "too fast"
fast_prop_limit=0.1 # threshold proportion of "too fast" responses which defines exclusion of ppt


In [5]:
#@title Data Extraction
df_IAT_BM.rename(columns={"Spreadsheet: metadata": "Congruence", "allocator-k3xu": "Group Type"}, inplace = True)
df_IAT_BM_2 = df_IAT_BM.drop(["Spreadsheet: ImageLeft", "Spreadsheet: ImageRight"], axis = 1)
df_IAT_WM.rename(columns={"Spreadsheet: metadata": "Congruence", "allocator-k3xu": "Group Type"}, inplace = True)
df_IAT_WM_2 = df_IAT_WM.drop(["Spreadsheet: ImageLeft", "Spreadsheet: ImageRight"], axis = 1)

#Isolate columns based on stereotype category which are not part of the practice blocks
df_IAT_BM_STR = df_IAT_BM_2[((df_IAT_BM_2["Spreadsheet: TextLeft"] == "Strong") | (df_IAT_BM_2["Spreadsheet: TextRight"] == "Strong")) & (~df_IAT_BM_2["Congruence"].str.startswith("practice"))]
df_IAT_BM_INT = df_IAT_BM_2[((df_IAT_BM_2["Spreadsheet: TextLeft"] == "Intelligence") | (df_IAT_BM_2["Spreadsheet: TextRight"] == "Intelligence")) & (~df_IAT_BM_2["Congruence"].str.startswith("practice"))]
df_IAT_WM_STR = df_IAT_WM_2[((df_IAT_WM_2["Spreadsheet: TextLeft"] == "Strong") | (df_IAT_WM_2["Spreadsheet: TextRight"] == "Strong")) & (~df_IAT_WM_2["Congruence"].str.startswith("practice"))]
df_IAT_WM_INT = df_IAT_WM_2[((df_IAT_WM_2["Spreadsheet: TextLeft"] == "Intelligence") | (df_IAT_WM_2["Spreadsheet: TextRight"] == "Intelligence")) & (~df_IAT_WM_2["Congruence"].str.startswith("practice"))]

# BM STR lists RV: polarity of the condition was mislabed in the original data and is corrected here
congr_BM_STR_ID = df_IAT_BM_STR[df_IAT_BM_STR["Congruence"] == "incongruent"]["Participant Private ID"].dropna().tolist()
congr_corr_BM_STR = df_IAT_BM_STR[df_IAT_BM_STR["Congruence"] == "incongruent"]["Correct"].dropna().tolist()
congr_rts_BM_STR = df_IAT_BM_STR[df_IAT_BM_STR["Congruence"] == "incongruent"]["Absolute Reaction Time"].dropna().tolist()
incon_BM_STR_ID = df_IAT_BM_STR[df_IAT_BM_STR["Congruence"] == "congruent"]["Participant Private ID"].dropna().tolist()
incon_corr_BM_STR = df_IAT_BM_STR[df_IAT_BM_STR["Congruence"] == "congruent"]["Correct"].dropna().tolist()
incon_rts_BM_STR = df_IAT_BM_STR[df_IAT_BM_STR["Congruence"] == "congruent"]["Absolute Reaction Time"].dropna().tolist()
# BM INT lists
congr_BM_INT_ID = df_IAT_BM_INT[df_IAT_BM_INT["Congruence"] == "congruent"]["Participant Private ID"].dropna().tolist()
congr_corr_BM_INT = df_IAT_BM_INT[df_IAT_BM_INT["Congruence"] == "congruent"]["Correct"].dropna().tolist()
congr_rts_BM_INT = df_IAT_BM_INT[df_IAT_BM_INT["Congruence"] == "congruent"]["Absolute Reaction Time"].dropna().tolist()
incon_BM_INT_ID = df_IAT_BM_INT[df_IAT_BM_INT["Congruence"] == "incongruent"]["Participant Private ID"].dropna().tolist()
incon_corr_BM_INT = df_IAT_BM_INT[df_IAT_BM_INT["Congruence"] == "incongruent"]["Correct"].dropna().tolist()
incon_rts_BM_INT = df_IAT_BM_INT[df_IAT_BM_INT["Congruence"] == "incongruent"]["Absolute Reaction Time"].dropna().tolist()
# WM STR lists RV: polarity of the condition was mislabed in the original data and is corrected here
congr_WM_STR_ID = df_IAT_WM_STR[df_IAT_WM_STR["Congruence"] == "incongruent"]["Participant Private ID"].dropna().tolist()
congr_corr_WM_STR = df_IAT_WM_STR[df_IAT_WM_STR["Congruence"] == "incongruent"]["Correct"].dropna().tolist()
congr_rts_WM_STR = df_IAT_WM_STR[df_IAT_WM_STR["Congruence"] == "incongruent"]["Absolute Reaction Time"].dropna().tolist()
incon_WM_STR_ID = df_IAT_WM_STR[df_IAT_WM_STR["Congruence"] == "congruent"]["Participant Private ID"].dropna().tolist()
incon_corr_WM_STR = df_IAT_WM_STR[df_IAT_WM_STR["Congruence"] == "congruent"]["Correct"].dropna().tolist()
incon_rts_WM_STR = df_IAT_WM_STR[df_IAT_WM_STR["Congruence"] == "congruent"]["Absolute Reaction Time"].dropna().tolist()
# WM INT lists
congr_WM_INT_ID = df_IAT_WM_INT[df_IAT_WM_INT["Congruence"] == "congruent"]["Participant Private ID"].dropna().tolist()
congr_corr_WM_INT = df_IAT_WM_INT[df_IAT_WM_INT["Congruence"] == "congruent"]["Correct"].dropna().tolist()
congr_rts_WM_INT = df_IAT_WM_INT[df_IAT_WM_INT["Congruence"] == "congruent"]["Absolute Reaction Time"].dropna().tolist()
incon_WM_INT_ID = df_IAT_WM_INT[df_IAT_WM_INT["Congruence"] == "incongruent"]["Participant Private ID"].dropna().tolist()
incon_corr_WM_INT = df_IAT_WM_INT[df_IAT_WM_INT["Congruence"] == "incongruent"]["Correct"].dropna().tolist()
incon_rts_WM_INT = df_IAT_WM_INT[df_IAT_WM_INT["Congruence"] == "incongruent"]["Absolute Reaction Time"].dropna().tolist()
# check correctness list has the same length as the rt list
# print(len(congr_corr_BM_STR))
# print(len(congr_rts_BM_STR))


In [79]:
BM_ID = np.unique(congr_BM_STR_ID)
BM_ID2 = np.where(BM_ID == 8657698.0)

BM_IAT_STR = []
for p in range(len(BM_ID)):
  indx_cong = np.where(np.array(congr_BM_STR_ID) == BM_ID[p])[0]
  cong_corr_BM_STR_pi = np.array(congr_corr_BM_STR)[indx_cong]
  cong_rts_BM_STR_pi = np.array(congr_rts_BM_STR)[indx_cong]
  indx_incon = np.where(np.array(incon_BM_STR_ID) == BM_ID[p])[0]
  incon_corr_BM_STR_pi = np.array(incon_corr_BM_STR)[indx_incon]
  incon_rts_BM_STR_pi = np.array(incon_rts_BM_STR)[indx_incon]
  print(BM_ID[p])
  iat_analyze(cong_rts_BM_STR_pi, cong_corr_BM_STR_pi, incon_rts_BM_STR_pi, incon_corr_BM_STR_pi, "BM STR")

8657248.0
IAT for BM STR is : +0.446
Mean difference (uncorrected) BM STR is +77.385 seconds
8657252.0
IAT for BM STR is : -0.028
Mean difference (uncorrected) BM STR is +14.603 seconds
8657253.0
IAT for BM STR is : -1.000
Mean difference (uncorrected) BM STR is -179.213 seconds
8657254.0
IAT for BM STR is : -0.428
Mean difference (uncorrected) BM STR is -50.412 seconds
8657258.0
IAT for BM STR is : +0.053
Mean difference (uncorrected) BM STR is +8.294 seconds
8657259.0
IAT for BM STR is : +0.331
Mean difference (uncorrected) BM STR is +50.520 seconds
8657271.0
IAT for BM STR is : +1.254
Mean difference (uncorrected) BM STR is +182.686 seconds
8657272.0
IAT for BM STR is : +0.731
Mean difference (uncorrected) BM STR is +125.509 seconds
8657277.0
IAT for BM STR is : -0.012
Mean difference (uncorrected) BM STR is -11.457 seconds
8657304.0
IAT for BM STR is : +0.339
Mean difference (uncorrected) BM STR is +49.976 seconds
8657312.0
IAT for BM STR is : -0.576
Mean difference (uncorrected) B

ValueError: ignored

In [100]:
BM_ID2

(array([63]),)

In [37]:
#Groupwise Analysis
iat_analyze(congr_rts_BM_STR, congr_corr_BM_STR, incon_rts_BM_STR, incon_corr_BM_STR, "BM STR")
iat_analyze(congr_rts_BM_INT, congr_corr_BM_INT, incon_rts_BM_INT, incon_corr_BM_INT, "BM INT")
iat_analyze(congr_rts_WM_STR, congr_corr_WM_STR, incon_rts_WM_STR, incon_corr_WM_STR, "WM STR")
iat_analyze(congr_rts_WM_INT, congr_corr_WM_INT, incon_rts_WM_INT, incon_corr_WM_INT, "WM INT")

IAT for BM STR is : -0.119
Mean difference (uncorrected) BM STR is -21.111 seconds
IAT for BM INT is : +0.012
Mean difference (uncorrected) BM INT is +2.898 seconds
IAT for WM STR is : -0.113
Mean difference (uncorrected) WM STR is -24.281 seconds
IAT for WM INT is : -0.098
Mean difference (uncorrected) WM INT is -19.114 seconds


## Deprecated functions and declarations

In [None]:
# #extract the stuff we're interested in (n.b i am indexing using the column names defined in the csv)
#     #dropna() drops nans
#     #tolist() converts from series to list
#     corrs=df['key_resp_9.corr'].dropna().tolist()
#     rts=df['key_resp_9.rt'].dropna().tolist()
#     block_length=int(len(corrs)/2)
#     #find order
#     order=df['order'].tolist()[0]
#     #1 congr then incong
#     #2 incongr then congr
#     if order==1:
#         congr_corr=corrs[0:block_length]
#         congr_rts=rts[0:block_length]
#         incon_corr=corrs[block_length:]
#         incon_rts=rts[block_length:]
#     else:
#         congr_corr=corrs[block_length:]
#         congr_rts=rts[block_length:]
#         incon_corr=corrs[0:block_length]
#         incon_rts=rts[0:block_length]
#     #1 discard subject if too many fast responses
#     if sum(np.array(congr_rts + incon_rts)<fastRT_limit)>len(congr_rts + incon_rts)*fast_prop_limit:
#         print "excluding subject for " + os.path.basename(filename) + " because too many fast responses"
#     else:
#         #2 Eliminate scores over 10,000 ms

#         congr_rts,congr_corr=exclude_slows(congr_rts,congr_corr,slowRT_limit)
#         incon_rts,incon_corr=exclude_slows(incon_rts,incon_corr,slowRT_limit)

#         #3 Calculate pooled std
#         #pooled_std=pooled.std(0) #n-1 std sample std
#         #(Use N not N-1 because this is the whole sample).
#         #numpy.std is population std
#         pooled=congr_rts + incon_rts #all RTs from both blocks, correct and incorrect
#         pooled_std=np.std(pooled)

#         #4 Calculated adjusted means, including the penalty
#         congr_adjmean=adjustedmean(congr_rts,congr_corr,penalty)
#         incon_adjmean=adjustedmean(incon_rts,incon_corr,penalty)

#         #5 Calculate the IAT, so that pro-stereotype RTs are a -ve score
#         IAT=(congr_adjmean-incon_adjmean)/pooled_std

#         simpleIAT=mean(congr_rts)-mean(incon_rts)

#         print "IAT for " + os.path.basename(filename) + " is : {:+.3f}".format(IAT)
#         print "Mean difference (uncorrected) is {:+.3f}".format(simpleIAT)+" seconds"
# df_IAT_BM_INT_inin = df_IAT_BM_INT[df_IAT_BM_INT["Group Type"] == "Condition_InIn"]
# df_IAT_BM_INT_inoutGender = df_IAT_BM_INT[df_IAT_BM_INT["Group Type"] == "Condition_InOut-gender"]
# df_IAT_BM_INT_inoutRace = df_IAT_BM_INT[df_IAT_BM_INT["Group Type"] == "Condition_InOut-race"]
# df_IAT_BM_INT_inoutBoth = df_IAT_BM_INT[df_IAT_BM_INT["Group Type"] == "Condition_InOut-both"]
# df_IAT_WM_INT_inin = df_IAT_WM_INT[df_IAT_WM_INT["Group Type"] == "Condition_InIn"]
# df_IAT_WM_INT_inoutGender = df_IAT_WM_INT[df_IAT_WM_INT["Group Type"] == "Condition_InOut-gender"]
# df_IAT_WM_INT_inoutRace = df_IAT_WM_INT[df_IAT_WM_INT["Group Type"] == "Condition_InOut-race"]
# df_IAT_WM_INT_inoutBoth = df_IAT_WM_INT[df_IAT_WM_INT["Group Type"] == "Condition_InOut-both"]

## Reference:
IAT data analysis script adapted from https://github.com/tomstafford/IAT

Calculate IAT score from data generated by Robin's PsychoPy script [TQS Feb 2014]

*Greenwald, A. G., Nosek, B. A., & Banaji, M. R. (2003). Understanding and using the implicit association test: I. An improved scoring algorithm. Journal of personality and social psychology, 85(2), 1972-216.*