<a href="https://colab.research.google.com/github/ExCaLBBR/ExCaLBBR_Projects/blob/main/RaciallyBiasedDecisions/Racialized-Gendered_Names/analysis/RGN_proportions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Load relevant libraries

In [1]:
import pandas as pd

# Race and gender label analysis

Data url dir

In [2]:
# RGN data
fil = 'https://raw.githubusercontent.com/ExCaLBBR/ExCaLBBR_Projects/main/RaciallyBiasedDecisions/Racialized-Gendered_Names/data/taskRGN.csv'

#SpecRemove specific participants
  #Current condition: fixed responded & incorrect response to check
rem = ['dwgxuk', 'tdqdqk', 'ppluks']

Load RGN data

In [3]:

df_task = pd.read_csv(fil, index_col = 0, header = [0,1,2])
#display(df_task)

Parse relevant substructures

In [4]:
df_RaceLabels = df_task['Response']['Race']
df_GenderLabels = df_task['Response']['Gender']
df_RaceRT = df_task['Reaction Time']['Gender']
df_GenderRT = df_task['Reaction Time']['Gender']

Remove specified participants

In [5]:
remIdxRL = [df_RaceLabels.columns.get_loc(r) for r in rem if r in df_RaceLabels]
remIdxGL = [df_GenderLabels.columns.get_loc(r) for r in rem if r in df_GenderLabels]
remIdxRT = [df_RaceRT.columns.get_loc(r) for r in rem if r in df_RaceRT]
remIdxGT = [df_GenderRT.columns.get_loc(r) for r in rem if r in df_GenderRT]

Obtain sub-dataframes corresponding to relevant features

In [6]:
df_RaceLabels = df_RaceLabels.drop(df_RaceLabels.columns[remIdxRL],axis = 1)
df_GenderLabels  = df_GenderLabels.drop(df_GenderLabels.columns[remIdxGL],axis = 1)
df_RaceRT  = df_RaceRT.drop(df_RaceRT.columns[remIdxRT],axis = 1)
df_GenderRT  = df_GenderRT.drop(df_GenderRT.columns[remIdxGT],axis = 1)

Obtain proportions for Race Labels

In [None]:
# df_RaceLabels

In [7]:
 
names_race = df_RaceLabels.index.values

name_race_dict = dict()
participantCount = df_RaceLabels.shape[1]
for (index, row) in df_RaceLabels.iterrows():
    race_label_dict = dict()
    for entry in row.values:
      if entry not in race_label_dict:
        race_label_dict[entry] = 1
      else:
        race_label_dict[entry] += 1
    for raceLabelKey in race_label_dict:
        raceLabelCount = race_label_dict[raceLabelKey]
        race_label_dict[raceLabelKey] = raceLabelCount / participantCount
    if index not in name_race_dict:
        name_race_dict[index] = race_label_dict
df_RaceLabelProportion = pd.DataFrame.from_dict(name_race_dict, orient = "index")

Display race labels proportion

In [None]:
# df_RaceLabelProportion

Obtain proportions for Gender Labels

In [None]:
# df_GenderLabels

In [8]:
names_gender = df_GenderLabels.index.values
# names_gender

participantCount = df_GenderLabels.shape[1]
name_gender_dict = dict()
for (index, row) in df_GenderLabels.iterrows():
    gender_label_dict = dict()
    for entry in row.values:
      if entry not in gender_label_dict:
        gender_label_dict[entry] = 1
      else:
        gender_label_dict[entry] += 1
    for genderLabelKey in gender_label_dict:
        genderLabelCount = gender_label_dict[genderLabelKey]
        gender_label_dict[genderLabelKey] = genderLabelCount / participantCount
    if index not in name_gender_dict:
        name_gender_dict[index] = gender_label_dict
df_GenderLabelProportion = pd.DataFrame.from_dict(name_gender_dict, orient = "index")

Display gender labels percentage

In [33]:
genderLabelKey

'Female'

In [None]:
# df_GenderLabelProportion

# Consensus Analysis

Loading demographic data and processing

In [9]:
demographic_url = 'https://raw.githubusercontent.com/ExCaLBBR/ExCaLBBR_Projects/main/RaciallyBiasedDecisions/Racialized-Gendered_Names/data/raw/data_demographic.csv'
df_demographic = pd.read_csv(demographic_url)
include = ["Participant Completion Code", "Question Key", "Response"]
df_demographic.drop(columns=df_demographic.columns.difference(include), inplace=True)
remIdxCode = []
for (index, row) in df_demographic.iterrows():
  if row["Participant Completion Code"] in rem:
    remIdxCode.append(index)
df_demographic.drop(remIdxCode, inplace = True)
remIdxRG = []
for (index, row) in df_demographic.iterrows():
  if row["Question Key"] not in ["raceEth_v2", "gender"]:
    remIdxRG.append(index)
df_demographic.drop(remIdxRG, inplace = True)
# df_demographic

Group participants by race and get participant completion codes of each group

In [10]:
df_AsianParticipants = df_demographic[df_demographic.Response == "Asian"]
df_BlackAmericanParticipants = df_demographic[df_demographic.Response == "Black or African American"]
df_LatinoParticipants = df_demographic[df_demographic.Response == "Hispanic or Latino"]
df_WhiteParticipants = df_demographic[df_demographic.Response == "White"]
asian_pcc = df_AsianParticipants["Participant Completion Code"].values
black_american_pcc = df_BlackAmericanParticipants["Participant Completion Code"].values
latino_pcc = df_LatinoParticipants["Participant Completion Code"].values
white_pcc = df_WhiteParticipants["Participant Completion Code"].values

Create dataframe for consensus among Asian Participants

In [11]:
asian_RaceLabelResponse = df_RaceLabels[asian_pcc]
asian_name_race_dict = dict()
for (index, row) in asian_RaceLabelResponse.iterrows():
    raceResponseCnt = row.value_counts()
    for raceLabel in ["Black American", "Chinese American", "Hispanic or Latino", "White American"]:
        if raceLabel not in raceResponseCnt:
            raceResponseCnt[raceLabel] = 0
    if index not in asian_name_race_dict:
        asian_name_race_dict[index] = raceResponseCnt.to_dict()
df_AsianRLR = pd.DataFrame.from_dict(asian_name_race_dict, orient = "index")

In [34]:
asian_GenderLabelResponse = df_GenderLabels[asian_pcc]
name_gender_dict = dict()
for (index, row) in asian_GenderLabelResponse.iterrows():
    genResponseCnt = row.value_counts()
    for genLabel in ["Female", "Male"]:
        if genLabel not in genResponseCnt:
            genResponseCnt[genLabel] = 0
    if index not in name_gender_dict:
        name_gender_dict[index] = genResponseCnt.to_dict()
df_AsianRLR = pd.DataFrame.from_dict(name_gender_dict, orient = "index")

In [35]:
df_AsianRLR

Unnamed: 0,Female,Male
# Aaliyiah,39,1
# Aisha,39,1
# Alex,1,39
# Becky,35,5
# Brody,0,40
# Casey,21,19
# Charlotte,35,5
# Deshawn,2,38
# Emma,39,1
# Enrique,3,37


Created dataframe for race responses count of each name among Asian participants


In [None]:
# df_AsianRLR

Sort by response mode

In [12]:
df_AsianRLR["Response Mode"] = df_AsianRLR.max(axis = 1)
df_AsianConsensus = df_AsianRLR.sort_values(by="Response Mode", ascending = False)
# df_AsianConsensus

Follow the similar procedures to get consensus dataframe of Black American, Latino or Hispanic, and White participants.

Black American consensus:

In [13]:
black_american_RaceLabelResponse = df_RaceLabels[black_american_pcc]
black_american_name_race_dict = dict()
for (index, row) in black_american_RaceLabelResponse.iterrows():
    raceResponseCnt = row.value_counts()
    for raceLabel in ["Black American", "Chinese American", "Hispanic or Latino", "White American"]:
        if raceLabel not in raceResponseCnt:
            raceResponseCnt[raceLabel] = 0
    if index not in black_american_name_race_dict:
        black_american_name_race_dict[index] = raceResponseCnt.to_dict()
df_BlackAmericanRLR = pd.DataFrame.from_dict(black_american_name_race_dict, orient = "index")
df_BlackAmericanRLR["Response Mode"] = df_BlackAmericanRLR.max(axis = 1)
df_BlackAmericanConsensus = df_BlackAmericanRLR.sort_values(by="Response Mode", ascending = False)
# df_BlackAmericanConsensus

Latino or Hispanic consensus:

In [14]:
latino_RaceLabelResponse = df_RaceLabels[latino_pcc]
latino_name_race_dict = dict()
for (index, row) in latino_RaceLabelResponse.iterrows():
    raceResponseCnt = row.value_counts()
    for raceLabel in ["Black American", "Chinese American", "Hispanic or Latino", "White American"]:
        if raceLabel not in raceResponseCnt:
            raceResponseCnt[raceLabel] = 0
    if index not in latino_name_race_dict:
        latino_name_race_dict[index] = raceResponseCnt.to_dict()
df_LatinoRLR = pd.DataFrame.from_dict(latino_name_race_dict, orient = "index")
df_LatinoRLR["Response Mode"] = df_LatinoRLR.max(axis = 1)
df_LatinoConsensus = df_LatinoRLR.sort_values(by="Response Mode", ascending = False)
# df_LatinoConsensus

White American consensus:

In [15]:
white_RaceLabelResponse = df_RaceLabels[white_pcc]
white_name_race_dict = dict()
for (index, row) in white_RaceLabelResponse.iterrows():
    raceResponseCnt = row.value_counts()
    for raceLabel in ["Black American", "Chinese American", "Hispanic or Latino", "White American"]:
        if raceLabel not in raceResponseCnt:
            raceResponseCnt[raceLabel] = 0
    if index not in white_name_race_dict:
        white_name_race_dict[index] = raceResponseCnt.to_dict()
df_WhiteRLR = pd.DataFrame.from_dict(white_name_race_dict, orient = "index")
df_WhiteRLR["Response Mode"] = df_WhiteRLR.max(axis = 1)
df_WhiteConsensus = df_WhiteRLR.sort_values(by="Response Mode", ascending = False)
# df_WhiteConsensus

Export consensus data

In [16]:
with pd.ExcelWriter("RGN_participant_consensus.xlsx") as writer:
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    df_AsianConsensus.to_excel(writer, sheet_name="Asian Participants", index = True)
    df_BlackAmericanConsensus.to_excel(writer, sheet_name="Black American Participants", index = True)
    df_LatinoConsensus.to_excel(writer, sheet_name="Latino Participants", index = True)
    df_WhiteConsensus.to_excel(writer, sheet_name="White Participants", index = True)

In [None]:

# Pair demographic with RGN: Compute proportions for Race Labels and Gender Labels by each of 4 groups
  #This will require loading the demographic csv file
#Compute Reaction time for each # name across identity groups

In [18]:
df_AsianConsensus

Unnamed: 0,Black American,White American,Hispanic or Latino,Chinese American,Response Mode
# Sam,0,40,0,0,40
# Yinuo,0,0,0,40,40
# Mingze,0,0,0,40,40
# Yuxi,0,1,0,39,39
# Xinyi,1,0,0,39,39
# Jack,0,39,0,1,39
# Ting,0,0,1,39,39
# Haoyu,0,0,1,39,39
# Muyang,0,0,1,39,39
# Yize,0,0,1,39,39


# Accuracy Analysis

load stimuli (correct answer data)

In [17]:
stimuli_url = "https://raw.githubusercontent.com/ExCaLBBR/ExCaLBBR_Projects/main/RaciallyBiasedDecisions/Racialized-Gendered_Names/stimuli.csv"
df_stimuli = pd.read_csv(stimuli_url)
df_stimuli["Name"] = df_stimuli["Name"].map("# {}".format)
# df_stimuli

Join stimuli race label column on the consensus to show comparison

In [19]:
# df_AsianConsensus
df_AsianAccuracy = df_AsianConsensus.join(df_stimuli.set_index("Name"))
df_AsianAccuracy.drop(columns=["Gender"], inplace=True)
accuracy = []
for (index, row) in df_AsianAccuracy.iterrows():
    stimuli = row["Race"]
    accuracy.append(row[stimuli]/40)
df_AsianAccuracy["Accuracy"] = accuracy
# df_AsianAccuracy

In [None]:
df_BlackAmericanAccuracy = df_BlackAmericanConsensus.join(df_stimuli.set_index("Name"))
df_BlackAmericanAccuracy.drop(columns=["Gender"], inplace=True)
accuracy = []
for (index, row) in df_BlackAmericanAccuracy.iterrows():
    stimuli = row["Race"]
    accuracy.append(row[stimuli]/38)
df_BlackAmericanAccuracy["Accuracy"] = accuracy
# df_BlackAmericanAccuracy

In [None]:
df_LatinoAccuracy = df_LatinoConsensus.join(df_stimuli.set_index("Name"))
df_LatinoAccuracy.drop(columns=["Gender"], inplace=True)
accuracy = []
for (index, row) in df_LatinoAccuracy.iterrows():
    stimuli = row["Race"]
    accuracy.append(row[stimuli]/37)
df_LatinoAccuracy["Accuracy"] = accuracy
# df_LatinoAccuracy

In [None]:
df_WhiteAccuracy = df_WhiteConsensus.join(df_stimuli.set_index("Name"))
df_WhiteAccuracy.drop(columns=["Gender"], inplace=True)
accuracy = []
for (index, row) in df_WhiteAccuracy.iterrows():
    stimuli = row["Race"]
    accuracy.append(row[stimuli]/40)
df_WhiteAccuracy["Accuracy"] = accuracy
# df_WhiteAccuracy

In [None]:
%pwd

'/content'

In [20]:
df_AsianAccuracy

Unnamed: 0,Black American,White American,Hispanic or Latino,Chinese American,Response Mode,Race,Accuracy
# Sam,0,40,0,0,40,White American,1.0
# Yinuo,0,0,0,40,40,Chinese American,1.0
# Mingze,0,0,0,40,40,Chinese American,1.0
# Yuxi,0,1,0,39,39,Chinese American,0.975
# Xinyi,1,0,0,39,39,Chinese American,0.975
# Jack,0,39,0,1,39,White American,0.975
# Ting,0,0,1,39,39,Chinese American,0.975
# Haoyu,0,0,1,39,39,Chinese American,0.975
# Muyang,0,0,1,39,39,Chinese American,0.975
# Yize,0,0,1,39,39,Chinese American,0.975
