In [None]:
import pandas as pd
from scipy import stats
import numpy as np

### Data processing

In [None]:
# Seven-point competitiveness scale, listed from lowest to highest rating.
# Each label's numeric score is its 1-based position in this list.
_competitive_labels = [
    'Extremely uncompetitive',
    'Uncompetitive',
    'Somewhat uncompetitive',
    'Neutral (neither competitive nor uncompetitive)',
    'Somewhat competitive',
    'Competitive',
    'Extremely competitive',
]
competitive_mapping = {
    label: score for score, label in enumerate(_competitive_labels, start=1)
}


In [None]:
# Candidate gender recorded as the correct answer for each letter.
# NOTE: the spelling of the variable name is kept as-is because later cells
# look it up under this exact name.
groud_truth_gender = dict(
    letter1='Female',
    letter2='Female',
    letter3='Male',
    letter4='Male',
)

# Letter-writer gender recorded as the correct answer for each letter;
# the same value applies to all four letters.
ground_truth_writer_gender = dict.fromkeys(
    ('letter1', 'letter2', 'letter3', 'letter4'),
    'Male',
)

# Letter strength recorded for each letter: odd-numbered letters are the
# strong ones, even-numbered letters the weak ones.
ground_truth_strength = dict(
    zip(
        ('letter1', 'letter2', 'letter3', 'letter4'),
        ('Strong', 'Weak', 'Strong', 'Weak'),
    )
)

In [None]:
# Read one response sheet per letter.
df1, df2, df3, df4 = (
    pd.read_excel(path)
    for path in (
        'Responses/letter1_female_strong.xlsx',
        'Responses/letter2_female_weak.xlsx',
        'Responses/letter3_male_strong.xlsx',
        'Responses/letter4_male_weak.xlsx',
    )
)

# Add a numeric score column derived from the textual competitiveness answer.
# The mapping is indexed directly, so an answer that is not one of its keys
# raises KeyError instead of being silently skipped.
for responses in (df1, df2, df3, df4):
    responses['competitive_score'] = responses['competitiveness'].apply(
        lambda answer: competitive_mapping[answer]
    )

### Candidate Gender Question Analysis 

#### Remove invalid responses

In [None]:
# Row labels of responses where participants simply guessed the answer,
# listed separately for each letter.
invalid_index1 = [4, 5, 9]
invalid_index2 = [1, 5]
invalid_index3 = [2, 8, 12]
invalid_index4 = [1, 2, 3, 4, 5, 9, 10, 11, 12]

# Build the cleaned frames without those rows; the raw frames stay untouched.
df1_clean = df1.drop(index=invalid_index1)
df2_clean = df2.drop(index=invalid_index2)
df3_clean = df3.drop(index=invalid_index3)
df4_clean = df4.drop(index=invalid_index4)

#### Accuracy of candidate gender question

In [None]:
def get_accuracy(df, column, ground_truth):
    """Return the fraction of entries in ``df[column]`` equal to ``ground_truth``.

    Parameters
    ----------
    df : pandas.DataFrame
        Responses to score.
    column : str
        Name of the column holding the participants' answers.
    ground_truth : object
        The correct answer; each entry is compared to it with ``==``.

    Returns
    -------
    float
        Number of matching rows divided by the total number of rows, or
        ``nan`` when ``df`` has no rows.
    """
    answers = df[column]
    total = len(answers)
    # A frame with no rows (e.g. every response was dropped as invalid) has no
    # defined accuracy; report NaN instead of raising ZeroDivisionError.
    if total == 0:
        return float('nan')
    # Missing answers compare unequal to the ground truth, so they count as
    # incorrect rather than being excluded from the denominator.
    correct = int((answers == ground_truth).sum())
    return correct / total

In [None]:
# Accuracy of the candidate-gender answers for each letter, using the
# cleaned responses; one value is printed per letter, in letter order.
for letter, responses in (
    ('letter1', df1_clean),
    ('letter2', df2_clean),
    ('letter3', df3_clean),
    ('letter4', df4_clean),
):
    print(get_accuracy(responses, 'candidate_gender', groud_truth_gender[letter]))

#### Candidate gender question confidence

In [None]:
# Unweighted average of the four per-letter mean confidence values
# (each letter contributes equally, regardless of how many rows it has).
per_letter_confidence = [
    responses['candidate_gender_confidence'].describe()['mean']
    for responses in (df1_clean, df2_clean, df3_clean, df4_clean)
]
print(np.mean(per_letter_confidence))

#### Accuracy of letter writer gender question

In [None]:
# Accuracy of the writer-gender answers for each letter, using the
# cleaned responses; one value is printed per letter, in letter order.
for letter, responses in (
    ('letter1', df1_clean),
    ('letter2', df2_clean),
    ('letter3', df3_clean),
    ('letter4', df4_clean),
):
    print(get_accuracy(responses, 'writer_gender', ground_truth_writer_gender[letter]))

### Competitiveness Ratings Analysis

#### Overall

In [None]:
# Summary statistics of the numeric competitiveness score for letter 1 (all responses).
df1.loc[:, ['competitive_score']].describe()

In [None]:
# Summary statistics of the numeric competitiveness score for letter 2 (all responses).
df2.loc[:, ['competitive_score']].describe()

In [None]:
# Summary statistics of the numeric competitiveness score for letter 3 (all responses).
df3.loc[:, ['competitive_score']].describe()

In [None]:
# Summary statistics of the numeric competitiveness score for letter 4 (all responses).
df4.loc[:, ['competitive_score']].describe()

#### Stratified by candidate gender

In [None]:
# Pool the uncleaned responses by the candidate gender of the letter:
# letters 1-2 form the female group, letters 3-4 the male group.
female_letters = [df1, df2]
male_letters = [df3, df4]
df_female = pd.concat(female_letters)
df_male = pd.concat(male_letters)

In [None]:
# Statistical test: independent two-sample t-test comparing the
# competitiveness scores of the female-letter and male-letter groups.
female_scores = df_female['competitive_score'].to_numpy()
male_scores = df_male['competitive_score'].to_numpy()
print(stats.ttest_ind(female_scores, male_scores))

In [None]:
# Summary statistics of the competitiveness score for the pooled female-letter responses.
df_female.loc[:, ['competitive_score']].describe()

In [None]:
# Summary statistics of the competitiveness score for the pooled male-letter responses.
df_male.loc[:, ['competitive_score']].describe()

#### Stratified by letter strength

In [None]:
# Pool the uncleaned responses by letter strength:
# letters 2 and 4 form the weak group, letters 1 and 3 the strong group.
weak_letters = [df2, df4]
strong_letters = [df1, df3]
df_weak = pd.concat(weak_letters)
df_strong = pd.concat(strong_letters)

In [None]:
# Statistical test: independent two-sample t-test comparing the
# competitiveness scores of the weak-letter and strong-letter groups.
weak_scores = df_weak['competitive_score'].to_numpy()
strong_scores = df_strong['competitive_score'].to_numpy()
print(stats.ttest_ind(weak_scores, strong_scores))

In [None]:
# Summary statistics of the competitiveness score for the pooled strong-letter responses.
df_strong.loc[:, ['competitive_score']].describe()

In [None]:
# Summary statistics of the competitiveness score for the pooled weak-letter responses.
df_weak.loc[:, ['competitive_score']].describe()

#### Stratified by perceived gender

In [None]:
# Letter 1: split the cleaned responses by the candidate gender each
# participant answered, then summarise the 'Female' group.
df1_female = df1_clean[df1_clean['candidate_gender'].eq('Female')]
df1_male = df1_clean[df1_clean['candidate_gender'].eq('Male')]

df1_female.describe()

In [None]:
# Summary statistics for the cleaned letter-1 responses whose candidate_gender answer was 'Male'.
df1_male.describe()

In [None]:
# Letter 2: split the cleaned responses by the candidate gender each
# participant answered, then summarise the 'Female' group.
df2_female = df2_clean[df2_clean['candidate_gender'].eq('Female')]
df2_male = df2_clean[df2_clean['candidate_gender'].eq('Male')]

df2_female.describe()

In [None]:
# Summary statistics for the cleaned letter-2 responses whose candidate_gender answer was 'Male'.
df2_male.describe()

In [None]:
# Letter 3: split the cleaned responses by the candidate gender each
# participant answered, then summarise the 'Female' group.
df3_female = df3_clean[df3_clean['candidate_gender'].eq('Female')]
df3_male = df3_clean[df3_clean['candidate_gender'].eq('Male')]

df3_female.describe()

In [None]:
# Summary statistics for the cleaned letter-3 responses whose candidate_gender answer was 'Male'.
df3_male.describe()

In [None]:
# Letter 4: split the cleaned responses by the candidate gender each
# participant answered, then summarise the 'Female' group.
df4_female = df4_clean[df4_clean['candidate_gender'].eq('Female')]
df4_male = df4_clean[df4_clean['candidate_gender'].eq('Male')]

df4_female.describe()

In [None]:
# Summary statistics for the cleaned letter-4 responses whose candidate_gender answer was 'Male'.
df4_male.describe()