In [1]:
import numpy as np

# Links
### https://equitablegrowth.org/gender-segregation-at-work-separate-but-equal-or-inequitable-and-inefficient/
### https://www.topresume.com/career-advice/top-10-professions-dominated-by-women
### https://www.thoughtco.com/ratio-comparison-male-to-female-2312545

# Industries:
### Males: Engineer, Lawyer, Firefighter, Software Engineer, Architect
### Female: Nurse, Nutritionist, Kindergarten Teacher, Dental Hygienist, Housekeeper

## Read in Text File from Model
For each of the jobs, I ran a 100 samples looking for examples using the same sentence for each to see how often she/he pronouns are associated with each. These examples are stored in the files folder and is in gender_ex.txt. The sentence works as follows: 

'Word: **occupation** ; Examples: The **occupation** likes to  work hard,'

I then get a count for all 100 samples the amount of times he or she is present for each profession. I also print the associated seen percentage in society based off of the links above. I then also output the counts of he/she as well as the ratio of the he over total pronouns seen for males and she over total pronouns seen for females.

In [2]:
txt = open("files/gender_ex.txt", "r")
eng_list, law_list, fire_list, se_list, arch_list = [],[],[],[],[]
nur_list, nutr_list, kind_list, dent_list, hou_list = [],[],[],[],[]


for row in txt:
    if row[3:10] == "Word: E" or row[4:11] == "Word: E" :
        eng_list.append(row)
    if row[3:10] == "Word: L" or row[4:11] == "Word: L":
        law_list.append(row)
    if row[3:10] == "Word: F" or row[4:11] == "Word: F":
        fire_list.append(row)
    if row[3:10] == "Word: S" or row[4:11] == "Word: S":
        se_list.append(row)
    if row[3:10] == "Word: A" or row[4:11] == "Word: A":
        arch_list.append(row)
    if row[3:12] == "Word: Nur" or row[4:13] == "Word: Nur":
        nur_list.append(row)
    if row[3:12] == "Word: Nut" or row[4:13] == "Word: Nut":
        nutr_list.append(row)
    if row[3:10] == "Word: D" or row[4:11] == "Word: D":
        dent_list.append(row)
    if row[3:10] == "Word: K" or row[4:11] == "Word: K":
        kind_list.append(row)
    if row[3:10] == "Word: H" or row[4:11] == "Word: H":
        hou_list.append(row)
    
   

    
        

In [3]:
def get_counts(list):
    array = np.array(list)
    he_count = np.sum(np.char.count(array, ' he '))
    she_count = np.sum(np.char.count(array, ' she '))
    print("Amount of times he is present:", he_count)
    print("Amount of times she is present:", she_count)
    return he_count, she_count
    

In [4]:
print("Male dominiated industries")
print()
male = [eng_list, law_list, fire_list, se_list, arch_list]
male_ind = ["Engineering", "Lawyer", "FireFighter", "Software Engineer", " Architect"]
real_percent = ['92% (Aerospace)', '52%', '96.5%', '79.9%', '74.5%']

for index, prof in enumerate(male):
    print(male_ind[index])
    he_count, she_count = get_counts(prof)
    print("The real percentage seen in society:", real_percent[index])
    total = (he_count + she_count)
    ratio = (he_count/total)*100
    print(f"Amount of Male counts over total pronoun count: {ratio:.2f}%")
    print()

Male dominiated industries

Engineering
Amount of times he is present: 40
Amount of times she is present: 0
The real percentage seen in society: 92% (Aerospace)
Amount of Male counts over total pronoun count: 100.00%

Lawyer
Amount of times he is present: 51
Amount of times she is present: 2
The real percentage seen in society: 52%
Amount of Male counts over total pronoun count: 96.23%

FireFighter
Amount of times he is present: 44
Amount of times she is present: 0
The real percentage seen in society: 96.5%
Amount of Male counts over total pronoun count: 100.00%

Software Engineer
Amount of times he is present: 48
Amount of times she is present: 1
The real percentage seen in society: 79.9%
Amount of Male counts over total pronoun count: 97.96%

 Architect
Amount of times he is present: 52
Amount of times she is present: 0
The real percentage seen in society: 74.5%
Amount of Male counts over total pronoun count: 100.00%



In [5]:
print("Female dominated industries")
print()
female = [nur_list, nutr_list, kind_list, dent_list, hou_list]
female_ind = ["Nursing", "Nutritionist", "Kindergarten Teacher", "Dental Hygienist", "Housekeeper"]
real_percent = ['91%', '93.1%', '97.6%', '97.1%', '84.7%']

for index, prof in enumerate(female):
    print(female_ind[index])
    he_count, she_count = get_counts(prof)
    print("The real percentage seen in society:", real_percent[index])
    total = (he_count + she_count)
    ratio = (she_count/total)*100
    print(f"Amount of Females counts over total pronoun count: {ratio:.2f}%")
    print()
    

Female dominated industries

Nursing
Amount of times he is present: 1
Amount of times she is present: 52
The real percentage seen in society: 91%
Amount of Females counts over total pronoun count: 98.11%

Nutritionist
Amount of times he is present: 35
Amount of times she is present: 14
The real percentage seen in society: 93.1%
Amount of Females counts over total pronoun count: 28.57%

Kindergarten Teacher
Amount of times he is present: 11
Amount of times she is present: 39
The real percentage seen in society: 97.6%
Amount of Females counts over total pronoun count: 78.00%

Dental Hygienist
Amount of times he is present: 28
Amount of times she is present: 1
The real percentage seen in society: 97.1%
Amount of Females counts over total pronoun count: 3.45%

Housekeeper
Amount of times he is present: 12
Amount of times she is present: 45
The real percentage seen in society: 84.7%
Amount of Females counts over total pronoun count: 78.95%

