# Simpson's Paradox
Using `admission_data.csv` for this.

Simpson's Paradox: UC Berkeley wanted to find out whether there was a gender bias in its admissions procedure. They ended up concluding that when each degree was looked at individually, it seemed like girls were favoured, but in the combined figures, boys were favoured.

<img src="simpson.png"/>

As you can see, taken individually for majors A and B, the acceptance rate for boys is 50% and 10% respectively, and for girls it is 80% and 20% respectively. From this data it might seem that girls are being favoured. But when the majors are combined (A + B), the net acceptance rate is 46% for boys and 26% for girls, suggesting that boys were being favoured.

In this case study, we are going to look for such insights in our data.

In [2]:
# Read the admissions records from disk and preview the first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was saved into the CSV -- confirm whether it is needed.
import pandas as pd

df = pd.read_csv('admission_data.csv')
df.head(n=5)

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


### Proportion and admission rate for each gender

In [3]:
# Share of all applicants who are female.
num_total = len(df)  # total number of rows in the dataset
print(num_total)

# Keep the female subset around: later cells break it down by major.
female = df[df['gender'] == 'female']
num_female = len(female)
print(num_female)

percent_female = num_female / num_total * 100
print(percent_female)

# Last expression of the cell, so the preview is rendered as a table.
female.head(n=5)

500
257
51.4


Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
2,31441,female,Chemistry,False
4,53714,female,Physics,True
5,50693,female,Chemistry,False
7,27648,female,Chemistry,True


In [4]:
# Share of all applicants who are male.
male = df[df['gender'] == 'male']
num_male = len(male)
print(num_male)

# Taken as the complement of the female share rather than from num_male.
# NOTE(review): this assumes 'gender' only ever holds "female" or "male"
# and has no missing values -- confirm against the data.
percent_male = 100 - percent_female
print(percent_male)

243
48.6


In [5]:
# Admission rate for females: admitted female applicants as a percentage
# of all female applicants.
admitted_female = female.query('admitted == True')
num_admitted_female = admitted_female.shape[0]
print(num_admitted_female)
percent_admitted_female = (num_admitted_female/num_female)*100
print(percent_admitted_female)

74
28.793774319066145


In [6]:
# Admission rate for males: admitted male applicants as a percentage
# of all male applicants.
admitted_male = male.query('admitted == True')
num_admitted_male = admitted_male.shape[0]
print(num_admitted_male)
percent_admitted_male = (num_admitted_male/num_male)*100
print(percent_admitted_male)

118
48.559670781893004


### Proportion and admission rate for physics majors of each gender

In [7]:
# Share of female applicants whose major is Physics.
physics_female = female[female['major'] == 'Physics']
num_physics_female = len(physics_female)
print(num_physics_female)

# Denominator is the number of female applicants, not the whole dataset.
percent_physics_female = num_physics_female / num_female * 100
print(percent_physics_female)

31
12.062256809338521


In [8]:
# Share of male applicants whose major is Physics.
physics_male = male[male['major'] == 'Physics']
num_physics_male = len(physics_male)
print(num_physics_male)

# Denominator is the number of male applicants, not the whole dataset.
percent_physics_male = num_physics_male / num_male * 100
print(percent_physics_male)

225
92.5925925925926


In [9]:
# Admission rate for female physics majors: admitted female Physics
# applicants as a percentage of all female Physics applicants.
admitted_physics_female = physics_female.query('admitted == True')
num_admitted_phy_female = admitted_physics_female.shape[0]
percent_adm_phy_fem = (num_admitted_phy_female/num_physics_female)*100
print(percent_adm_phy_fem)

74.19354838709677


In [10]:
# Admission rate for male physics majors: admitted male Physics
# applicants as a percentage of all male Physics applicants.
admitted_physics_male = physics_male.query('admitted == True')
num_admitted_phy_male = admitted_physics_male.shape[0]
percent_adm_phy_male = (num_admitted_phy_male/num_physics_male)*100
print(percent_adm_phy_male)

51.55555555555556


### Proportion and admission rate for chemistry majors of each gender

In [11]:
# Share of female applicants whose major is Chemistry.
chem_female = female[female['major'] == 'Chemistry']
num_chem_female = len(chem_female)
print(num_chem_female)

# Denominator is the number of female applicants, not the whole dataset.
percent_chem_female = num_chem_female / num_female * 100
print(percent_chem_female)

226
87.93774319066148


In [12]:
# Share of male applicants whose major is Chemistry.
chem_male = male[male['major'] == 'Chemistry']
num_chem_male = len(chem_male)
print(num_chem_male)

# Denominator is the number of male applicants, not the whole dataset.
percent_chem_male = num_chem_male / num_male * 100
print(percent_chem_male)

18
7.4074074074074066


In [13]:
# Admission rate for female chemistry majors: admitted female Chemistry
# applicants as a percentage of all female Chemistry applicants.
admitted_chem_female = chem_female.query('admitted == True')
num_admitted_chem_female = admitted_chem_female.shape[0]
percent_adm_chem_fem = (num_admitted_chem_female/num_chem_female)*100
print(percent_adm_chem_fem)

22.566371681415927


In [14]:
# Admission rate for male chemistry majors: admitted male Chemistry
# applicants as a percentage of all male Chemistry applicants.
admitted_chem_male = chem_male.query('admitted == True')
num_admitted_chem_male = admitted_chem_male.shape[0]
percent_adm_chem_male = (num_admitted_chem_male/num_chem_male)*100
print(percent_adm_chem_male)

11.11111111111111


### Admission rate for each major

In [18]:
# Overall admission rate among Physics applicants, regardless of gender.
physics = df[df['major'] == 'Physics']
total_physics = len(physics)

# Explicit comparison with True mirrors the 'admitted == True' filter used
# elsewhere in the notebook.
admitted_physics = len(physics[physics['admitted'].eq(True)])

rate_physics = admitted_physics / total_physics * 100
print(rate_physics)

54.296875


In [19]:
# Overall admission rate among Chemistry applicants, regardless of gender.
chemistry = df[df['major'] == 'Chemistry']
total_chem = len(chemistry)

# Explicit comparison with True mirrors the 'admitted == True' filter used
# elsewhere in the notebook.
admitted_chem = len(chemistry[chemistry['admitted'].eq(True)])

rate_chem = admitted_chem / total_chem * 100
print(rate_chem)

21.721311475409834
