In [233]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import norm, t

import config

In [234]:
# Apply seaborn's "whitegrid" style to any plots drawn later.
sns.set_style("whitegrid")

# Base directory exposed by the local config module; the data file is
# located relative to it.
ROOT_DIR = config.ROOT_DIR

In [235]:
# Build the CSV path with pathlib so the notebook is portable: the previous
# hard-coded backslashes only formed a valid path on Windows. Path() accepts
# ROOT_DIR whether it is a str or already a Path.
student_grades_path = Path(ROOT_DIR) / "data" / "student_grades.csv"
student_grades = pd.read_csv(student_grades_path)

In [236]:
# Preview the loaded table (pandas abbreviates long frames when displaying them).
student_grades

Unnamed: 0,Student ID,Undergrad Degree,Undergrad Grade,MBA Grade,Work Experience,Employability (Before),Employability (After),Status,Annual Salary
0,1,Business,68.4,90.2,No,252,276,Placed,111000.0
1,2,Business,62.1,92.8,No,423,410,Not Placed,
2,3,Computer Science,70.2,68.7,Yes,101,119,Placed,107000.0
3,4,Engineering,75.1,80.7,No,288,334,Not Placed,
4,5,Finance,60.9,74.9,No,248,252,Not Placed,
...,...,...,...,...,...,...,...,...,...
90,91,Business,76.0,77.9,No,326,369,Placed,99500.0
91,92,Computer Science,67.7,86.1,No,421,457,Placed,107000.0
92,93,Engineering,75.3,89.9,No,368,421,Not Placed,
93,94,Engineering,68.1,83.1,No,279,282,Placed,84000.0


#### Estimate the mean employability of students before they graduate (95% confidence interval, alpha = 0.05)

In [237]:
# Keep the student identifier together with the pre-graduation score.
employability = student_grades[["Student ID", "Employability (Before)"]]
before_scores = employability["Employability (Before)"]

# Sample statistics of the "before" score.
sample_size = before_scores.count()  # number of non-missing observations
sample_mean = before_scores.mean()
sample_std = before_scores.std()     # pandas default ddof=1 -> sample standard deviation
sample_mean, sample_std, sample_size

(239.90526315789472, 85.94048827645774, 95)

In [238]:
# Critical t value for a two-sided interval at alpha = 0.05:
# the quantile at 1 - alpha/2 with n - 1 degrees of freedom.
alpha = 0.05
degrees_of_freedom = sample_size - 1
t_stat = t.ppf(1 - alpha / 2, df=degrees_of_freedom)
t_stat

1.9855234417658298

In [239]:
# Margin of error = critical t value * standard error of the mean;
# the confidence interval is the sample mean -/+ that margin.
standard_error = sample_std / np.sqrt(sample_size)
margin_of_error = t_stat * standard_error
lower_bound = sample_mean - margin_of_error
upper_bound = sample_mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)
confidence_interval

(222.3982913235513, 257.41223499223815)

In [240]:
# Cross-check with scipy: t.interval yields the 95% interval in a single call,
# given the degrees of freedom, the sample mean and the standard error.
t.interval(
    confidence=0.95,
    df=sample_size - 1,
    loc=sample_mean,
    scale=sample_std / np.sqrt(sample_size),
)

(222.3982913235513, 257.41223499223815)

#### Estimate the mean employability of students after they graduate (90% confidence interval, alpha = 0.1)

In [241]:
# Subset used for the post-graduation estimate: student identifier + "after" score.
employability_after = student_grades[["Student ID", "Employability (After)"]]
employability_after

Unnamed: 0,Student ID,Employability (After)
0,1,276
1,2,410
2,3,119
3,4,334
4,5,252
...,...,...
90,91,369
91,92,457
92,93,421
93,94,282


In [242]:
# Sample statistics of the post-graduation employability score.
after_scores = employability_after["Employability (After)"]
sample_size = after_scores.count()  # number of non-missing observations
sample_mean = after_scores.mean()
sample_std = after_scores.std()     # pandas default ddof=1 -> sample standard deviation
sample_mean, sample_std, sample_size

(289.34736842105264, 93.52104876523815, 95)

In [243]:
# Critical t value for a two-sided interval at alpha = 0.1:
# the quantile at 1 - alpha/2 with n - 1 degrees of freedom.
alpha = 0.1
degrees_of_freedom = sample_size - 1
t_stat = t.ppf(1 - alpha / 2, df=degrees_of_freedom)
t_stat

1.6612258552697985

In [244]:
# Margin of error = critical t value * standard error of the mean;
# the confidence interval is the sample mean -/+ that margin.
standard_error = sample_std / np.sqrt(sample_size)
margin_of_error = t_stat * standard_error
lower_bound = sample_mean - margin_of_error
upper_bound = sample_mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)
confidence_interval

(273.4078114011075, 305.2869254409978)

In [245]:
# Cross-check with scipy: compute the 90% interval directly from the "after"
# scores, recomputing the sample statistics inside this cell.
after_scores = employability_after["Employability (After)"]
sample_size = after_scores.count()
t.interval(
    confidence=0.9,
    df=sample_size - 1,
    loc=after_scores.mean(),
    scale=after_scores.std() / np.sqrt(sample_size),
)

(273.4078114011075, 305.2869254409978)