In [72]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import norm

import config

In [73]:
# Notebook-wide setup: seaborn plots use the "whitegrid" style, and the
# project root is taken from the local `config` module.
sns.set_style("whitegrid")
ROOT_DIR = config.ROOT_DIR

In [74]:
# Build the CSV path from components instead of hard-coding Windows "\"
# separators, so the notebook also runs on Linux/macOS.
student_grades = pd.read_csv(Path(ROOT_DIR) / "data" / "student_grades.csv")

In [75]:
# Show the loaded frame as the cell output to eyeball its columns and values
# (the rendered preview below is truncated in the middle).
student_grades

Unnamed: 0,Student ID,Undergrad Degree,Undergrad Grade,MBA Grade,Work Experience,Employability (Before),Employability (After),Status,Annual Salary
0,1,Business,68.4,90.2,No,252,276,Placed,111000.0
1,2,Business,62.1,92.8,No,423,410,Not Placed,
2,3,Computer Science,70.2,68.7,Yes,101,119,Placed,107000.0
3,4,Engineering,75.1,80.7,No,288,334,Not Placed,
4,5,Finance,60.9,74.9,No,248,252,Not Placed,
...,...,...,...,...,...,...,...,...,...
90,91,Business,76.0,77.9,No,326,369,Placed,99500.0
91,92,Computer Science,67.7,86.1,No,421,457,Placed,107000.0
92,93,Engineering,75.3,89.9,No,368,421,Not Placed,
93,94,Engineering,68.1,83.1,No,279,282,Placed,84000.0


#### What mean employability score can we expect from future graduates, with 90% confidence? (The population standard deviation is 90.)

In [76]:
# Keep only the student identifier and the post-MBA employability score.
employability_columns = ["Student ID", "Employability (After)"]
employability = student_grades[employability_columns]
employability.head()

Unnamed: 0,Student ID,Employability (After)
0,1,276
1,2,410
2,3,119
3,4,334
4,5,252


In [77]:
# Sample size and point estimate for the employability score.
# count() tallies only the non-missing scores, i.e. exactly the values that
# mean() averages (mean() skips NaN), so size and mean always describe the
# same sample. The salary analysis further down uses the same convention.
scores_after = employability["Employability (After)"]
sample_size = scores_after.count()
sample_mean = scores_after.mean()
print(f"Sample size: {sample_size}")
print(f"Sample mean: {sample_mean}") # point estimate

Sample size: 95
Sample mean: 289.34736842105264


In [78]:
# Margin of error for a two-sided z-interval with a known population sigma.
CONFIDENCE_LEVEL = 0.90  # confidence level asked for in the question
POPULATION_STD = 90      # population standard deviation given in the question

# A two-sided interval leaves (1 - confidence) / 2 in each tail, so the
# critical value is the (1 + confidence) / 2 quantile of the standard normal
# (the 0.95 quantile for a 90 % interval).
z_score = norm.ppf((1 + CONFIDENCE_LEVEL) / 2)
std_error = POPULATION_STD / np.sqrt(sample_size)
margin_of_error = z_score * std_error
margin_of_error

15.188257922408644

In [79]:
# Confidence interval = point estimate -/+ margin of error.
print(
    f"Lower bound: {sample_mean - margin_of_error}",
    f"Upper bound: {sample_mean + margin_of_error}",
    sep="\n",
)

Lower bound: 274.159110498644
Upper bound: 304.5356263434613


In [80]:
# Cross-check with scipy: freeze a normal distribution centred on the sample
# mean with the standard error as its scale, then ask for its central 90 % interval.
norm(loc=sample_mean, scale=std_error).interval(0.9)

(274.159110498644, 304.5356263434613)

#### Find the expected annual salary for graduates at alpha = 0.05, given that the average salary for recent MBA graduates in the US is 101k USD with a standard deviation of 76k USD

In [81]:
# Keep only the student identifier and the annual salary column.
salary_columns = ["Student ID", "Annual Salary"]
salary = student_grades[salary_columns]
salary.head()

Unnamed: 0,Student ID,Annual Salary
0,1,111000.0
1,2,
2,3,107000.0
3,4,
4,5,


In [82]:
# Sample statistics for annual salary. count() and mean() both ignore the
# missing salaries, so only rows with a recorded salary contribute.
annual_salary = salary["Annual Salary"]
sample_size = annual_salary.count()
sample_mean = annual_salary.mean()  # point estimate
# Standard error of the mean, using the 76k USD standard deviation from the question.
std_error = 76000 / np.sqrt(sample_size)
print(f"Sample size: {sample_size}")
print(f"Sample mean: {sample_mean}")
print(f"Standard error: {std_error}")

Sample size: 53
Sample mean: 119386.7924528302
Standard error: 10439.402860100367


In [83]:
# 95 % confidence interval (alpha = 0.05) for the mean salary: central interval
# of a normal centred on the sample mean with the standard error as its scale.
norm(loc=sample_mean, scale=std_error).interval(0.95)

(98925.93882692905, 139847.64607873134)