# Predictive Analytics
# Module 3 - Inferential Statistics
## Demo 3 - Margin of Error & Confidence Interval

In [2]:
import pandas as pd
import numpy as np

### Generate random data and draw a sample from it

In [5]:
# Generate random data
from sklearn.datasets import make_regression

# Build a synthetic "population" of 1000 rows with 5 features and a target.
# random_state pins the generator so the data -- and every statistic derived
# from it further down -- is identical on each Restart & Run All. Without it,
# make_regression draws fresh random data on every execution.
X, y = make_regression(n_samples=1000, n_features=5, random_state=1)
df = pd.DataFrame(X, columns=["x1","x2","x3","x4","x5"])
df['y'] = y

# Draw a fixed-size random sample of rows from the population
# (df.sample draws without replacement by default; seeded for reproducibility).
sample_size = 500
sample = df.sample(n=sample_size, random_state=1)
sample.head()

Unnamed: 0,x1,x2,x3,x4,x5,y
507,-0.286468,0.135713,-0.259443,-0.702306,0.406754,-43.333444
818,-0.319466,-0.33596,-0.338797,-0.042322,1.278414,25.551393
452,-0.876602,-0.16179,1.109192,-0.116126,-0.135569,-32.026281
368,-1.093123,-0.673166,1.31723,0.267235,1.749881,72.4246
242,1.157555,-1.426662,0.143651,-0.488784,-0.970141,-71.740872


### Calculate Z-critical, Margin of Error & CI

In [6]:
import math
from scipy import stats

# Point estimate of the mean of y, computed from the sample only.
sample_mean = sample["y"].mean()

# NOTE(review): nothing in this cell draws random numbers, so this seed does
# not influence the values computed below. It is kept only in case later
# cells rely on NumPy's global RNG state -- confirm and move/remove if not.
np.random.seed(1)

# The z-critical value is the number of standard deviations you have to move
# away from the mean of the standard normal distribution to capture the
# proportion of the data associated with the desired confidence level.
# For a two-sided interval the significance level is split evenly between the
# two tails, so the upper cut-off is the (1 - alpha/2) quantile.
significance_level = 0.05  # alpha; 0.05 corresponds to a 95% confidence level
z_critical = stats.norm.ppf(q=1 - significance_level / 2)

# This is the standard deviation of the *sample* (pandas' default ddof=1),
# used as an estimate of the population standard deviation. The variable name
# is kept as-is so any later cell that refers to it keeps working.
pop_stdev = sample["y"].std()

# Margin of error = critical value * standard error of the mean.
standard_error = pop_stdev / math.sqrt(sample_size)
margin_of_error = z_critical * standard_error

# Symmetric interval around the sample mean: (lower bound, upper bound).
confidence_interval = (sample_mean - margin_of_error,
                       sample_mean + margin_of_error)

print("Z-critical value:", z_critical)
print("Margin of Error:", margin_of_error)
print("Confidence Interval:", confidence_interval)

Z-critical value: 1.959963984540054
Margin of Error: 10.455669347523331
Confidence Interval: (-12.466336898244386, 8.445001796802277)
