In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from math import sqrt

# Confidence Interval
# <center>$\mu = \bar{x} \pm z_{\alpha/2} \cdot \dfrac{\sigma}{\sqrt{n}}$</center>

In [3]:
# read_excel's `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.
# Squeezing the columns axis after the read turns a one-column sheet into the same
# Series and works on both old and new pandas versions.
aarp = pd.read_excel('TaxReturn.xlsx').squeeze('columns')
aarp.head()

0    35.3
1    30.5
2    37.4
3    26.5
4    13.0
Name: Return Preparation Time(Hours), dtype: float64

In [4]:
# Known-sigma (z) confidence interval for the mean, computed by hand.
sigma = 9                  # population standard deviation, treated as known
xbar = aarp.mean()         # sample mean
n = aarp.size              # sample size
c = 0.95                   # confidence level
alpha = 1 - c
# isf() returns the upper-tail critical value (+z). With the positive value the
# arithmetic follows xbar -/+ z * sigma/sqrt(n) directly, so mu1 is visibly the
# lower bound and mu2 the upper bound (the lower-tail ppf is negative, which made
# "xbar + z*..." the lower bound by accident of sign).
z = stats.norm.isf(alpha/2)
margin = z * sigma/sqrt(n)
mu1 = xbar - margin
mu2 = xbar + margin
# The label is derived from c so the printed level cannot drift from the computed one.
print('{:.0f} % CI {:.2f} - {:.2f}'.format(c*100, mu1, mu2))

95 % CI 30.71 - 36.28


In [5]:
# Cross-check of the hand-computed interval with scipy's built-in helper.
# The confidence level is taken from `c` (set alongside sigma, xbar and n) instead of
# a second 0.95 literal, and the printed label is derived from it as well.
mu1, mu2 = stats.norm.interval(c, loc=xbar, scale=sigma/sqrt(n))
print('{:.0f} % CI {:.2f} - {:.2f}'.format(c*100, mu1, mu2))

95 % CI 30.71 - 36.28


In [7]:
# `squeeze=True` is no longer accepted by read_excel (deprecated in pandas 1.4,
# removed in 2.0); squeeze the columns axis after loading to get the Series.
nielsen = pd.read_excel('Nielsen.xlsx').squeeze('columns')
sigma = 3.5                # population standard deviation, treated as known
xbar = nielsen.mean()      # sample mean
n = nielsen.size           # sample size
c = 0.95                   # confidence level

In [9]:
# z interval for the mean. The confidence level comes from `c`, which the setup cell
# defines together with sigma, xbar and n, rather than a repeated 0.95 literal; the
# label is derived from it too.
mu1, mu2 = stats.norm.interval(c, loc=xbar, scale=sigma/sqrt(n))
print('{:.0f} % CI {:.2f} - {:.2f} hrs'.format(c*100, mu1, mu2))

95 % CI 8.10 - 8.90 hrs


In [10]:
# Exercise givens: known sigma, sample mean and sample size.
sigma, xbar, n = 600, 1599, 50
c = 0.95
alpha = 1 - c
# The lower-tail quantile is negative; abs() below turns the product into a
# positive half-width.
z = stats.norm.ppf(alpha / 2)
moe = abs(z * sigma / sqrt(n))
print(f'Margin of error : {moe:.2f}')

Margin of error : 166.31


### To decrease the margin of error, increase the sample size.

In [11]:
# Exercise givens used by the margin-of-error cells for parts (b) and (c).
sigma = 5    # population standard deviation
n = 10       # sample size
mu = 71      # mean quoted in the exercise; presumably the sample mean -- verify against the problem text

#### a.	What assumption should the researcher be willing to make if a margin of error is desired?
    - The population data are normally distributed (the sample is small, n = 10).

#### b.	Using 95% confidence, what is the margin of error?

In [13]:
# Margin of error at 95% confidence, using sigma and n from the givens cell.
c = 0.95
alpha = 1 - c
z = stats.norm.ppf(alpha / 2)      # lower-tail quantile (negative)
moe = abs(z * sigma / sqrt(n))     # abs() makes the half-width positive
print(f'Margin of error : {moe:.2f}')

Margin of error : 3.10


#### c.	What is the margin of error if 99% confidence is desired?

In [14]:
# Same margin-of-error calculation at 99% confidence.
c = 0.99
alpha = 1 - c
z = stats.norm.ppf(alpha / 2)      # lower-tail quantile (negative)
moe = abs(z * sigma / sqrt(n))     # abs() makes the half-width positive
print(f'Margin of error : {moe:.2f}')

Margin of error : 4.07


In [19]:
# Summary statistics shared by the 90%, 95% and 99% interval cells that follow.
n = 80           # sample size
xbar = 119155    # sample mean
sigma = 30000    # population standard deviation, treated as known

#### a.	Develop a 90% confidence interval estimate of the population mean.

In [20]:
# 90% z interval for the population mean.
c = 0.90
mu1, mu2 = stats.norm.interval(c, loc=xbar, scale=sigma / sqrt(n))
print(f'{c*100} % CI {mu1:.2f} - {mu2:.2f} ')

90.0 % CI 113637.99 - 124672.01 


#### b.	Develop a 95% confidence interval estimate of the population mean.

In [21]:
# 95% z interval for the population mean.
c = 0.95
mu1, mu2 = stats.norm.interval(c, loc=xbar, scale=sigma / sqrt(n))
print(f'{c*100} % CI {mu1:.2f} - {mu2:.2f} ')

95.0 % CI 112581.08 - 125728.92 


#### c.	Develop a 99% confidence interval estimate of the population mean.

In [22]:
# 99% z interval for the population mean.
c = 0.99
mu1, mu2 = stats.norm.interval(c, loc=xbar, scale=sigma / sqrt(n))
print(f'{c*100} % CI {mu1:.2f} - {mu2:.2f} ')

99.0 % CI 110515.41 - 127794.59 


In [23]:
# Summary statistics for the margin-of-error and interval cells that follow.
n = 54           # sample size
xbar = 33.77     # sample mean
sigma = 15       # population standard deviation, treated as known

#### a.	Using the sample data, what is the margin of error associated with a 95% confidence interval?

In [25]:
# Margin of error at 95% confidence, using sigma and n from the givens cell.
c = 0.95
alpha = 1 - c
z = stats.norm.ppf(alpha / 2)      # lower-tail quantile (negative)
moe = abs(z * sigma / sqrt(n))     # abs() makes the half-width positive
print(f'Margin of error : {moe:.4f}')

Margin of error : 4.0008


#### b.	Develop a 95% confidence interval for the mean price charged by discount brokers for a trade of 100 shares at $50 per share.

In [27]:
# 95% z interval for the mean, built from the same xbar, sigma and n.
c = 0.95
mu1, mu2 = stats.norm.interval(c, loc=xbar, scale=sigma / sqrt(n))
print(f'{c*100} % CI {mu1:.4f} - {mu2:.4f} ')

95.0 % CI 29.7692 - 37.7708 


# Confidence Interval using t distribution
# <center>$\mu = \bar{x} \pm t_{\alpha/2} \cdot \dfrac{s}{\sqrt{n}}$</center>

In [29]:
# t intervals at two confidence levels, computed from summary statistics.
n = 65          # sample size
xbar = 19.5     # sample mean
s = 5.2         # sample standard deviation
ci = [0.90, 0.95]
dof = n - 1             # degrees of freedom for Student's t
std_err = s / sqrt(n)   # standard error of the mean; the same for every level
for c in ci:
    mu1, mu2 = stats.t.interval(c, dof, loc=xbar, scale=std_err)
    print(f'{c*100} % CI {mu1:.4f} - {mu2:.4f} ')

90.0 % CI 18.4235 - 20.5765 
95.0 % CI 18.2115 - 20.7885 


In [33]:
# Summary statistics for the t-based cells that follow.
n = 100       # sample size
xbar = 49     # sample mean
s = 8.5       # sample standard deviation

#### a.	At 95% confidence, what is the margin of error?

In [34]:
# Margin of error at 95% confidence from Student's t with n - 1 degrees of freedom.
c = 0.95
alpha = 1 - c
t = stats.t.ppf(alpha / 2, n - 1)   # lower-tail quantile (negative)
moe = abs(t * s / sqrt(n))          # abs() makes the half-width positive
print(f'Margin of error : {moe:.4f}')

Margin of error : 1.6866


#### b.	What is the 95% confidence interval estimate of the population mean flying time for the pilots?

In [35]:
# t interval at the confidence level `c` set in the margin-of-error cell.
mu1, mu2 = stats.t.interval(c, n - 1, loc=xbar, scale=s / sqrt(n))
print(f'{c*100} % CI {mu1:.4f} - {mu2:.4f} ')

95.0 % CI 47.3134 - 50.6866 


#### c.	The mean number of hours of flying time for pilots at United Airlines is 36 hours per month. Use your results from part (b) to discuss differences between the flying times for the pilots at the two airlines. 

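    - The mean monthly flying time for pilots at United Airlines (36 hours) lies below the lower limit of the 95% confidence interval found in part (b) for Continental Airlines (47.31 to 50.69 hours), so Continental pilots appear to fly more hours per month on average.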

In [52]:
# read_excel no longer accepts `squeeze=True` (deprecated in pandas 1.4, removed in
# 2.0); squeeze the columns axis after loading to get the Series.
miami = pd.read_excel('Miami.xlsx').squeeze('columns')
xbar = miami.mean()        # sample mean
s = miami.std()            # pandas default ddof=1, i.e. the sample standard deviation
n = miami.size             # sample size
c = 0.95                   # confidence level
# Sigma is estimated by s, so the interval uses Student's t with n - 1 degrees of freedom.
mu1, mu2 = stats.t.interval(c, n-1, loc=xbar, scale=s/sqrt(n))
print('{} % CI {:.4f} - {:.4f} '.format(c*100, mu1, mu2))

95.0 % CI 5.7253 - 6.9547 


In [56]:
# read_excel no longer accepts `squeeze=True` (deprecated in pandas 1.4, removed in
# 2.0); squeeze the columns axis after loading to get the Series.
jobsearch = pd.read_excel('Job Search.xlsx').squeeze('columns')
xbar = jobsearch.mean()    # sample mean
s = jobsearch.std()        # pandas default ddof=1, i.e. the sample standard deviation
n = jobsearch.size         # sample size

#### a.	Provide a point estimate of the population mean number of weeks it takes a worker aged 55 plus to find a job.

In [59]:
# Point estimate of the population mean: the sample mean computed when the data was loaded.
xbar

22.0

#### b.	At 95% confidence, what is the margin of error?

In [57]:
# Margin of error at 95% confidence from Student's t with n - 1 degrees of freedom.
c = 0.95
alpha = 1 - c
t = stats.t.ppf(alpha / 2, n - 1)   # lower-tail quantile (negative)
moe = abs(t * s / sqrt(n))          # abs() makes the half-width positive
print(f'Margin of error : {moe:.4f}')

Margin of error : 3.8014


#### c.	What is the 95% confidence interval estimate of the mean?

In [58]:
# t interval at the confidence level `c` set in the margin-of-error cell.
mu1, mu2 = stats.t.interval(c, n - 1, loc=xbar, scale=s / sqrt(n))
print(f'{c*100} % CI {mu1:.4f} - {mu2:.4f} ')

95.0 % CI 18.1986 - 25.8014 


#### d.	Discuss the degree of skewness found in the sample data. What suggestion would you make for a repeat of this study?

    - Larger sample size

In [60]:
# Summary statistics for the t-based cells that follow.
n = 45        # sample size
xbar = 273    # sample mean
s = 65        # sample standard deviation

#### a.	With 95% confidence, what is the margin of error?

In [61]:
# Margin of error at 95% confidence from Student's t with n - 1 degrees of freedom.
c = 0.95
alpha = 1 - c
t = stats.t.ppf(alpha / 2, n - 1)   # lower-tail quantile (negative)
moe = abs(t * s / sqrt(n))          # abs() makes the half-width positive
print(f'Margin of error : {moe:.4f}')

Margin of error : 19.5282


#### b.	What is the 95% confidence interval estimate of the population mean?

In [62]:
# t interval at the confidence level `c` set in the margin-of-error cell.
mu1, mu2 = stats.t.interval(c, n - 1, loc=xbar, scale=s / sqrt(n))
print(f'{c*100} % CI {mu1:.4f} - {mu2:.4f} ')

95.0 % CI 253.4718 - 292.5282 


#### c.	Two years ago the average cost of a hotel room in New York City was $229. Discuss the change in cost over the two-year period.

    - The mean of $229 from two years ago lies below the lower limit of the 95% confidence interval from part (b) (253.47 to 292.53), which indicates that the price has increased over the last two years.

In [65]:
# read_excel no longer accepts `squeeze=True` (deprecated in pandas 1.4, removed in
# 2.0); squeeze the columns axis after loading to get the Series.
program = pd.read_excel('Program.xlsx').squeeze('columns')
xbar = program.mean()      # sample mean
s = program.std()          # pandas default ddof=1, i.e. the sample standard deviation
n = program.size           # sample size

#### Provide a point estimate.

In [67]:
# The sample mean serves as the point estimate of the population mean.
print(f'point estimate :{xbar:.4f}')

point estimate :22.0000


#### a 95% confidence interval for the mean number of programming minutes during a half-hour television sitcom.

In [68]:
# 95% t interval with n - 1 degrees of freedom, since sigma is estimated by s.
c = 0.95
mu1, mu2 = stats.t.interval(c, n - 1, loc=xbar, scale=s / sqrt(n))
print(f'{c*100} % CI {mu1:.4f} - {mu2:.4f} ')

95.0 % CI 21.4762 - 22.5238 
