# Exercises 4.2

In [1]:
import numpy
import scipy
import math
import rdata

from scipy.special import gamma
from scipy.stats import t, norm

> 4.2.1 Let the observed value of the mean X and of the sample variance of a random
> sample of size 20 from a distribution that is N(μ, σ2) be 81.2 and 26.5, respectively.
> Find respectively 90%, 95% and 99% confidence intervals for μ.

In [2]:
tq95 = t.ppf(1-0.1/2, 20-1)
tq975 = t.ppf(1-0.05/2, 20-1)
tq995 = t.ppf(1-0.01/2, 20-1)

In [3]:
mu90 = [81.2 - tq95*math.sqrt(26.5/20), 81.2 + tq95*math.sqrt(26.5/20)]
mu95 = [81.2 - tq975*math.sqrt(26.5/20), 81.2 + tq975*math.sqrt(26.5/20)]
mu99 = [81.2 - tq995*math.sqrt(26.5/20), 81.2 + tq995*math.sqrt(26.5/20)]

In [4]:
print(mu90)
print(mu95)
print(mu99)

[np.float64(79.20961866195427), np.float64(83.19038133804574)]
[np.float64(78.79074838542353), np.float64(83.60925161457648)]
[np.float64(77.90681695928436), np.float64(84.49318304071565)]


> 4.2.2. Consider the data on the lifetimes of motors given in Exercise 4.1.1. Obtain
> a large sample 95% confidence interval for the mean lifetime of a motor.

In [5]:
hours = [1, 4, 5, 21, 22, 28, 40, 42, 51, 53, 58, 67, 95, 124, 124, 160, 202, 260, 303, 363]
mu = sum(hours)/len(hours)
print("avg:", mu)

S2 = sum([(x - mu) ** 2 for x in hours])/(len(hours) - 1)
print("simple variation:", math.sqrt(S2))

# we can reuse tq975 because the hours list has 20 elements 
interval = [mu - tq975 * math.sqrt(S2/len(hours)), mu + tq975 * math.sqrt(S2/len(hours))]
print(interval)

avg: 101.15
simple variation: 105.40911525550041
[np.float64(51.81701549244916), np.float64(150.48298450755084)]


> 4.2.4. In Example 4.2.4, for the baseball data, we found a confidence interval for
> the mean difference in heights between the pitchers and hitters. In this exercise,
> find the pooled t 95% confidence interval for the mean difference in weights between
> the pitchers and hitters.

In [6]:
bbdata = rdata.read_rda('../Data/bb.rda')['bb']
print(bbdata)

    hand  height  weight  hitind  hitpitind  average
1    1.0    74.0   218.0     1.0        0.0    3.330
2    0.0    75.0   185.0     1.0        1.0    0.286
3    1.0    77.0   219.0     2.0        0.0    3.040
4    0.0    73.0   185.0     1.0        1.0    0.271
5    0.0    69.0   160.0     3.0        1.0    0.242
6    0.0    73.0   222.0     1.0        0.0    3.920
7    0.0    78.0   225.0     1.0        0.0    3.460
8    0.0    76.0   205.0     1.0        0.0    3.420
9    0.0    77.0   230.0     2.0        1.0    0.303
10   0.0    78.0   225.0     1.0        0.0    3.460
11   0.0    76.0   190.0     1.0        0.0    3.750
12   0.0    72.0   180.0     3.0        1.0    0.236
13   0.0    73.0   185.0     1.0        1.0    0.245
14   1.0    73.0   200.0     2.0        0.0    4.800
15   0.0    74.0   195.0     1.0        1.0    0.276
16   0.0    75.0   195.0     1.0        0.0    3.660
17   1.0    72.0   185.0     2.0        1.0    0.300
18   0.0    75.0   190.0     1.0        1.0   

In [7]:
pit = bbdata[bbdata.where(bbdata['hitpitind'] == 0).notna().all(1)]['weight']
hit = bbdata[bbdata.where(bbdata['hitpitind'] == 1).notna().all(1)]['weight']

pitcount = pit.count()
hitcount = hit.count()

pitmean = pit.mean()
hitmean = hit.mean()

# sample variance
pitvar = pit.var()
hitvar = hit.var()

Sp2 = ((pit.count()-1) * pitvar + (hit.count()-1) * hitvar) / (pitcount+hitcount-2)

tq975 = t.ppf(1-0.05/2, pitcount+hitcount-2)

print(pitmean, pitvar, hitmean, hitvar)

interval = [pitmean-hitmean - tq975 * math.sqrt(Sp2*(1/pitcount + 1/hitcount)), 
            pitmean-hitmean + tq975 * math.sqrt(Sp2*(1/pitcount + 1/hitcount))]

print(interval)

201.0 305.68 185.42424242424244 298.12689393939394
[np.float64(6.458849183345414), np.float64(24.692665968169713)]


>4.2.5. In the baseball data set discussed in the last exercise, it was found that out
>of the 59 baseball players, 15 were left-handed. Is this odd, since the proportion of
>left-handed males in America is about 11%? Answer by using (4.2.7) to construct a
>95% approximate confidence interval for p, the proportion of left-handed professional
>baseball players.

In [8]:
left = bbdata.where(bbdata['hand'] == 1)['hand'].count()
right = bbdata.where(bbdata['hand'] == 0)['hand'].count()

zq975 = norm.ppf(1-0.05/2)
avg = left/(left+right)
sigma2 = avg*(1-avg)
interval = [avg - zq975*math.sqrt(sigma2/(left+right)), avg + zq975*math.sqrt(sigma2/(left+right))]

print(interval)

[np.float64(0.14313012952897916), np.float64(0.36534444674220723)]


> 4.2.6. Let X be the mean of a random sample of size n from a distribution that is
> N(μ, 9). Find n such that P(X − 1 < μ < X + 1) = 0.90, approximately.

In [9]:
zq95 = norm.ppf(1-0.05)
N = (3*zq95)**2
print(N, int(N))

24.34989108685871 24


> 4.2.7. Let a random sample of size 17 from the normal distribution N(μ, σ2) yield
> x = 4.7 and s2 = 5.76. Determine a 90% confidence interval for μ.

In [10]:
x = 4.7
n = 17
s2 = 5.76
zq95 = norm.ppf(1-0.05)
interval = [x - zq95*math.sqrt(s2/n), x + zq95*math.sqrt(s2/n)]
print(interval)

[np.float64(3.742554553985709), np.float64(5.657445446014291)]


> 4.2.8. Let X denote the mean of a random sample of size n from a distribution that
> has mean μ and variance σ2 = 10. Find n so that the probability is approximately
> 0.954 that the random interval (X − 1/2 , X + 1/2) includes μ.

In [11]:
quantile = norm.ppf((1+0.954)/2)
sigma2 = 10
n = quantile**2 * 10 * 4
print(int(n))

159


> 4.2.9. Let X1,X2, . . . , X9 be a random sample of size 9 from a distribution that is
> N(μ, σ2).

> (b) If σ is unknown, find the expected value of the length of a 95% confidence
> interval for μ if this interval is based on the random variable
> √9(X − μ)/S.
> Hint: Write E(S) = (σ/√n − 1)E[((n − 1)S2/σ2)1/2].

In [12]:
A = math.sqrt(2) * gamma(9/2) / gamma(4)
tq975 = t.ppf(1-0.05/2, 9-1)
print(A, tq975, 2*A*tq975/3/math.sqrt(8))

2.741624675377657 2.306004135204166 1.4901563212253477


> 4.2.17. It is known that a random variable X has a Poisson distribution with
> parameter μ. A sample of 200 observations from this distribution has a mean equal
> to 3.4. Construct an approximate 90% confidence interval for μ.

In [13]:
zq95 = norm.ppf(0.95)
avg = 3.4
n = 200
interval = [avg - zq95 * math.sqrt(avg/n), avg + zq95 * math.sqrt(avg/n)]
print(interval)

[np.float64(3.1855373255794333), np.float64(3.6144626744205666)]
