In [1]:
import numpy as np
import pandas as pd
import scipy.stats as ss

np.random.seed(1337)
data = pd.DataFrame({'x': ss.norm.rvs(size=100)})
data[:10]

Unnamed: 0,x
0,-0.703187
1,-0.490282
2,-0.321814
3,-1.755079
4,0.206664
5,-2.011265
6,-0.557251
7,0.337217
8,1.548836
9,-1.370737


In [2]:
from typing import Callable
import pandas

def bootstrap_t(data: pandas.DataFrame, statistic: Callable[[pandas.DataFrame], float], iterations=1000, std_iter=50, alpha=0.02):
    # ...
    theta = statistic(data)
    res = []
    sts = []
    for i in range(iterations):
        resample = data.sample(frac=1, replace=True, random_state=i)
        res.append(statistic(resample) - theta)
        # ...
        stdd = []
        for j in range(std_iter):
            reresample = resample.sample(frac=1, replace=True, random_state=i*std_iter+j)
            # ...
            stdd.append(statistic(reresample))
        sts.append(np.std(stdd, ddof=1))
    # ...
    stdteta = np.std(res, ddof=1)
    res = np.array(res)
    res /= sts
    t_1, t_2 = np.quantile(res, alpha / 2), np.quantile(res, 1 - alpha / 2)
    return theta - stdteta * t_2, theta - stdteta * t_1

In [3]:
bootstrap_t(data, np.median)

(-0.29599601030321415, 0.35801561621958494)

# Домашка.


In [4]:
law_school_data = [(576, 3.39), (635, 3.30), (558, 2.81), (578, 3.03), (666, 3.44),
                   (580, 3.07), (555, 3.0), (661, 3.43), (651, 3.36), (605, 3.13),
                   (653, 3.12), (575, 2.74), (545, 2.76), (572, 2.88), (594, 2.96)]
law_school_df = pd.DataFrame(law_school_data, columns=['LSAT', 'GPA'])

In [8]:
law_school_df.shape

(15, 2)

In [7]:
rho = law_school_df.corr().values[0, 1]
rho

0.7763744912894074

In [13]:
R = 1 - rho ** 2

alpha = 0.01

t_1 = ss.norm.ppf(alpha)
t_2 = ss.norm.ppf(1 - alpha)
print((t_2 - t_1) * R / np.sqrt(15))

0.4772158876048114


In [14]:
print(t_1, t_2, np.sqrt(15), rho)

-2.3263478740408408 2.3263478740408408 3.872983346207417 0.7763744912894074


In [15]:
print(0.895429-0.199658)

0.695771


In [19]:
statistic = lambda x: np.arctanh(x.corr().values[0, 1])

In [20]:
left, right = np.tanh(bootstrap_t(law_school_df, statistic))

In [22]:

print(right - left)
print(left, right)

1.1117557683983375
-0.16680328200355332 0.9449524863947842


In [64]:
def bca(data, statistic, iterations=1000, alpha=0.02):
    # ...
    theta = statistic(data)
    bt = []
    for i in range(iterations):
        resample = data.sample(frac=1, replace=True, random_state=i)
        bt.append(statistic(resample))
        # ...
    bt = np.array(bt)
    z = ss.norm.ppf(len(bt[bt < theta]) / iterations)
    retheta = []
    for i in range(data.shape[0]):
        redata = data.drop(i)
        retheta.append(statistic(redata))
    retheta = np.array(retheta)
    ttheta = retheta.mean()
    retheta = ttheta - retheta
    num = sum(retheta ** 3)
    denum = 6 * sum(retheta ** 2) ** 1.5
    
    a = num
    za = ss.norm.ppf(alpha / 2)
    z1ma = ss.norm.ppf(1 - alpha / 2)
    z1 = ss.norm.cdf(z + (z + za) / (1 - a * (z + za)))
    z2 = ss.norm.cdf(z + (z + z1ma) / (1 - a * (z + z1ma)))
    print(theta)
    return np.quantile(bt, z1), np.quantile(bt, z2)
    # ...
    

In [65]:
stat2 = lambda x: x.corr().values[0, 1]
left, right = bca(law_school_df, stat2)

0.7763744912894074


In [66]:
print(left, right)
print(right - left)

0.19581286046536708 0.9220694692786151
0.726256608813248


array([3, 2, 1])