# Bootstrap

Import all the packages and functions needed

In [ ]:
import numpy as np
import pandas as pd

# print numpy arrays with precision 4
np.set_printoptions(precision=4)

Import the data from the file Portfolio.csv. 

In [ ]:
# Load the portfolio data; the first CSV column is used as the row index.
portfolio = pd.read_csv('./Data/Portfolio.csv', index_col=0)
# Report the number of rows, then preview the first rows of the table.
print(len(portfolio))
portfolio.head()

The goal is to determine the fraction of money $\alpha$ to invest in asset X,
with the remaining $1-\alpha$ invested in asset Y, such that the risk $\mathrm{Var}(\alpha X + (1 - \alpha)Y)$ is minimized. 

$\Rightarrow \alpha=\frac{\sigma^2_Y - \sigma_{XY}}{\sigma^2_X + \sigma^2_Y - 2\sigma_{XY}}$

In practice, however, these population variances and the covariance are unknown and must be estimated from the data.

a) Write a function that returns the bootstrap estimates $\hat{\alpha}=\frac{\hat{\sigma}^2_Y - \hat{\sigma}_{XY}}{\hat{\sigma}^2_X + \hat{\sigma}^2_Y - 2\hat{\sigma}_{XY}}$.

In [ ]:
def alpha(df, num_samples=100):
    """Return one bootstrap estimate of the minimum-variance weight alpha.

    A resample of ``num_samples`` rows is drawn with replacement from ``df``
    (using NumPy's global random state), and the plug-in estimate

        alpha = (var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y))

    is computed on that resample.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with columns ``X`` and ``Y``.
    num_samples : int, default 100
        Number of rows drawn with replacement. For a standard bootstrap,
        pass ``len(df)``.

    Returns
    -------
    float
        The estimate of alpha on the resample. The result is NaN when the
        denominator is zero or fewer than two rows are drawn.
    """
    indices = np.random.choice(df.index, num_samples, replace=True)
    resample = df.loc[indices, ['X', 'Y']]

    # Take both variances and the covariance from one covariance matrix so
    # that all three moments share the same (n - 1) normalisation; mixing
    # np.var (divides by n) with np.cov (divides by n - 1) biases the ratio.
    cov_matrix = np.cov(resample['X'].values, resample['Y'].values)
    var_x = cov_matrix[0, 0]
    var_y = cov_matrix[1, 1]
    cov_xy = cov_matrix[0, 1]

    return (var_y - cov_xy) / (var_x + var_y - 2 * cov_xy)

# Seed NumPy's global RNG so the resample drawn inside alpha() is reproducible.
np.random.seed(0)

# Compute and report a single bootstrap estimate of alpha from the portfolio data.
alpha_est = alpha(portfolio)
print(f"Alpha estimate: {alpha_est:.4f}")

b) Write a function that computes the SE of the estimate $\hat{\alpha}$.

In [ ]:
def boot(data, stasfunc, num_bootstrap_samples=1000):
    """Estimate the bootstrap standard error of a statistic.

    ``stasfunc(data)`` is called ``num_bootstrap_samples`` times and the
    sample standard deviation of the returned values is reported. This
    function does not resample ``data`` itself, so ``stasfunc`` must draw a
    fresh random resample on every call for the result to be meaningful.

    Parameters
    ----------
    data : object
        Passed unchanged to ``stasfunc`` on every call.
    stasfunc : callable
        Function of ``data`` returning a scalar or an array-like statistic.
    num_bootstrap_samples : int, default 1000
        Number of bootstrap replicates; must be at least 2.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Standard error of the statistic (element-wise for array-valued
        statistics).

    Raises
    ------
    ValueError
        If ``num_bootstrap_samples`` is smaller than 2, since a standard
        deviation cannot be estimated from fewer than two replicates.
    """
    if num_bootstrap_samples < 2:
        raise ValueError(
            'num_bootstrap_samples must be at least 2, got {}'.format(
                num_bootstrap_samples
            )
        )

    stat_samples = [stasfunc(data) for _ in range(num_bootstrap_samples)]

    # ddof=1 gives the usual bootstrap SE with the 1 / (B - 1) normalisation.
    se_estimate = np.std(stat_samples, axis=0, ddof=1)

    # A scalar statistic keeps the original fixed-point format; array-valued
    # statistics cannot be formatted with '{:.4f}', so render them separately.
    if np.ndim(se_estimate) == 0:
        formatted = '{:.4f}'.format(se_estimate)
    else:
        formatted = np.array2string(se_estimate, precision=4)

    print('\nBootstrap standard error estimate: {}'.format(formatted))
    return se_estimate

# Seed NumPy's global RNG so the bootstrap replicates are reproducible.
np.random.seed(0)
# Estimate the standard error of alpha from 1000 bootstrap replicates.
boot(portfolio, alpha, 1000)