In [None]:
import numpy as np
import pandas as pd
import plotnine as gg
from scipy.stats import gaussian_kde, ttest_1samp
from statsmodels.formula.api import ols

from regression.linear_model import LinearModel

In [None]:
# Seeded generator so the simulated data set is reproducible.
rng = np.random.default_rng(1)

# Two independent standard-normal regressors; y is drawn around 1 + x1 with
# standard deviation 0.2, so x2 plays no part in generating y.
data = pd.DataFrame({
    'x1': rng.normal(size=1000),
    'x2': rng.normal(size=1000),
})
data = data.assign(y=lambda frame: rng.normal(loc=1 + frame.x1, scale=0.2))

# Fit the project's LinearModel by OLS, then draw 1000 bootstrap resamples of
# the parameters (read back later through `mod.param_dist_boot`).
mod = LinearModel('y~bs(x1,3)', data=data)
mod = mod.fit(method='OLS')
mod.bootstrap_param(n_resamples=1000)

# Display the coefficient table as the cell output.
mod.coef()

In [None]:
# Reference fit of the same formula with statsmodels, for comparison with the
# LinearModel coefficients shown above.
(
    ols(formula='y~bs(x1,3)', data=data)
    .fit()
    .summary()
)

In [None]:
# Observed points, fitted line and two shaded bands built from the bootstrap
# prediction frame (columns used: x1, y, pred, pred_low/pred_up, ci_low/ci_up).
# NOTE(review): presumably pred_* is the prediction band and ci_* the
# confidence band at alpha=0.05 -- confirm against LinearModel.predict.
(
    gg.ggplot(mod.predict(ci_method='bootstrap', alpha=0.05))
    + gg.aes(x='x1')
    + gg.geom_point(gg.aes(y='y'))
    + gg.geom_line(gg.aes(y='pred'), color='red')
    + gg.geom_ribbon(gg.aes(ymin='pred_low', ymax='pred_up'), alpha=0.3)
    + gg.geom_ribbon(gg.aes(ymin='ci_low', ymax='ci_up'), fill='red', alpha=0.3)
)

In [None]:
# Reshape the bootstrap parameter draws to long form ('variable' / 'value')
# and draw one density panel per parameter, each with its own axis scales.
(
    gg.ggplot(mod.param_dist_boot.melt())
    + gg.aes(x='value')
    + gg.geom_density()
    + gg.facet_wrap('variable', scales='free', ncol=1)
)

In [None]:
# Joint view of two bootstrapped parameters: small raw points with a
# density-coloured point layer on top.
# NOTE(review): this needs 'Intercept' and 'x1' columns in
# mod.param_dist_boot, while the model above is fitted with 'y~bs(x1,3)' --
# confirm the column names on a fresh kernel.
(
    gg.ggplot(mod.param_dist_boot)
    + gg.aes(x='Intercept', y='x1')
    + gg.geom_point(size=0.1)
    + gg.geom_pointdensity()
)

In [None]:
# Stray scratch expression removed: this cell only displayed `x`, but no cell
# above assigns `x` (its first assignment is in the following cell), so
# Restart Kernel -> Run All stopped here with a NameError.

In [None]:
# Gaussian kernel density estimate of 1000 standard-normal draws, evaluated
# at the (sorted) sample points themselves.
# `gaussian_kde` is imported in the notebook's top import cell; the draws are
# sorted with np.sort so `x` is bound once instead of being mutated in place.
x = np.sort(rng.normal(size=1000))
kde = gaussian_kde(x)
kde(x)

In [None]:
# Share of the bootstrap draws in the `x2` column that are not above 0.0429.
# The mean of the boolean mask is used so the denominator always equals the
# number of draws instead of a hard-coded 1000.
1 - np.mean(mod.param_dist_boot.x2.to_numpy() > 0.0429)

In [None]:
# 1000 normal draws centred on 1 (default unit scale); rebinds `x` as the
# sample handed to get_p_value below.
x = rng.normal(loc=1, size=1000)

def get_p_value(
    sample:np.ndarray,
    statistic:float,
    alternative='two_side'
) -> float:
    """Empirical p-value of ``statistic`` against a reference ``sample``.

    The p-value is the fraction of ``sample`` that falls in the rejection
    tail(s). All comparisons are strict, so draws exactly equal to a
    threshold are not counted.

    Parameters
    ----------
    sample : array-like
        Draws of the reference distribution. Any shape is accepted; the
        values are flattened before counting.
    statistic : float
        Observed value to locate within ``sample``.
    alternative : {'two_side', 'greater', 'less'}, default 'two_side'
        * ``'two_side'`` -- draws above ``|statistic|`` or below
          ``-|statistic|`` (tails mirrored around zero).
        * ``'greater'``  -- draws above ``statistic`` (upper tail).
        * ``'less'``     -- draws below ``statistic`` (lower tail).

    Returns
    -------
    float
        Fraction of draws in the selected tail(s), between 0 and 1, or
        ``nan`` when ``sample`` is empty.

    Raises
    ------
    ValueError
        If ``alternative`` is not one of the three supported values.
    """
    draws = np.asarray(sample).ravel()

    if alternative == 'two_side':
        # Mirror around zero using the magnitude, so a negative statistic
        # yields the same tails as its positive counterpart and the two
        # tails can never overlap (which previously pushed the result
        # above 1).
        bound = abs(statistic)
        in_tail = (draws > bound) | (draws < -bound)
    elif alternative == 'greater':
        in_tail = draws > statistic
    elif alternative == 'less':
        in_tail = draws < statistic
    else:
        raise ValueError(
            "alternative must be 'two_side', 'greater' or 'less', "
            f"got {alternative!r}"
        )

    if draws.size == 0:
        # No draws: the proportion is undefined.
        return float('nan')

    return float(np.sum(in_tail) / draws.size)
# Two-sided (default) empirical p-value of 2.96 against the draws in `x`.
get_p_value(sample=x, statistic=2.96)

In [None]:
# Quick plot of the `x2` bootstrap draws with a vertical reference line at 0.
(
    gg.qplot(mod.param_dist_boot.x2)
    + gg.geom_vline(xintercept=0)
)

In [None]:
# Display the bootstrap parameter draws stored on the fitted model.
mod.param_dist_boot

In [None]:
# Centre of the `x2` bootstrap draws and its absolute distance from 0.0429.
tt = mod.param_dist_boot.x2.mean()
diff = np.abs(tt - 0.0429)

# Share of draws strictly inside (tt - diff, tt + diff). The mean of the
# boolean mask is used so the denominator always equals the number of draws
# instead of a hard-coded 1000.
((mod.param_dist_boot.x2 > (tt - diff)) & (mod.param_dist_boot.x2 < (tt + diff))).mean()

In [None]:
# Twice the share of `x1` bootstrap draws that exceed 1.0453.
2 * (mod.param_dist_boot['x1'] > 1.0453).mean()

In [None]:
# One-sample t-test of the `x2` bootstrap draws against a mean of 0.
# `ttest_1samp` is imported in the notebook's top import cell.
# NOTE(review): the t-test's standard error shrinks with the number of
# bootstrap draws, so this p-value depends on n_resamples rather than on the
# uncertainty of the estimate itself -- presumably exploratory; confirm
# before relying on it.
ttest_1samp(mod.param_dist_boot.x2, popmean=0)