In [25]:
import pandas as pd 
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.utils import resample

## ADVERTISING

In [72]:
defa = pd.read_csv("Default.csv")
adv = pd.read_csv("Advertising.csv")

In [75]:
x = adv[['TV', 'radio', 'newspaper']].values
y = adv[['sales']].values

lr = LinearRegression()
lr.fit(x,y)

b0 = lr.intercept_
b1 = lr.coef_

In [41]:
y1 = lr.predict(x)

In [42]:
x_mean = np.mean(x)

n = 200
p = 2

RSS = np.sum((y1 - y)**2)
des = RSS / (n-p)

se_b0 = np.sqrt (des * ( (1/n) + ( (x_mean**2) / (np.sum ((X-x_mean)**2)))))
se_b1 = np.sqrt(des / np.sum(((X - x_mean)**2)))

se_b0, se_b1

(0.13276983627246053, 0.0008919757134995063)

## DEFAULT

In [15]:
y = (defa['default'] == "Yes").astype(int)
x = defa[['balance']]

In [22]:
lgr = LogisticRegression()

lgr.fit(x, y)

In [24]:
b0 = lgr.intercept_
b1 = lgr.coef_

y_pred = lgr.predict_proba(x)[:,1] 

p = y_pred

V = np.diagflat(p*(1-p))
X = np.column_stack((np.ones(len(x)), x))
cov = np.linalg.inv(X.T @ V @ X)

se = np.sqrt(np.diag(cov))
se_0 = se[0]
se_1 = se[1]

se_0, se_1

(0.3611685997151162, 0.00022037616946191625)

## Bootstraping

### Default

In [82]:
B = 1000  
boot_coefs_logistic = []

df_default = pd.read_csv("Default.csv")
df_default["default"] = df_default["default"].astype("category")
y_default = df_default["default"] == "Yes"
x_default = df_default["balance"].values.reshape(-1, 1)

n_default = len(x_default) 
for b in range(B):

    idx = np.random.choice(n_default, n_default, replace=True)
    x_b = x_default[idx] 
    y_b = y_default[idx] 

    model_boot_logistic = LogisticRegression()
    model_boot_logistic.fit(x_b, y_b)

    boot_coefs_logistic.append([model_boot_logistic.intercept_[0], model_boot_logistic.coef_[0][0]])

boot_coefs_logistic = np.array(boot_coefs_logistic)

media_boot_logistic = np.mean(boot_coefs_logistic, axis=0)
std_boot_logistic = np.std(boot_coefs_logistic, axis=0)

print("Mean of bootstrap coefficients (Intercept, balance):", media_boot_logistic)
print("Standard deviation of bootstrap coefficients (Intercept, balance):", std_boot_logistic)

Mean of bootstrap coefficients (Intercept, balance): [-1.06725156e+01  5.51255787e-03]
Standard deviation of bootstrap coefficients (Intercept, balance): [3.41827973e-01 2.08201918e-04]


### Adv

In [None]:




B = 1000  
boot_coefs = []

n = len(X)

for b in range(B):
    idx = np.random.choice(n, n, replace=True)
    X_b = X[idx]
    y_b = y[idx]

    model_boot = LinearRegression()
    model_boot.fit(X_b, y_b)

    boot_coefs.append([model_boot.intercept_, *model_boot.coef_])

boot_coefs = np.array(boot_coefs)

media_boot = np.mean(boot_coefs, axis=0)
std_boot = np.std(boot_coefs, axis=0)

print("Mean of bootstrap coefficients (Intercept, TV, radio, newspaper):", media_boot)
print("Standard deviation of bootstrap coefficients (Intercept, TV, radio, newspaper):", std_boot)

In [59]:
x_col = ['TV', 'radio', 'newspaper']
y_col = 'sales'

boots = 1000
coefs = []

for i in range(boots):
    sample = resample(adv, replace=True, n_samples=len(adv))
    
    X_sample = sample[x_col]
    y_sample = sample[y_col]
    
    lr.fit(X_sample, y_sample)
    coefs.append(lr.coef_)

coefs = np.array(coefs)

coef_mean = coefs.mean(axis=0)
coef_std = coefs.std(axis=0)

coef_mean, coef_std


(array([ 0.04573014,  0.18830123, -0.00053918]),
 array([0.00187269, 0.01073338, 0.00630896]))

### Defa

In [70]:
defa["default_num"] = (defa["default"] == "Yes").astype(int)

In [71]:
x_col = ['default_num']
y_col = 'balance'

boots = 1000
coefs = []

for i in range(boots):
    sample = resample(defa, replace=True, n_samples=len(defa))
    
    X_sample = sample[x_col]
    y_sample = sample[y_col]
    
    lr.fit(X_sample, y_sample)
    coefs.append(lr.coef_)

coefs = np.array(coefs)

coef_mean = coefs.mean(axis=0)
coef_std = coefs.std(axis=0)

coef_mean, coef_std

(array([943.13829412]), array([19.41910617]))

In [80]:
# --- 1. Asegurar que Y sea numérica ---
# Convierte yes/no a 1/0
defa['default'] = defa['default'].map({'yes': 1, 'no': 0})

X = defa.drop('default', axis=1).dropna()
y = defa['default'].dropna()

# Si tienes variables categóricas en X:
X = pd.get_dummies(X, drop_first=True)

# --- 2. Bootstrap ---
coef_list = []

for i in range(1000):

    # Remuestreo bootstrap
    X_sample, y_sample = resample(X, y, replace=True, n_samples=len(defa))

    # Ajustar la regresión logística
    model = LogisticRegression(max_iter=1000)
    model.fit(X_sample, y_sample)

    # Guardar coeficientes
    coef_list.append(model.coef_[0])  # vector de coeficientes

# Convertir a DataFrame
coef_defa = pd.DataFrame(coef_list, columns=X.columns)

# --- 3. Calcular medias y desviaciones estándar ---
media_coef = coef_defa.mean()
std_coef = coef_defa.std()

print("Media de los coeficientes:")
print(media_coef)

print("\nDesviación estándar de los coeficientes:")
print(std_coef)


ValueError: Found input variables with inconsistent numbers of samples: [10000, 0]