In [16]:
import numpy as np

In [17]:
import pandas as pd
import statsmodels.api as sm
from ISLP import load_data
from ISLP.models import summarize
from sklearn.model_selection import train_test_split

1)

In [2]:
# Load the `Default` data set shipped with ISLP; every later cell reads from it.
default = load_data("Default")

In [3]:
# Binary response: 1 where the `default` column equals 'Yes', 0 otherwise.
y = default['default'].eq('Yes').astype(int)

In [4]:
# Design matrix: the two predictors plus the intercept column added by statsmodels.
X = sm.add_constant(default[['income', 'balance']])

1.a

In [5]:
# Logistic regression of `y` on `X`, fitted as a binomial GLM.
model = sm.GLM(
    y,
    X,
    family=sm.families.Binomial(),
).fit()

# Heading followed by the coefficient table, one per line.
print("Logistic Regression (GLM) ", summarize(model), sep="\n")

Logistic Regression (GLM) 
              coef   std err       z  P>|z|
const   -11.540500  0.435000 -26.544    0.0
income    0.000021  0.000005   4.174    0.0
balance   0.005600  0.000000  24.835    0.0


In [6]:
# Model-based standard errors for the two slope coefficients (intercept dropped).
glm_se = model.bse.loc[['income', 'balance']]
print("\nStandard Error from sm.GLM():", glm_se, sep="\n")


Standard Error from sm.GLM():
income     0.000005
balance    0.000227
dtype: float64


1.b

In [7]:
def boot_fn(data, index):
    """Refit the binomial GLM on the selected rows and return the slope estimates.

    Parameters
    ----------
    data : DataFrame
        Must provide the columns 'default', 'income' and 'balance'.
    index : sequence
        Row labels handed to ``data.loc``; labels may repeat.

    Returns
    -------
    The fitted parameters for 'income' and 'balance', in that order.
    """
    # Pull the requested rows once, then split into response and predictors.
    resample = data.loc[index]
    response = (resample['default'] == 'Yes').astype(int)
    design = sm.add_constant(resample[['income', 'balance']])

    fit = sm.GLM(response, design, family=sm.families.Binomial()).fit()
    return fit.params[['income', 'balance']]

1.c

In [8]:
np.random.seed(1)
B = 1000
n_obs = len(default)

# One row per bootstrap replicate: (income, balance) coefficients refitted on
# n_obs rows drawn with replacement from the full data set.
boot_estimates = np.vstack([
    boot_fn(default, np.random.choice(n_obs, size=n_obs, replace=True))
    for _ in range(B)
])

# Column-wise spread of the replicate estimates is the bootstrap standard error.
boot_se = np.std(boot_estimates, axis=0)
print("\nBootstrap Standard Error:")
print(f"Income: {boot_se[0]:.6f}, Balance: {boot_se[1]:.6f}")



Bootstrap Standard Error:
Income: 0.000005, Balance: 0.000233


1.d

In [9]:
# Gather the whole report first, then emit it in a single write.
report_lines = [
    "\nComparison of Standard Error:",
    f"GLM SE (Income): {glm_se['income']:.6f}, Bootstrap SE (Income): {boot_se[0]:.6f}",
    f"GLM SE (Balance): {glm_se['balance']:.6f}, Bootstrap SE (Balance): {boot_se[1]:.6f}",
    "\nComment:",
    "The standard error from sm.GLM() and bootstrap are very close. "
    "The similarity here shows the model is stable and both methods are agree.",
]
print(*report_lines, sep="\n")


Comparison of Standard Error:
GLM SE (Income): 0.000005, Bootstrap SE (Income): 0.000005
GLM SE (Balance): 0.000227, Bootstrap SE (Balance): 0.000233

Comment:
The standard error from sm.GLM() and bootstrap are very close. The similarity here shows the model is stable and both methods are agree.


task2 

2.a


In [10]:
# Same binomial GLM, fitted on every observation, shown with the full summary table.
model_full = sm.GLM(
    y,
    X,
    family=sm.families.Binomial(),
).fit()
print("Logistic Regression Result of  (Full Data)", model_full.summary(), sep="\n")

Logistic Regression Result of  (Full Data)
                 Generalized Linear Model Regression Results                  
Dep. Variable:                default   No. Observations:                10000
Model:                            GLM   Df Residuals:                     9997
Model Family:                Binomial   Df Model:                            2
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -789.48
Date:                Wed, 01 Oct 2025   Deviance:                       1579.0
Time:                        09:35:16   Pearson chi2:                 6.95e+03
No. Iterations:                     9   Pseudo R-squ. (CS):             0.1256
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const    

2.b

In [11]:
# Reset the global NumPy seed, then split the data 50/50 into training and
# validation halves; `random_state=1` is passed to the splitter as well.
np.random.seed(1)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=1,
)

In [12]:
# Fit the logistic model on the training half only.
model_val = sm.GLM(
    y_train,
    X_train,
    family=sm.families.Binomial(),
).fit()

In [13]:
# Model predictions for the held-out validation rows.
y_pred_prob = model_val.predict(exog=X_val)

In [14]:
# Classify as 1 when the prediction is strictly above 0.5, else 0.
y_pred = y_pred_prob.gt(0.5).astype(int)

In [15]:
# Share of validation rows whose predicted class differs from the observed one.
test_error = (y_pred != y_val).mean()

print(
    "\nValidation Set Result:",
    f"Test Error (Validation Mis-classification of Rate): {test_error:.4f}",
    sep="\n",
)



Validation Set Result:
Test Error (Validation Mis-classification of Rate): 0.0250
