### Compare Bayesian Probit model to classical Logistic regression

In [4]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from bayes_linear.bprobit import BayesProbit, BayesProbit_MCMC, BayesProbit_VI

# Generate the data using the BayesProbit class
# simulate_DGP returns the labels y, the feature array X and a third value
# stored as true_theta (presumably the coefficients used to generate the data --
# confirm in bayes_linear.bprobit; it is not used again in the visible cells).
# NOTE(review): set_seed=42 is presumably there to make the simulated draw repeatable.
bayes_probit = BayesProbit(verbose=True)
N = 10000  # Number of data points
D = 10     # Number of features
y, X, true_theta = bayes_probit.simulate_DGP(N=N, D=D, mu_theta=0, set_seed=42)

# Class balance check: prints the distinct label values and how often each occurs
print(np.unique(y, return_counts=True))

# Split the data into training and test sets
# (30% of the rows are held out for testing; random_state is fixed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the Bayesian Probit model
# The MCMC variant on the next line is left disabled as an alternative; the VI variant is the one fitted.
# With verbose=True the fit logs the lower bound and its change every 10 iterations (see the cell output).
# NOTE(review): epsilon_conv looks like the convergence tolerance on that change -- confirm in bayes_linear.bprobit.
#bayes_model = BayesProbit_MCMC(N_sim=5000, burn_in=1000, verbose=True, pred_mode=['full'])
bayes_model = BayesProbit_VI(max_iter=1000, epsilon_conv=1e-4, verbose=True)

bayes_model.fit(X_train, y_train)

# Predict with the Bayesian Probit model
# (predictions are made on the held-out test features only)
y_pred_bayes = bayes_model.predict(X_test)

# Fit a logistic regression model
# (baseline for comparison: scikit-learn defaults apart from the fixed random_state, same training split)
logistic_model = LogisticRegression(random_state=42)
logistic_model.fit(X_train, y_train)

# Predict with the logistic regression model
y_pred_logistic = logistic_model.predict(X_test)

# Print classification reports
# (per-class precision / recall / F1 for each model, both scored against the same y_test)
print("Classification Report for Bayesian Probit Model:")
print(classification_report(y_test, y_pred_bayes))

print("Classification Report for Logistic Regression Model:")
print(classification_report(y_test, y_pred_logistic))


(array([0, 1]), array([4137, 5863]))
Iter. 10 Lower Bound -2189.6795 - Delta LB 34.0317
Iter. 20 Lower Bound -2065.0435 - Delta LB 4.3064
Iter. 30 Lower Bound -2047.5047 - Delta LB 0.6552
Iter. 40 Lower Bound -2044.7978 - Delta LB 0.1022
Iter. 50 Lower Bound -2044.3744 - Delta LB 0.016
Iter. 60 Lower Bound -2044.3081 - Delta LB 0.0025
Iter. 70 Lower Bound -2044.2977 - Delta LB 0.0004
Converged!

Classification Report for Bayesian Probit Model:
              precision    recall  f1-score   support

           0       0.72      0.84      0.78      1239
           1       0.87      0.77      0.82      1761

    accuracy                           0.80      3000
   macro avg       0.80      0.81      0.80      3000
weighted avg       0.81      0.80      0.80      3000

Classification Report for Logistic Regression Model:
              precision    recall  f1-score   support

           0       0.77      0.76      0.77      1239
           1       0.83      0.84      0.84      1761

    accu

Optional: transform the feature space X with a polynomial basis expansion

In [6]:
from bayes_linear.bprobit import design_matrix

# Split the data into training and test sets
# Re-splitting from the untransformed X and y (which must already exist in the kernel)
# keeps this cell re-runnable, because X_train / X_test are overwritten further down.
# test_size and random_state are the same values used for the first split in this notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build the feature transformer from the project's design_matrix helper.
# NOTE(review): the exact meaning of n_features=3 and add_bias=False is defined in
# bayes_linear.bprobit and is not visible here -- confirm before changing these values.
dmat = design_matrix(n_features=3, add_bias=False)

# Fit the transformer on the training features only, then apply that fitted transformer to the
# test features. From here on X_train / X_test hold the transformed features.
X_train = dmat.fit_transform(X_train)
X_test = dmat.transform(X_test)

# Fit the Bayesian Probit model
# The MCMC variant on the next line is left disabled as an alternative; the VI variant is the one fitted.
#bayes_model = BayesProbit_MCMC(N_sim=5000, burn_in=1000, verbose=True, pred_mode=['full'])
bayes_model = BayesProbit_VI(max_iter=1000, epsilon_conv=1e-4, verbose=True)

# The return value of fit() is kept in `fitted`; it is not used in the visible cells.
fitted = bayes_model.fit(X_train, y_train)

Iter. 10 Lower Bound -2173.0405 - Delta LB 34.5207
Iter. 20 Lower Bound -2045.7585 - Delta LB 4.4453
Iter. 30 Lower Bound -2027.5063 - Delta LB 0.6905
Iter. 40 Lower Bound -2024.6283 - Delta LB 0.1101
Iter. 50 Lower Bound -2024.1681 - Delta LB 0.0176
Iter. 60 Lower Bound -2024.0944 - Delta LB 0.0028
Iter. 70 Lower Bound -2024.0825 - Delta LB 0.0005
Converged!



In [7]:
# Class predictions from the Bayesian Probit model fitted in the preceding cell
# (the active model there is BayesProbit_VI; the MCMC variant is commented out)
y_pred_bayes = bayes_model.predict(X_test)

# Logistic regression baseline, trained on the same training features.
# scikit-learn's fit() returns the estimator itself, so construction and fitting are chained.
logistic_model = LogisticRegression(random_state=42).fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)

# Classification reports for both models, scored against the same held-out labels.
# Each header and its report are emitted on separate lines via sep="\n".
print(
    "Classification Report for Bayesian Probit Model:",
    classification_report(y_test, y_pred_bayes),
    sep="\n",
)

print(
    "Classification Report for Logistic Regression Model:",
    classification_report(y_test, y_pred_logistic),
    sep="\n",
)


Classification Report for Bayesian Probit Model:
              precision    recall  f1-score   support

           0       0.73      0.82      0.77      1239
           1       0.86      0.79      0.82      1761

    accuracy                           0.80      3000
   macro avg       0.80      0.80      0.80      3000
weighted avg       0.81      0.80      0.80      3000

Classification Report for Logistic Regression Model:
              precision    recall  f1-score   support

           0       0.77      0.76      0.76      1239
           1       0.83      0.84      0.84      1761

    accuracy                           0.81      3000
   macro avg       0.80      0.80      0.80      3000
weighted avg       0.81      0.81      0.81      3000

