In [19]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)

In [20]:
# Read the prepared CSV, then separate the DIABETE4 target column (cast to int)
# from the remaining columns, which serve as the feature matrix.
data = pd.read_csv("../../Results/BRFSS_2024_model_ready.csv", low_memory=False)
y = data["DIABETE4"].astype(int)
X = data.drop(columns="DIABETE4")

In [21]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [22]:
# Cast both feature partitions to 32-bit floats.
X_train, X_test = (part.astype("float32") for part in (X_train, X_test))

In [23]:
# Wall-clock timestamp captured at the moment this cell runs.
t0 = time.time()

# Logistic regression with class weights set to "balanced", solved with L-BFGS.
balanced_lr = LogisticRegression(
    class_weight="balanced",
    solver="lbfgs",
    max_iter=400,
    C=0.7,
)

# Features are standardised before they reach the classifier.
pipe_bal = Pipeline(steps=[("scaler", StandardScaler()), ("lr", balanced_lr)])

In [24]:

# Start the timer in the same cell as the fit so the reported duration covers
# only training. Previously the start timestamp came from an earlier cell, so
# the figure also included the time between running the two cells and became
# stale whenever this cell was re-run on its own.
# perf_counter() is a monotonic clock intended for measuring intervals.
fit_start = time.perf_counter()
pipe_bal.fit(X_train, y_train)
print(f"Balanced train time: {time.perf_counter() - fit_start:.1f}s")

Balanced train time: 21.1s


In [25]:
# Predict class labels for the held-out rows with the fitted pipeline.
y_pred_bal = pipe_bal.predict(X=X_test)


In [26]:
# Precision, recall and F1 share the same settings: macro averaging (each class
# counts equally) and a score of 0 where a ratio would otherwise be undefined.
macro_opts = {"average": "macro", "zero_division": 0}

acc_b = accuracy_score(y_test, y_pred_bal)
prec_b, rec_b, f1_b = (
    scorer(y_test, y_pred_bal, **macro_opts)
    for scorer in (precision_score, recall_score, f1_score)
)

In [27]:
# Headline scores, one per line, each label padded so the values line up.
for label, value in (
    ("Accuracy:           ", acc_b),
    ("Precision (macro):  ", prec_b),
    ("Recall (macro):     ", rec_b),
    ("F1 Score (macro):   ", f1_b),
):
    print(f"{label}{value:.4f}")

# Per-class breakdown followed by the raw confusion matrix.
report_text = classification_report(y_test, y_pred_bal, zero_division=0)
conf_mat = confusion_matrix(y_test, y_pred_bal)
print("\nClassification Report:\n", report_text)
print("\nConfusion Matrix:\n", conf_mat)

Accuracy:           0.6261
Precision (macro):  0.4442
Recall (macro):     0.5389
F1 Score (macro):   0.4277

Classification Report:
               precision    recall  f1-score   support

           1       0.34      0.58      0.43     13162
           3       0.94      0.64      0.76     75226
           4       0.05      0.40      0.09      2261

    accuracy                           0.63     90649
   macro avg       0.44      0.54      0.43     90649
weighted avg       0.83      0.63      0.70     90649


Confusion Matrix:
 [[ 7633  2340  3189]
 [14253 48223 12750]
 [  758   608   895]]
