In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix, log_loss
)
import numpy as np

In [2]:
# Load the model-ready BRFSS 2024 table. The DIABETE4 column is the label
# (cast to int); every other column is used as a feature.
data = pd.read_csv("../../Results/BRFSS_2024_model_ready.csv", low_memory=False)
y = data['DIABETE4'].astype(int)
X = data.drop(columns='DIABETE4')

# 80/20 hold-out split, stratified on the label so both partitions keep the
# same class proportions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [3]:
# KNN with the Minkowski metric at p=2 (Euclidean distance), 671 neighbours,
# and the neighbour search spread over all available cores (n_jobs=-1).
# NOTE(review): k=671 is hard-coded and its rationale is not shown here.
# NOTE(review): distances are computed on X_train exactly as loaded — confirm
# the features were scaled upstream, otherwise large-range columns dominate.
knn_p2 = KNeighborsClassifier(
    n_neighbors=671, p=2, metric='minkowski', n_jobs=-1
).fit(X_train, y_train)

# fit() returns the estimator itself; the bare name keeps the cell's
# rich display of the fitted model.
knn_p2

0,1,2
,n_neighbors,671
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,-1


In [5]:
# Score the hold-out set with a single neighbour search.
# predict() and predict_proba() would each repeat the full k=671 lookup over
# every test row, so only predict_proba() is called here. Its columns follow
# knn_p2.classes_, and with uniform neighbour weights the predicted label is
# the most frequent class among the neighbours, i.e. the argmax of the
# probabilities. np.argmax returns the first maximum, so ties resolve to the
# lowest class label.
y_proba = knn_p2.predict_proba(X_test)
y_pred = knn_p2.classes_[np.argmax(y_proba, axis=1)]

In [6]:
# Hold-out metrics for knn_p2 (expects y_pred / y_proba from that model).
# Macro averaging weights every class equally regardless of support.
# zero_division=0 makes the 0/0 case explicit: a class that is never predicted
# has undefined precision, which scikit-learn otherwise reports as 0 while
# emitting an UndefinedMetricWarning on every call.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
# Pass the classifier's class order so each probability column is matched to
# its label explicitly instead of being inferred from y_test.
logloss = log_loss(y_test, y_proba, labels=knn_p2.classes_)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision (macro): {precision:.4f}")
print(f"Recall (macro):    {recall:.4f}")
print(f"F1 Score (macro):  {f1:.4f}")
print(f"Log Loss:          {logloss:.4f}")

print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy:  0.8301
Precision (macro): 0.4851
Recall (macro):    0.3343
F1 Score (macro):  0.3044
Log Loss:          0.4567

Classification Report:
               precision    recall  f1-score   support

           1       0.62      0.00      0.01     13162
           3       0.83      1.00      0.91     75226
           4       0.00      0.00      0.00      2261

    accuracy                           0.83     90649
   macro avg       0.49      0.33      0.30     90649
weighted avg       0.78      0.83      0.75     90649


Confusion Matrix:
 [[   40 13122     0]
 [   21 75205     0]
 [    3  2258     0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [7]:
# KNN with the Minkowski metric at p=1 (Manhattan distance), 671 neighbours,
# and the neighbour search spread over all available cores (n_jobs=-1).
# NOTE(review): k=671 is hard-coded and its rationale is not shown here.
# NOTE(review): distances are computed on X_train exactly as loaded — confirm
# the features were scaled upstream, otherwise large-range columns dominate.
knn_p1 = KNeighborsClassifier(
    n_neighbors=671, p=1, metric='minkowski', n_jobs=-1
).fit(X_train, y_train)

# fit() returns the estimator itself; the bare name keeps the cell's
# rich display of the fitted model.
knn_p1

0,1,2
,n_neighbors,671
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,1
,metric,'minkowski'
,metric_params,
,n_jobs,-1


In [8]:
# Score the hold-out set with a single neighbour search.
# predict() and predict_proba() would each repeat the full k=671 lookup over
# every test row, so only predict_proba() is called here. Its columns follow
# knn_p1.classes_, and with uniform neighbour weights the predicted label is
# the most frequent class among the neighbours, i.e. the argmax of the
# probabilities. np.argmax returns the first maximum, so ties resolve to the
# lowest class label.
y_proba = knn_p1.predict_proba(X_test)
y_pred = knn_p1.classes_[np.argmax(y_proba, axis=1)]

In [9]:
# Hold-out metrics for knn_p1 (expects y_pred / y_proba from that model).
# Macro averaging weights every class equally regardless of support.
# zero_division=0 makes the 0/0 case explicit: a class that is never predicted
# has undefined precision, which scikit-learn otherwise reports as 0 while
# emitting an UndefinedMetricWarning on every call.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
# Pass the classifier's class order so each probability column is matched to
# its label explicitly instead of being inferred from y_test.
logloss = log_loss(y_test, y_proba, labels=knn_p1.classes_)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision (macro): {precision:.4f}")
print(f"Recall (macro):    {recall:.4f}")
print(f"F1 Score (macro):  {f1:.4f}")
print(f"Log Loss:          {logloss:.4f}")

print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy:  0.8307
Precision (macro): 0.5165
Recall (macro):    0.3361
F1 Score (macro):  0.3082
Log Loss:          0.4520

Classification Report:
               precision    recall  f1-score   support

           1       0.72      0.01      0.02     13162
           3       0.83      1.00      0.91     75226
           4       0.00      0.00      0.00      2261

    accuracy                           0.83     90649
   macro avg       0.52      0.34      0.31     90649
weighted avg       0.79      0.83      0.76     90649


Confusion Matrix:
 [[  115 13047     0]
 [   41 75185     0]
 [    4  2257     0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
