In [55]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Binarizer
import pandas as pd
import numpy as np

In [56]:
# Load the diabetes dataset that ships with scikit-learn.
# as_frame=True asks for pandas objects, so the returned container exposes
# a single DataFrame through its `.frame` attribute.
data = load_diabetes(as_frame=True)
# Working frame; later cells read and then drop its 'target' column.
df = data.frame

In [57]:
# Turn the continuous target into a two-class label by cutting at its median:
# Binarizer maps values strictly above the threshold to 1 and the rest to 0.
median_target = np.median(df['target'])
target_binarizer = Binarizer(threshold=median_target)

# Binarizer expects a 2-D input, hence the double brackets; ravel() flattens
# the single-column result back to the 1-D label vector.
y = target_binarizer.fit_transform(df[['target']]).ravel()

# Feature matrix: every column except the one the labels were derived from.
X = df.drop(columns='target')

In [58]:
# One seed shared by the split and the tree, so that Restart Kernel -> Run All
# rebuilds the same models and therefore the same metrics.
SEED = 42

# Train-test split: hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Initialize classifiers
clf1 = LogisticRegression(max_iter=1000)
# DecisionTreeClassifier permutes the features at every split, so equally good
# candidate splits can be resolved differently from run to run unless
# random_state is fixed.
clf2 = DecisionTreeClassifier(random_state=SEED)

# Train models on the training portion only
clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)


In [66]:
# Side-by-side table of the true test labels and each model's predictions,
# built in one step; column order follows the dict's insertion order.
result = pd.DataFrame(
    {
        'Actual Label': y_test,
        'Logistic Regression Prediction': y_pred1,
        'Decision Tree Prediction': y_pred2,
    }
)

# Show ten randomly chosen rows (unseeded, so the selection varies per run).
result.sample(10)

Unnamed: 0,Actual Label,Logistic Regression Prediction,Decision Tree Prediction
86,0.0,0.0,1.0
52,0.0,0.0,1.0
77,1.0,1.0,1.0
38,1.0,1.0,1.0
76,1.0,1.0,1.0
0,1.0,1.0,0.0
25,0.0,1.0,1.0
42,1.0,1.0,0.0
65,1.0,1.0,1.0
14,0.0,0.0,0.0


In [63]:
# Evaluate Logistic Regression
# Confusion matrix of the held-out labels against clf1's predictions, wrapped
# in a DataFrame so the prediction columns are labelled.
lr_confusion = pd.DataFrame(
    confusion_matrix(y_test, y_pred1),
    columns=["Pred 0", "Pred 1"],
)

print("-" * 50)
print("Logistic Regression Confusion Matrix")
print(lr_confusion)

print("=" * 50)
# Same three summary scores, each printed as "<name> - <value>".
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 score", f1_score),
):
    print(metric_name, "-", metric_fn(y_test, y_pred1))


--------------------------------------------------
Logistic Regression Confusion Matrix
   Pred 0  Pred 1
0      37      12
1      11      29
Precision - 0.7073170731707317
Recall - 0.725
F1 score - 0.7160493827160493


In [64]:

# Evaluate Decision Tree
# Confusion matrix of the held-out labels against clf2's predictions, wrapped
# in a DataFrame so the prediction columns are labelled.
dt_confusion = pd.DataFrame(
    confusion_matrix(y_test, y_pred2),
    columns=["Pred 0", "Pred 1"],
)

print("-" * 50)
print("Decision Tree Confusion Matrix")
print(dt_confusion)

print("=" * 50)
# Same three summary scores, each printed as "<name> - <value>".
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 score", f1_score),
):
    print(metric_name, "-", metric_fn(y_test, y_pred2))
print("-" * 50)


--------------------------------------------------
Decision Tree Confusion Matrix
   Pred 0  Pred 1
0      37      12
1      13      27
Precision - 0.6923076923076923
Recall - 0.675
F1 score - 0.6835443037974683
--------------------------------------------------
