### Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### Sample Patient Data

In [2]:
data = pd.DataFrame({
    'patient_id': ['P001', 'P002', 'P003', 'P004', 'P005', 'P006', 'P007', 'P008', 'P009', 'P010'],
    'age': [25, 45, 30, 50, 60, 28, 55, 40, 35, 65],
    'bmi': [22.5, 28.1, 26.0, 31.5, 33.0, 24.8, 29.5, 27.0, 25.5, 34.5],
    'blood_pressure': [120, 135, 128, 145, 150, 118, 140, 130, 125, 155],
    'glucose_level': [85, 140, 95, 160, 170, 90, 150, 130, 100, 180],
    'smoking': ['No', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes'],
    'physical_activity': ['High', 'Low', 'Medium', 'Low', 'Low', 'High', 'Low', 'Medium', 'High', 'Low'],
    'disease': [0, 1, 0, 1, 1, 0, 1, 0, 0, 1]  # Target variable (0 = Healthy, 1 = Diseased)
})

# Display first five rows
print(data.head())

  patient_id  age   bmi  blood_pressure  glucose_level smoking  \
0       P001   25  22.5             120             85      No   
1       P002   45  28.1             135            140     Yes   
2       P003   30  26.0             128             95      No   
3       P004   50  31.5             145            160     Yes   
4       P005   60  33.0             150            170     Yes   

  physical_activity  disease  
0              High        0  
1               Low        1  
2            Medium        0  
3               Low        1  
4               Low        1  


### Encode Categorical Variables

In [3]:
label_enc = LabelEncoder()
data['smoking'] = label_enc.fit_transform(data['smoking'])          # No=0, Yes=1
data['physical_activity'] = label_enc.fit_transform(data['physical_activity'])  # Low/Medium/High → numeric

### Split Data 

In [4]:
X = data.drop(['patient_id', 'disease'], axis=1)
y = data['disease']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Train Classification Models

#### 1. Logistic Regression 

In [5]:
log_reg = LogisticRegression() 
log_reg.fit(X_train, y_train) 
y_pred_log = log_reg.predict(X_test)

#### 2. Decision Tree Classifier

In [6]:
dt_clf = DecisionTreeClassifier() 
dt_clf.fit(X_train, y_train) 
y_pred_dt = dt_clf.predict(X_test)

#### 3. Random Forest Classifier

In [7]:
rf_clf = RandomForestClassifier(n_estimators=100) 
rf_clf.fit(X_train, y_train) 
y_pred_rf = rf_clf.predict(X_test)

#### 4️. Support Vector Machine (SVM)

In [8]:
svm_clf = SVC() 
svm_clf.fit(X_train, y_train) 
y_pred_svm = svm_clf.predict(X_test)

#### 5. K-Nearest Neighbors (KNN)

In [9]:
knn_clf = KNeighborsClassifier(n_neighbors=5) 
knn_clf.fit(X_train, y_train) 
y_pred_knn = knn_clf.predict(X_test)

### Model Evaluation 

In [10]:
models = { 
    "Logistic Regression": y_pred_log, 
    "Decision Tree": y_pred_dt, 
    "Random Forest": y_pred_rf, 
    "SVM": y_pred_svm, 
    "KNN": y_pred_knn 
} 
 
for model_name, y_pred in models.items(): 
    print(f"\n{model_name} Performance:") 
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}") 
    print(f"Precision: {precision_score(y_test, y_pred):.2f}") 
    print(f"Recall: {recall_score(y_test, y_pred):.2f}") 
    print(f"F1 Score: {f1_score(y_test, y_pred):.2f}") 
    print("-" * 50)


Logistic Regression Performance:
Accuracy: 0.50
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
--------------------------------------------------

Decision Tree Performance:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
--------------------------------------------------

Random Forest Performance:
Accuracy: 0.50
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
--------------------------------------------------

SVM Performance:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
--------------------------------------------------

KNN Performance:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
