In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report

In [18]:
data = pd.read_csv("pneumonia_blood_test_data.csv")


In [19]:
X = data[['Age', 'Gender', 'Symptoms', 'White_Blood_Cell_Count', 'CRP_Level', 'Procalcitonin_Level', 'ESR_Level']]
y = data['Diagnosis']


In [20]:
categorical_columns = ['Gender', 'Symptoms']


In [21]:
column_transformer = ColumnTransformer([('one_hot_encoder', OneHotEncoder(), categorical_columns)], remainder='passthrough')
X_encoded = column_transformer.fit_transform(X)

In [22]:
X_encoded = X_encoded.toarray()


In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)


In [24]:
classifiers = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

In [27]:
for clf_name, clf in classifiers.items():
    clf.fit(X_train, y_train)


In [28]:
    y_pred = clf.predict(X_test)


In [29]:
    accuracy = accuracy_score(y_test, y_pred)


In [31]:
print(f"{clf_name} Accuracy:", accuracy)
print(f"{clf_name} Classification Report:")
print(classification_report(y_test, y_pred))
print("\n")

Decision Tree Accuracy: 0.8180987202925045
Decision Tree Classification Report:
              precision    recall  f1-score   support

No Pneumonia       0.88      0.88      0.88       820
   Pneumonia       0.64      0.64      0.64       274

    accuracy                           0.82      1094
   macro avg       0.76      0.76      0.76      1094
weighted avg       0.82      0.82      0.82      1094





In [32]:
# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Train and evaluate classifiers
for clf_name, clf in classifiers.items():
    # Train the classifier
    clf.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = clf.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    
    # Print results for each classifier
    print(f"{clf_name} Accuracy:", accuracy)
    print(f"{clf_name} Classification Report:")
    print(classification_report(y_test, y_pred))
    print("\n")


Random Forest Accuracy: 0.8327239488117002
Random Forest Classification Report:
              precision    recall  f1-score   support

No Pneumonia       0.82      1.00      0.90       820
   Pneumonia       0.97      0.34      0.51       274

    accuracy                           0.83      1094
   macro avg       0.89      0.67      0.70      1094
weighted avg       0.86      0.83      0.80      1094





In [33]:
naive_bayes = GaussianNB()

# Train the classifier
naive_bayes.fit(X_train, y_train)

# Predict on the test set
y_pred = naive_bayes.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print results for Naive Bayes classifier
print("Naive Bayes Accuracy:", accuracy)
print("Naive Bayes Classification Report:")
print(classification_report(y_test, y_pred))

Naive Bayes Accuracy: 0.8793418647166362
Naive Bayes Classification Report:
              precision    recall  f1-score   support

No Pneumonia       0.91      0.93      0.92       820
   Pneumonia       0.78      0.72      0.75       274

    accuracy                           0.88      1094
   macro avg       0.84      0.83      0.84      1094
weighted avg       0.88      0.88      0.88      1094

