In [1]:
import pandas as pd
# Load the cleaned healthcare dataset
data = pd.read_csv('Cleaned_healthcare_dataset.csv')

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define features (X) and target (y)
X = data.drop(columns=['Test Results_0', 'Test Results_1', 'Test Results_2', 'Name', 'Date of Admission', 'Discharge Date'])
y = data[['Test Results_0', 'Test Results_1', 'Test Results_2']].idxmax(axis=1)  # Revert one-hot encoding to class labels
y = y.map({'Test Results_0': 'Normal', 'Test Results_1': 'Abnormal', 'Test Results_2': 'Inconclusive'})

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
# Combine the one-hot encoded columns into a single target variable
# This will assign a label based on the index of the column with a value of 1
y = data[['Test Results_0', 'Test Results_1', 'Test Results_2']].idxmax(axis=1)

# Map the column names back to original class labels if desired (optional)
y = y.map({'Test Results_0': 'Normal', 'Test Results_1': 'Abnormal', 'Test Results_2': 'Inconclusive'})

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Initialize and train the Logistic Regression model
logreg = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')
logreg.fit(X_train, y_train)

# Predict on test data
y_pred_logreg = logreg.predict(X_test)

# Evaluate the model
print("Logistic Regression Results")
print("Accuracy:", accuracy_score(y_test, y_pred_logreg))
print("Classification Report:\n", classification_report(y_test, y_pred_logreg))




Logistic Regression Results
Accuracy: 0.9008823529411765
Classification Report:
               precision    recall  f1-score   support

    Abnormal       0.30      0.11      0.16       887
Inconclusive       0.90      0.97      0.94      7566
      Normal       1.00      1.00      1.00      1747

    accuracy                           0.90     10200
   macro avg       0.74      0.69      0.70     10200
weighted avg       0.87      0.90      0.88     10200



In [5]:
##Using SMOTE

In [4]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Apply SMOTE to balance the classes
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split resampled data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Logistic Regression model on SMOTE-balanced data
logistic_model_smote = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')
logistic_model_smote.fit(X_train, y_train)

# Predict on the test set
y_pred_smote = logistic_model_smote.predict(X_test)

# Evaluate the model
print("Logistic Regression with SMOTE Results")
print("Accuracy:", accuracy_score(y_test, y_pred_smote))
print("Classification Report:\n", classification_report(y_test, y_pred_smote))




Logistic Regression with SMOTE Results
Accuracy: 0.9406701909214032
Classification Report:
               precision    recall  f1-score   support

    Abnormal       0.94      0.88      0.91      7572
Inconclusive       0.89      0.94      0.91      7764
      Normal       1.00      1.00      1.00      7553

    accuracy                           0.94     22889
   macro avg       0.94      0.94      0.94     22889
weighted avg       0.94      0.94      0.94     22889



In [10]:
pip install imbalanced-learn


Note: you may need to restart the kernel to use updated packages.


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize and train the Random Forest model
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Predict on test data
y_pred_rf = rf.predict(X_test)

# Evaluate the model
print("Random Forest Results")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Classification Report:\n", classification_report(y_test, y_pred_rf))


Random Forest Results
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

    Abnormal       1.00      1.00      1.00      7572
Inconclusive       1.00      1.00      1.00      7764
      Normal       1.00      1.00      1.00      7553

    accuracy                           1.00     22889
   macro avg       1.00      1.00      1.00     22889
weighted avg       1.00      1.00      1.00     22889



In [15]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize and train the Gradient Boosting model
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)

# Predict on test data
y_pred_gb = gb.predict(X_test)

# Evaluate the model
print("Gradient Boosting Results")
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
print("Classification Report:\n", classification_report(y_test, y_pred_gb))


Gradient Boosting Results
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      7561
           1       1.00      1.00      1.00      7564
           2       1.00      1.00      1.00      7764

    accuracy                           1.00     22889
   macro avg       1.00      1.00      1.00     22889
weighted avg       1.00      1.00      1.00     22889



In [5]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Initialize and train the SVM model
svm = SVC(kernel='linear', probability=True, random_state=42)
svm.fit(X_train, y_train)

# Predict on test data
y_pred_svm = svm.predict(X_test)

# Evaluate the model
print("Support Vector Machine (SVM) Results")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Classification Report:\n", classification_report(y_test, y_pred_svm))


Support Vector Machine (SVM) Results
Accuracy: 0.943247848311416
Classification Report:
               precision    recall  f1-score   support

    Abnormal       1.00      0.83      0.91      7572
Inconclusive       0.86      1.00      0.92      7764
      Normal       1.00      1.00      1.00      7553

    accuracy                           0.94     22889
   macro avg       0.95      0.94      0.94     22889
weighted avg       0.95      0.94      0.94     22889

