In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the dataset

df = pd.read_csv('diabetic.csv')

In [3]:
# Dataset Information

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101766 entries, 0 to 101765
Data columns (total 50 columns):
 #   Column                    Non-Null Count   Dtype 
---  ------                    --------------   ----- 
 0   encounter_id              101766 non-null  int64 
 1   patient_nbr               101766 non-null  int64 
 2   race                      101766 non-null  object
 3   gender                    101766 non-null  object
 4   age                       101766 non-null  object
 5   weight                    101766 non-null  object
 6   admission_type_id         101766 non-null  int64 
 7   discharge_disposition_id  101766 non-null  int64 
 8   admission_source_id       101766 non-null  int64 
 9   time_in_hospital          101766 non-null  int64 
 10  payer_code                101766 non-null  object
 11  medical_specialty         101766 non-null  object
 12  num_lab_procedures        101766 non-null  int64 
 13  num_procedures            101766 non-null  int64 
 14  num_

In [4]:
# Data Preprocessing
df.replace('?', pd.NA, inplace=True)
df.drop(columns=['weight', 'payer_code', 'medical_specialty'], inplace=True)
for col in df.select_dtypes(include='object').columns:
    df[col].fillna(df[col].mode()[0], inplace=True)
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])
  

In [5]:
# Splitting the data into features and target

X = df.drop('readmitted', axis=1)
y = df['readmitted']

In [6]:
# Splitting the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Random Forest Classifier

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)
print("Random Forest Classifier Results:")
print("Accuracy:", accuracy_score(y_test, rf_preds))
print("Classification Report:\n", classification_report(y_test, rf_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, rf_preds))
print("-"*50)

Random Forest Classifier Results:
Accuracy: 0.6000294782352363
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.02      0.03      2285
           1       0.52      0.45      0.48      7117
           2       0.63      0.82      0.72     10952

    accuracy                           0.60     20354
   macro avg       0.53      0.43      0.41     20354
weighted avg       0.57      0.60      0.56     20354

Confusion Matrix:
 [[  37  961 1287]
 [  36 3193 3888]
 [  13 1956 8983]]
--------------------------------------------------


In [8]:
# Logistic Regression Classifier

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
lr_preds = lr.predict(X_test)
print("Logistic Regression Classifier Results:")
print("Accuracy:", accuracy_score(y_test, lr_preds))
print("Classification Report:\n", classification_report(y_test, lr_preds, zero_division=1))
print("Confusion Matrix:\n", confusion_matrix(y_test, lr_preds))
print("-"*50)

Logistic Regression Classifier Results:
Accuracy: 0.5383217058072124
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00      2285
           1       0.44      0.15      0.22      7117
           2       0.55      0.91      0.69     10952

    accuracy                           0.54     20354
   macro avg       0.66      0.35      0.30     20354
weighted avg       0.56      0.54      0.45     20354

Confusion Matrix:
 [[   0  300 1985]
 [   0 1036 6081]
 [   0 1031 9921]]
--------------------------------------------------
