In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import class_weight
from sklearn.metrics import classification_report
import joblib

data=pd.read_csv('Data-set-cleaned.csv')

# Separate features and target
X = data.drop('readmitted', axis=1)
y = data['readmitted']

# Handling class imbalance
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weights = dict(enumerate(class_weights))

# Split the dataset into a training set and a testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [3]:
# Define the Random Forest model with GridSearchCV
rf = RandomForestClassifier(class_weight=class_weights)
param_grid = {
     'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=StratifiedKFold(n_splits=5), scoring='f1_macro',verbose=1)
grid_search.fit(X_train, y_train)

# Best parameters and estimator
best_rf = grid_search.best_estimator_

# Predict on the test set using the best estimator
y_pred_rf = best_rf.predict(X_test)

# Print classification report for Random Forest
print("Classification Report for Random Forest:")
print(classification_report(y_test, y_pred_rf))

# Evaluate the best model using cross-validation
rf_scores = cross_val_score(best_rf, X_train, y_train, cv=10, scoring='accuracy')

# Print results
print("Random Forest 10-fold CV Accuracy: %0.2f (+/- %0.2f)" % (rf_scores.mean(), rf_scores.std() * 2))

# Save the best model
joblib.dump(best_rf, 'best_random_forest_model.pkl')

Fitting 5 folds for each of 27 candidates, totalling 135 fits
Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.91      0.72      0.81     25785
           1       0.18      0.48      0.26      3336

    accuracy                           0.69     29121
   macro avg       0.55      0.60      0.53     29121
weighted avg       0.83      0.69      0.74     29121

Random Forest 10-fold CV Accuracy: 0.70 (+/- 0.01)


['best_random_forest_model.pkl']