In [48]:
import pandas as pd

df = pd.read_csv('FullData.csv')
df.columns = df.columns.str.strip()
df = df.drop(columns=['Others','Family.History', 'CO', 'Diagnosis', 'Life.Style', 'Sleep', 'Category', 'Age.Group'])

In [49]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from imblearn.over_sampling import SMOTE

X = df.drop('Mortality', axis=1)
y = df['Mortality']

categorical_columns = X.select_dtypes(include=['object']).columns

onehot_encoder = OneHotEncoder()
X_encoded = onehot_encoder.fit_transform(X[categorical_columns])

X_encoded_df = pd.DataFrame(X_encoded.toarray(), columns=onehot_encoder.get_feature_names_out(categorical_columns))

X_final = pd.concat([X.drop(categorical_columns, axis=1), X_encoded_df], axis=1)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_final)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=75)

# Apply SMOTE
smote = SMOTE(random_state=75)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=75)
rf_classifier.fit(X_train_resampled, y_train_resampled)

y_pred = rf_classifier.predict(X_test)

In [50]:

print("Testing Accuracy:", rf_classifier.score(X_test, y_test))

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Testing Accuracy: 0.9324324324324325
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.98      0.96        58
           1       0.92      0.75      0.83        16

    accuracy                           0.93        74
   macro avg       0.93      0.87      0.89        74
weighted avg       0.93      0.93      0.93        74

Confusion Matrix:
[[57  1]
 [ 4 12]]
