In [None]:

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np

from IPython.display import display
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from imblearn.over_sampling import RandomOverSampler

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score,confusion_matrix, ConfusionMatrixDisplay, classification_report

In [None]:
def RandomForest(df):
    """Tune, train and evaluate a random-forest classifier on ``df``.

    The frame is split 60/20/20 into train / validation / test partitions,
    stratified on the target so every partition keeps the class ratio of
    ``df``. A randomized hyper-parameter search (5-fold CV) runs on the
    validation partition, a fresh forest with the winning parameters is
    fitted on the training partition, and it is scored on the test partition.

    The accuracy, the classification report and the confusion matrix are
    printed, and the confusion matrix is also drawn as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the target column ``'diagnostico'``; every other column
        is used as a feature.

    Returns
    -------
    None
        All results are printed / plotted; nothing is returned.
    """
    X = df.drop('diagnostico', axis=1)
    y = df['diagnostico']

    # Stratify both splits: without it a rare class can end up under- or
    # over-represented in a partition purely by chance.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)

    # 'auto' was dropped from the list: it was removed in scikit-learn 1.3
    # (it raises InvalidParameterError there) and, before that, it was just an
    # alias of 'sqrt' for classifiers, so it added no real candidate.
    params_grid = {'bootstrap': [True, False],
                   'max_depth': [20, 40, 60, 80],
                   'max_features': ['sqrt', 'log2'],
                   'min_samples_leaf': [1, 2, 4],
                   'min_samples_split': [2, 5, 10],
                   'n_estimators': [100, 200]}

    # Seed the estimator as well as the search so a re-run reproduces the
    # same candidates, the same scores and the same final model.
    rf = RandomForestClassifier(random_state=42)
    grid_classifier = RandomizedSearchCV(
        estimator=rf, param_distributions=params_grid, n_iter=100, cv=5,
        verbose=2, random_state=42, n_jobs=-1)
    # NOTE(review): the search is deliberately kept on the validation
    # partition, as in the original design; the test partition is never seen
    # during tuning.
    grid_classifier.fit(X_val, y_val)

    best_parameters = grid_classifier.best_params_

    model = RandomForestClassifier(random_state=42, **best_parameters)
    model.fit(X_train, y_train)

    pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, pred)
    # Compute the matrix once and reuse it for both the printout and the plot.
    matrix = confusion_matrix(y_test, pred)

    print(test_accuracy)
    print(classification_report(y_test, pred))
    print(matrix)

    # The tick labels assume a binary target whose sorted labels are
    # (no sepsis, sepsis) -- TODO confirm against the encoding of 'diagnostico'.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Sem Sepse', 'Com Sepse'],
                yticklabels=['Sem Sepse', 'Com Sepse'], ax=ax)
    ax.set_xlabel('Predito')
    ax.set_ylabel('Verdadeiro')
    ax.set_title('Matriz de Confusão')
    plt.show()

In [None]:
def RandomForest_Over(df):
    """Tune, train and evaluate a random forest with minority oversampling.

    The frame is first split 60/20/20 into train / validation / test
    partitions (stratified on the target). Random oversampling is applied
    only to data the model is fitted on:

    * during the hyper-parameter search it runs inside each CV fold, through
      an imbalanced-learn ``Pipeline``, so duplicated rows never cross folds;
    * for the final model it is applied to the training partition only.

    The test partition is left untouched, so the reported metrics describe
    performance on the real class distribution. Oversampling before the split
    would place copies of the same minority rows in both the training and the
    test data and inflate every score.

    The accuracy, the classification report and the confusion matrix are
    printed, and the confusion matrix is also drawn as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the target column ``'diagnostico'``; every other column
        is used as a feature.

    Returns
    -------
    None
        All results are printed / plotted; nothing is returned.
    """
    # Local import: imblearn's Pipeline (unlike sklearn's) accepts samplers
    # and applies them during fit only, never during predict/score.
    from imblearn.pipeline import Pipeline

    X = df.drop('diagnostico', axis=1)
    y = df['diagnostico']

    # Stratified sampling on the ORIGINAL labels, before any resampling.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)

    # Keys carry the 'rf__' prefix because the forest is a pipeline step.
    # 'auto' was dropped from max_features: it was removed in scikit-learn
    # 1.3 and was previously only an alias of 'sqrt' for classifiers.
    params_grid = {'rf__bootstrap': [True, False],
                   'rf__max_depth': [20, 40, 60, 80],
                   'rf__max_features': ['sqrt', 'log2'],
                   'rf__min_samples_leaf': [1, 2, 4],
                   'rf__min_samples_split': [2, 5, 10],
                   'rf__n_estimators': [100, 200]}

    # Oversampling of the minority class happens inside every CV fold.
    search_pipeline = Pipeline(steps=[
        ('oversample', RandomOverSampler(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
    ])
    grid_classifier = RandomizedSearchCV(
        estimator=search_pipeline, param_distributions=params_grid,
        n_iter=100, cv=5, verbose=2, random_state=42, n_jobs=-1)
    grid_classifier.fit(X_val, y_val)

    # Strip the step prefix so the parameters can be passed to the forest.
    best_parameters = {name.split('__', 1)[1]: value
                       for name, value in grid_classifier.best_params_.items()}

    # Balance the training partition only; the test partition stays as-is.
    oversampler = RandomOverSampler(random_state=42)
    X_train_bal, y_train_bal = oversampler.fit_resample(X_train, y_train)

    model = RandomForestClassifier(random_state=42, **best_parameters)
    model.fit(X_train_bal, y_train_bal)

    pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, pred)
    # Compute the matrix once and reuse it for both the printout and the plot.
    matrix = confusion_matrix(y_test, pred)

    print(test_accuracy)
    print(classification_report(y_test, pred))
    print(matrix)

    # The tick labels assume a binary target whose sorted labels are
    # (no sepsis, sepsis) -- TODO confirm against the encoding of 'diagnostico'.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Sem Sepse', 'Com Sepse'],
                yticklabels=['Sem Sepse', 'Com Sepse'], ax=ax)
    ax.set_xlabel('Predito')
    ax.set_ylabel('Verdadeiro')
    ax.set_title('Matriz de Confusão')
    plt.show()

Sem Tratar Missing

In [None]:
# Load the dataset for the "Sem Tratar Missing" section (presumably the data
# with missing values left untreated -- confirm); path is relative to the
# notebook's working directory.
df_final = pd.read_csv('df_final.csv')

In [None]:
# Tune, fit and report the random forest on df_final without oversampling.
RandomForest(df_final)

In [None]:
# Tune, fit and report the random forest on df_final using RandomOverSampler
# to balance the classes.
RandomForest_Over(df_final)

Valores Normais

In [None]:
# Load the dataset for the "Valores Normais" section (presumably missing
# values filled with normal reference values -- confirm); path is relative to
# the notebook's working directory.
df_normal = pd.read_csv('df_normal.csv')

In [None]:
# Tune, fit and report the random forest on df_normal without oversampling.
RandomForest(df_normal)

In [None]:
# Tune, fit and report the random forest on df_normal using RandomOverSampler
# to balance the classes.
RandomForest_Over(df_normal)

KNN

In [None]:
# Load the dataset for the "KNN" section (presumably KNN-imputed missing
# values -- confirm); path is relative to the notebook's working directory.
df_knn = pd.read_csv('df_knn.csv')

In [None]:
# Tune, fit and report the random forest on df_knn without oversampling.
RandomForest(df_knn)

In [None]:
# Tune, fit and report the random forest on df_knn using RandomOverSampler
# to balance the classes.
RandomForest_Over(df_knn)

Media

In [None]:
# Load the dataset for the "Media" (mean) section (presumably mean-imputed
# missing values -- confirm); path is relative to the notebook's working
# directory.
df_media = pd.read_csv('df_media.csv')

In [None]:
# Tune, fit and report the random forest on df_media without oversampling.
RandomForest(df_media)

In [None]:
# Tune, fit and report the random forest on df_media using RandomOverSampler
# to balance the classes.
RandomForest_Over(df_media)

Mediana

In [None]:
# Load the dataset for the "Mediana" (median) section (presumably
# median-imputed missing values -- confirm); path is relative to the
# notebook's working directory.
df_mediana = pd.read_csv('df_mediana.csv')

In [None]:
# Tune, fit and report the random forest on df_mediana without oversampling.
RandomForest(df_mediana)

In [None]:
# Tune, fit and report the random forest on df_mediana using
# RandomOverSampler to balance the classes.
RandomForest_Over(df_mediana)

Moda

In [None]:
# Load the dataset for the "Moda" (mode) section (presumably mode-imputed
# missing values -- confirm); path is relative to the notebook's working
# directory.
df_moda = pd.read_csv('df_moda.csv')

In [None]:
# Tune, fit and report the random forest on df_moda without oversampling.
RandomForest(df_moda)

In [None]:
# Tune, fit and report the random forest on df_moda using RandomOverSampler
# to balance the classes.
RandomForest_Over(df_moda)

Regressão

In [None]:
# Load the dataset for the "Regressão" (regression) section (presumably
# regression-imputed missing values -- confirm); path is relative to the
# notebook's working directory.
df_regressao = pd.read_csv('df_regressao.csv')

In [None]:
# Tune, fit and report the random forest on df_regressao without oversampling.
RandomForest(df_regressao)

In [None]:
# Tune, fit and report the random forest on df_regressao using
# RandomOverSampler to balance the classes.
RandomForest_Over(df_regressao)