In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np

from sklearn.linear_model import LogisticRegression

from IPython.display import display
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import accuracy_score,confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score, classification_report, confusion_matrix, roc_curve, auc, f1_score


### Funções de Regressão Logística

In [None]:
def LReg(df):
    """Train and evaluate a class-weighted logistic regression on ``df``.

    The frame is split 60/20/20 into train/validation/test. Hyper-parameters
    are chosen by a 5-fold stratified grid search (scored on accuracy), the
    final model is fitted on the training split, and the test split is used
    to print F1, accuracy, precision, recall, specificity and AUC, followed by
    the classification report and the confusion matrix (also drawn as a
    heatmap).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'diagnostico'`` column holding the target; every
        other column is used as a feature. The target is assumed to be
        encoded as 0/1 (the class-weight dict and the default ``pos_label``
        of the metrics rely on it).

    Returns
    -------
    None
        Results are printed and plotted, nothing is returned.
    """
    X = df.drop('diagnostico', axis=1)
    y = df['diagnostico']

    # 20% held out for testing, then 25% of the remaining 80% (= 20% of the
    # total) becomes the validation split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

    # Each class is weighted by the prevalence of the *other* class so the
    # rarer class counts more. Computed from the training labels only, so the
    # test labels never influence the fitted model.
    class_weight = {0: (y_train == 1).mean(), 1: (y_train == 0).mean()}

    params_grid = {
        'penalty': ['l2'],
        'C': np.logspace(-3, 3, 7),
        'solver': ['lbfgs'],
        'max_iter': [100, 1000]
    }

    classifier = LogisticRegression(class_weight=class_weight, random_state=42)

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    grid_classifier = GridSearchCV(classifier, params_grid, scoring='accuracy', cv=cv, n_jobs=-1)
    # NOTE(review): the search is cross-validated on the validation split,
    # not on the training split (LReg_Over tunes on the training split) --
    # confirm this difference is intentional.
    grid_classifier.fit(X_val, y_val)

    best_parameters = grid_classifier.best_params_

    # A fresh estimator is required here: the search's own best estimator was
    # fitted on the validation split, while the final model must be fitted on
    # the training split.
    model = LogisticRegression(**best_parameters, class_weight=class_weight, random_state=42)
    model.fit(X_train, y_train)

    pred = model.predict(X_test)

    test_accuracy = accuracy_score(y_test, pred)
    # Pin the label order so the matrix is always 2x2 (row/col 0 = negative,
    # 1 = positive); the indexing below and the heatmap tick labels need it.
    cm = confusion_matrix(y_test, pred, labels=[0, 1])
    ppv = precision_score(y_test, pred)
    sensit = recall_score(y_test, pred)
    # Specificity = TN / (TN + FP); undefined when the test split has no
    # negative samples.
    negatives = cm[0, 0] + cm[0, 1]
    specificity = cm[0, 0] / negatives if negatives else float('nan')

    y_pred_proba = model.predict_proba(X_test)[:, 1]
    auc_score = roc_auc_score(y_test, y_pred_proba)
    f1 = f1_score(y_test, pred)
    print(f"F1-Score: {f1:.4f}")
    print(f"Acurácia: {test_accuracy:.4f}")
    print(f"PPV (Precision): {ppv:.4f}")
    print(f"Sensibilidade (Recall): {sensit:.4f}")
    print(f"Especificidade: {specificity:.4f}")
    print(f"AUC: {auc_score:.4f}")

    print(classification_report(y_test, pred))
    print(cm)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Sem Sepse', 'Com Sepse'], yticklabels=['Sem Sepse', 'Com Sepse'])
    plt.xlabel('Predito')
    plt.ylabel('Verdadeiro')
    plt.title('Matriz de Confusão')
    plt.show()


In [None]:
from imblearn.pipeline import make_pipeline
from imblearn.over_sampling import RandomOverSampler

def LReg_Over(df):
    """Train and evaluate a logistic regression with random oversampling.

    The frame is split 60/20/20 into train/validation/test (the validation
    part is not used by this function). A pipeline of ``RandomOverSampler``
    followed by ``LogisticRegression`` is tuned with a 5-fold stratified grid
    search (scored on accuracy) on the training split, and the refitted best
    pipeline is evaluated on the test split: F1, accuracy, precision, recall,
    specificity and AUC are printed, followed by the classification report and
    the confusion matrix (also drawn as a heatmap).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'diagnostico'`` column holding the target; every
        other column is used as a feature. The target is assumed to be
        encoded as 0/1 (the default ``pos_label`` of the metrics and the
        confusion-matrix indexing rely on it).

    Returns
    -------
    None
        Results are printed and plotted, nothing is returned.
    """
    X = df.drop('diagnostico', axis=1)
    y = df['diagnostico']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # The validation part is discarded; the split is kept so the training
    # subset is produced by the same two calls (same seeds) as in LReg.
    X_train, _X_val, y_train, _y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

    # Parameter grid; keys are prefixed with the pipeline step name that
    # make_pipeline derives from the estimator class.
    params_grid = {
        'logisticregression__penalty': ['l2'],
        'logisticregression__C': np.logspace(-3, 3, 7),
        'logisticregression__solver': ['lbfgs'],
        'logisticregression__max_iter': [100, 1000]
    }

    # Oversampling lives inside the pipeline so the grid search re-applies it
    # on each fold's training part instead of on the whole training split.
    pipeline = make_pipeline(RandomOverSampler(random_state=42), LogisticRegression(random_state=42))

    # Stratified cross-validation
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Grid search over the whole pipeline
    grid_classifier = GridSearchCV(pipeline, params_grid, scoring='accuracy', cv=cv, n_jobs=-1)
    grid_classifier.fit(X_train, y_train)

    # With the default refit=True the search has already refitted the best
    # pipeline on the full training split, so no extra fit() is needed.
    model = grid_classifier.best_estimator_

    # Predictions on the test split
    pred = model.predict(X_test)

    test_accuracy = accuracy_score(y_test, pred)
    # Pin the label order so the matrix is always 2x2 (row/col 0 = negative,
    # 1 = positive); the indexing below and the heatmap tick labels need it.
    cm = confusion_matrix(y_test, pred, labels=[0, 1])
    ppv = precision_score(y_test, pred)
    sensit = recall_score(y_test, pred)
    # Specificity = TN / (TN + FP); undefined when the test split has no
    # negative samples.
    negatives = cm[0, 0] + cm[0, 1]
    specificity = cm[0, 0] / negatives if negatives else float('nan')

    y_pred_proba = model.predict_proba(X_test)[:, 1]
    auc_score = roc_auc_score(y_test, y_pred_proba)
    f1 = f1_score(y_test, pred)
    print(f"F1-Score: {f1:.4f}")
    print(f"Acurácia: {test_accuracy:.4f}")
    print(f"PPV (Precision): {ppv:.4f}")
    print(f"Sensibilidade (Recall): {sensit:.4f}")
    print(f"Especificidade: {specificity:.4f}")
    print(f"AUC: {auc_score:.4f}")

    print(classification_report(y_test, pred))
    print(cm)

    # Plot the confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Sem Sepse', 'Com Sepse'], yticklabels=['Sem Sepse', 'Com Sepse'])
    plt.xlabel('Predito')
    plt.ylabel('Verdadeiro')
    plt.title('Matriz de Confusão')
    plt.show()


### Valores Normais

In [None]:
# Load 'df_normal.csv' (path relative to the working directory) into a DataFrame.
df_normal = pd.read_csv('df_normal.csv')

In [None]:
# Class-weighted logistic regression: prints the test metrics and plots the confusion matrix.
LReg(df_normal)

In [None]:
# Same evaluation, using random oversampling inside the pipeline instead of class weights.
LReg_Over(df_normal)

### KNN

In [None]:
# Load 'df_knn.csv' into a DataFrame.
# presumably the KNN-imputed variant of the data -- TODO confirm
df_knn = pd.read_csv('df_knn.csv')

In [None]:
# Class-weighted logistic regression: prints the test metrics and plots the confusion matrix.
LReg(df_knn)

In [None]:
# Same evaluation, using random oversampling inside the pipeline instead of class weights.
LReg_Over(df_knn)

### Média

In [None]:
# Load 'df_media.csv' into a DataFrame.
# presumably the mean-imputed variant of the data -- TODO confirm
df_media = pd.read_csv('df_media.csv')

In [None]:
# Class-weighted logistic regression: prints the test metrics and plots the confusion matrix.
LReg(df_media)

In [None]:
# Same evaluation, using random oversampling inside the pipeline instead of class weights.
LReg_Over(df_media)

### Mediana

In [None]:
# Load 'df_mediana.csv' into a DataFrame.
# presumably the median-imputed variant of the data -- TODO confirm
df_mediana = pd.read_csv('df_mediana.csv')

In [None]:
# Class-weighted logistic regression: prints the test metrics and plots the confusion matrix.
LReg(df_mediana)

In [None]:
# Same evaluation, using random oversampling inside the pipeline instead of class weights.
LReg_Over(df_mediana)

### Moda

In [None]:
# Load 'df_moda.csv' into a DataFrame.
# presumably the mode-imputed variant of the data -- TODO confirm
df_moda = pd.read_csv('df_moda.csv')

In [None]:
# Class-weighted logistic regression: prints the test metrics and plots the confusion matrix.
LReg(df_moda)

In [None]:
# Same evaluation, using random oversampling inside the pipeline instead of class weights.
LReg_Over(df_moda)

### Regressão

In [None]:
# Load 'df_regressao.csv' into a DataFrame.
# presumably the regression-imputed variant of the data -- TODO confirm
df_regressao = pd.read_csv('df_regressao.csv')

In [None]:
# Class-weighted logistic regression: prints the test metrics and plots the confusion matrix.
LReg(df_regressao)

In [None]:
# Same evaluation, using random oversampling inside the pipeline instead of class weights.
LReg_Over(df_regressao)