# Importing useful libraries

In [1]:
# Importing the pandas library for data manipulation
import pandas as pd

# Configurar o pandas para exibir todo o conteúdo de texto sem truncamento
pd.set_option('display.max_colwidth', None)

# Import numpy library for efficient numeric operations
import numpy as np

from scipy.stats import f_oneway
import joblib
from itertools import combinations

# Defining function to structure tables

In [2]:
def tabela_metricas(caminho_modelo):
    
    modelo = joblib.load(caminho_modelo)
    
    metricas = {}
    for coluna in ["Accuracy", "Precision", "Recall", "F1-score"]:
        media = round(modelo[coluna].mean(), 2)
        mini = round(modelo[coluna].min(), 2)
        maxi = round(modelo[coluna].max(), 2)
        metricas[coluna] = f"{media}% ({mini}% - {maxi}%)" 
    
    return metricas  

In [3]:
def tabela(caminho_rl, caminho_rf, caminho_svm, caminho_gboost):
    
    index_rl = ['LR']
    index_rf = ['RF']
    index_svm = ['SVM']
    index_gboost = ["GBOOST"]
    
    
    tabela_rl = pd.DataFrame(data = tabela_metricas(caminho_rl), index = index_rl)
    tabela_rf = pd.DataFrame(data = tabela_metricas(caminho_rf), index = index_rf)
    tabela_svm = pd.DataFrame(data = tabela_metricas(caminho_svm), index = index_svm)
    tabela_gboost = pd.DataFrame(data = tabela_metricas(caminho_gboost), index = index_gboost)
    
    df_completa = pd.concat([tabela_rl, tabela_rf, tabela_svm, tabela_gboost])

    return df_completa

# Defining a function that organizes the hyperparameters selected by Gridsearch in a table

In [4]:
def extrair_parametros_rl(modelo):
    
    classificador = joblib.load(modelo)
    
    # Acesse os hiperparâmetros diretamente
    penalty = classificador.penalty
    solver = classificador.solver

    # Combine os hiperparâmetros em uma string formatada
    parametros_formatados = f"penalty='{penalty}', solver='{solver}'"

    return parametros_formatados

In [5]:
def extrair_parametros_rf(modelo):
    
    classificador = joblib.load(modelo)
    
    # Acesse os hiperparâmetros diretamente
    criterion = classificador.criterion
    n_estimators = classificador.n_estimators
    max_depth = classificador.max_depth
    min_samples_split = classificador.min_samples_split
    max_features = classificador.max_features
    class_weight = classificador.class_weight

    # Combine os hiperparâmetros em uma string formatada
    parametros_formatados = f"criterion='{criterion}', n_estimators={n_estimators}, max_depth={max_depth}, min_samples_split={min_samples_split}, max_features='{max_features}', class_weight='{class_weight}'"

    return parametros_formatados

In [6]:
def extrair_parametros_svm(modelo):
    
    classificador = joblib.load(modelo)
    
    # Acesse os hiperparâmetros diretamente
    kernel = classificador.kernel
    C = classificador.C
    gamma = classificador.gamma

    # Combine os hiperparâmetros em uma string formatada
    parametros_formatados = f"kernel='{kernel}', C={C}, gamma={gamma}"

    return parametros_formatados

In [7]:
def extrair_parametros_gboost(modelo):
    
    classificador = joblib.load(modelo)
    
    # Acesse os parâmetros diretamente
    max_depth = classificador.max_depth
    min_samples_split = classificador.min_samples_split
    n_estimators = classificador.n_estimators
    subsample = classificador.subsample

    # Combine os parâmetros em uma string formatada
    parametros_formatados = f"max_depth={max_depth}, min_samples_split={min_samples_split}, n_estimators={n_estimators}, subsample={subsample}"

    return parametros_formatados

In [8]:
def tabela_hiperparametros(caminho_rl, caminho_rf, caminho_svm, caminho_gboost):
    
    index = ['Hyperparameters']
    
    h_rl = [extrair_parametros_rl(caminho_rl)]
    h_rf = [extrair_parametros_rf(caminho_rf)]
    h_svm = [extrair_parametros_svm(caminho_svm)]
    h_gboost = [extrair_parametros_gboost(caminho_gboost)]
    
    dic = { "LR": h_rl[0],
        "RF": h_rf[0],
        "SVM": h_svm[0],
        "GBOOST": h_gboost[0]}

    tabela = pd.DataFrame(data = dic, index = index).T
    

    return tabela

# Defining function for ANOVA test

In [9]:
def teste_anova(*args: list):
    
    ''' Structuring the application of the ANOVA test '''
    
    # Applying the ANOVA test
    resultado_anova = f_oneway(*args)

    # Checking if the p-value is significant
    if resultado_anova.pvalue < 0.05:
        mensagem = "There is a significant difference between the averages."
    else:
        mensagem = "There is no significant difference between the means."

    # Displaying the result
    return {
        "Estatística F": round(resultado_anova.statistic,4),
        "Valor p": round(resultado_anova.pvalue,4),
        "Mensagem": mensagem
    }

In [10]:
def comparacao(grupo_1, grupo_2):
    
    comparacao_1 = joblib.load(grupo_1)
    comparacao_2 = joblib.load(grupo_2)
    
    dic = {
        "Accuracy": teste_anova(comparacao_1["Accuracy"].values.flatten(),comparacao_2["Accuracy"].values.flatten()).values(),
        "Precision": teste_anova(comparacao_1["Precision"].values.flatten(),comparacao_2["Precision"].values.flatten()).values(), 
        "Recall": teste_anova(comparacao_1["Recall"].values.flatten(),comparacao_2["Recall"].values.flatten()).values(),
        "F1-score": teste_anova(comparacao_1["F1-score"].values.flatten(),comparacao_2["F1-score"].values.flatten()).values(), 
    }
    
    index = ["F-statistic", "p-value", "Interpretation"]
    df =pd.DataFrame(data = dic, index = index).T 
    
    return df

In [11]:
def comparacao_entre_modelos(grupos):
    resultados = {}
    grupo_dados = [joblib.load(grupo) for grupo in grupos]

    # Iterate over all combinations of pairs
    for (idx1, dados1), (idx2, dados2) in combinations(enumerate(grupo_dados), 2):
        nome_grupo_1 = grupos[idx1]
        nome_grupo_2 = grupos[idx2]
        key = f"{nome_grupo_1} x {nome_grupo_2}"
        resultados[key] = {}

        for metric in ["Accuracy", "Precision", "Recall", "F1-score"]:
            resultado_anova = teste_anova(dados1[metric].values.flatten(), dados2[metric].values.flatten())
            resultados[key][metric] = resultado_anova

    # Constructing the dataframe for easier visualization
    data_frames = {}
    for pair_key, metrics_results in resultados.items():
        data_frames[pair_key] = pd.DataFrame(metrics_results).T

    return data_frames

# Applying the functions

# Risk stratification prediction

## Structured data

### Ministry of Health

In [8]:
RStabela_MS = tabela("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl",
               "1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rf",
               "1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_svm",
               "1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_gboost")
RStabela_MS

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,83.48% (75.41% - 90.98%),79.66% (70.71% - 91.2%),83.48% (75.41% - 90.98%),81.04% (72.49% - 90.33%)
RF,87.51% (79.51% - 91.8%),87.84% (79.94% - 92.07%),87.51% (79.51% - 91.8%),87.35% (79.62% - 91.87%)
SVM,85.94% (79.51% - 92.62%),86.54% (80.72% - 92.9%),85.94% (79.51% - 92.62%),85.86% (79.24% - 92.52%)
GBOOST,85.7% (77.87% - 91.8%),86.06% (76.76% - 91.99%),85.7% (77.87% - 91.8%),85.44% (77.21% - 91.67%)


In [9]:
# Saving the tables obtained
#tabela_RStabela_MS = RStabela_MS.to_excel("tabela_metricas_RStabela_MS.xlsx")

In [12]:
RShiperp_MS = tabela_hiperparametros("1- Risk stratification/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                    "1- Risk stratification/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                    "1- Risk stratification/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                    "1- Risk stratification/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
RShiperp_MS

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=1.0"


In [13]:
# Saving the tables obtained
tabela_RShiperp_MS = RShiperp_MS.to_excel("tabela_metricas_RShiperp_MS.xlsx")

#### Tem diferença entre os modelos de predição?

In [95]:
RSgrupo_MS = ["1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl", "1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rf", "1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_svm", "1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_gboost"]
RSresultado_MS = comparacao_entre_modelos(RSgrupo_MS)

# If you want to print the results
for key, df in RSresultado_MS.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl x 1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rf:
          Estatística F Valor p  \
Accuracy       109.4422     0.0   
Precision       218.115     0.0   
Recall         109.4422     0.0   
F1-score       192.5871     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl x 1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_svm:
          Estatística F Valor p  \
Accuracy        37.4323     0.0   
Precision      149.8737     0.0   
Recall          37.4323  

### Variables with significance

In [14]:
RStabela_Art = tabela("1- Risk stratification/3.1- Article - Quimio/RStabela_metricasART_rl",
                     "1- Risk stratification/3.1- Article - Quimio/RStabela_metricasART_rf",
                     "1- Risk stratification/3.1- Article - Quimio/RStabela_metricasART_svm",
                     "1- Risk stratification/3.1- Article - Quimio/RStabela_metricasART_gboost")
RStabela_Art

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,69.51% (62.5% - 78.41%),69.2% (57.63% - 81.72%),69.51% (62.5% - 78.41%),65.44% (57.39% - 76.12%)
RF,50.3% (35.23% - 63.64%),64.67% (52.54% - 75.81%),50.3% (35.23% - 63.64%),54.79% (41.84% - 66.44%)
SVM,70.39% (56.82% - 79.55%),74.05% (63.08% - 82.2%),70.39% (56.82% - 79.55%),65.01% (47.32% - 76.6%)
GBOOST,68.05% (59.09% - 78.41%),65.38% (56.16% - 78.65%),68.05% (59.09% - 78.41%),65.41% (56.81% - 76.7%)


In [15]:
# Saving the tables obtained
#tabela_RStabela_Art = RStabela_Art.to_excel("tabela_metricas_RStabela_Art.xlsx")

In [14]:
RShiperp_Art = tabela_hiperparametros("1- Risk stratification/3.1- Article - Quimio/melhor_modelo_RL",
                                     "1- Risk stratification/3.1- Article - Quimio/melhor_modelo_RF",
                                     "1- Risk stratification/3.1- Article - Quimio/melhor_modelo_SVM",
                                     "1- Risk stratification/3.1- Article - Quimio/melhor_modelo_GBoost")
RShiperp_Art

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=0.01, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [15]:
# Saving the tables obtained
tabela_RShiperp_Art = RShiperp_Art.to_excel("tabela_metricas_RShiperp_Art.xlsx")

In [13]:
RSgrupo_Art = ["1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rl",
            "1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rf",
            "1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_svm",
            "1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_gboost"]
RSresultado_Art = comparacao_entre_modelos(RSgrupo_Art)

# If you want to print the results
for key, df in RSresultado_Art.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rl x 1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rf:
          Estatística F Valor p  \
Accuracy       785.6262     0.0   
Precision       43.9007     0.0   
Recall         785.6262     0.0   
F1-score       282.0611     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rl x 1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_svm:
          Estatística F Valor p  \
Accuracy         2.4895  0.1162   
Precision       59.3244     0.0   
Recall           2.48

### Ministry of Health + Variables with significance

In [12]:
RStabela_MSArt = tabela("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl",
                      "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rf",
                      "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_svm",
                      "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_gboost"
)
RStabela_MSArt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,83.2% (74.71% - 90.8%),79.47% (70.0% - 91.21%),83.2% (74.71% - 90.8%),80.98% (71.33% - 90.29%)
RF,85.49% (73.56% - 90.8%),86.94% (74.56% - 93.64%),85.49% (73.56% - 90.8%),85.76% (72.56% - 91.34%)
SVM,85.1% (77.01% - 95.4%),85.32% (72.88% - 95.57%),85.1% (77.01% - 95.4%),84.53% (74.92% - 95.43%)
GBOOST,87.91% (80.46% - 94.25%),87.96% (75.04% - 94.51%),87.91% (80.46% - 94.25%),87.32% (78.19% - 94.27%)


In [13]:
# Saving the tables obtained
#tabela_RStabela_MSArt = RStabela_MSArt.to_excel("tabela_metricas_RStabela_MSArt.xlsx")

In [16]:
RShiperp_MSArt = tabela_hiperparametros("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_RL",
                                       "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_RF",
                                       "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_SVM",
                                       "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_GBoost")
RShiperp_MSArt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [17]:
# Saving the tables obtained
tabela_RShiperp_MSArt = RShiperp_MSArt.to_excel("tabela_metricas_RShiperp_MSArt.xlsx")

In [15]:
RSgrupo_MSArt = ["1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rl",
                      "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rf",
                      "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_svm",
                      "1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_gboost"]
RSresultado_MSArt = comparacao_entre_modelos(RSgrupo_MSArt)

# If you want to print the results
for key, df in RSresultado_MSArt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rl x 1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rf:
          Estatística F Valor p  \
Accuracy        19.9283     0.0   
Precision      138.3616     0.0   
Recall          19.9283     0.0   
F1-score        72.2348     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rl x 1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_svm:
          Estatíst

### Ministry of Health with Oversampling

In [16]:
RStabela_MS0 = tabela("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl",
                     "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rf",
                     "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_svm",
                     "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_gboost")
RStabela_MS0

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,85.08% (80.68% - 91.3%),85.28% (80.35% - 91.26%),85.08% (80.68% - 91.3%),84.7% (79.98% - 91.11%)
RF,93.45% (89.37% - 97.1%),93.56% (89.68% - 97.1%),93.45% (89.37% - 97.1%),93.45% (89.17% - 97.1%)
SVM,86.96% (81.64% - 93.72%),87.18% (81.44% - 93.7%),86.96% (81.64% - 93.72%),86.68% (81.32% - 93.64%)
GBOOST,92.62% (89.37% - 96.14%),92.74% (89.3% - 96.19%),92.62% (89.37% - 96.14%),92.61% (89.28% - 96.13%)


In [17]:
# Saving the tables obtained
#tabela_RStabela_MS0 = RStabela_MS0.to_excel("tabela_metricas_RStabela_MS0.xlsx")

In [18]:
RShiperp_MS0 = tabela_hiperparametros("1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                     "1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                     "1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                     "1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
RShiperp_MS0

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=400, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=0.8"


In [19]:
# Saving the tables obtained
tabela_RShiperp_MS0 = RShiperp_MS0.to_excel("tabela_metricas_RShiperp_MS0.xlsx")

In [16]:
RSgrupo_MS0 = ["1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl",
                      "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rf",
                      "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_svm",
                      "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_gboost"]
RSresultado_MS0 = comparacao_entre_modelos(RSgrupo_MS0)

# If you want to print the results
for key, df in RSresultado_MS0.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl x 1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rf:
          Estatística F Valor p  \
Accuracy      1044.5562     0.0   
Precision     1031.7652     0.0   
Recall        1044.5562     0.0   
F1-score      1065.3592     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl x 1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_svm:
          Estatística F Valor p  \
Accuracy        40.3829     0.0   
Prec

### Ministry of Health + Variables with significance with Oversampling

In [18]:
RStabela_MSArt0 = tabela("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_rl",
                        "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_rf",
                        "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_svm",
                        "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_gboost")
RStabela_MSArt0

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,83.14% (72.41% - 90.8%),79.53% (68.34% - 88.7%),83.14% (72.41% - 90.8%),80.92% (68.33% - 89.22%)
RF,87.03% (77.01% - 96.55%),87.36% (77.33% - 96.6%),87.03% (77.01% - 96.55%),86.59% (76.92% - 96.45%)
SVM,85.4% (66.67% - 93.1%),85.59% (70.59% - 93.36%),85.4% (66.67% - 93.1%),84.89% (67.0% - 93.15%)
GBOOST,87.1% (79.31% - 93.1%),87.4% (74.91% - 93.4%),87.1% (79.31% - 93.1%),86.72% (77.57% - 93.1%)


In [19]:
# Saving the tables obtained
#tabela_RStabela_MSArt0 = RStabela_MSArt0.to_excel("tabela_metricas_RStabela_MSArt0.xlsx")

In [20]:
RShiperp_MSArt0 = tabela_hiperparametros("1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                        "1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                        "1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                        "1- Risk stratification/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
RShiperp_MSArt0

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=400, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=0.8"


In [21]:
# Saving the tables obtained
tabela_RShiperp_MSArt0 = RShiperp_MSArt0.to_excel("tabela_metricas_RShiperp_MSArt0.xlsx")

In [18]:
RSgrupo_MSArt0 = ["1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rl",
                        "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rf",
                        "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_svm",
                        "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_gboost"]
RSresultado_MSArt0 = comparacao_entre_modelos(RSgrupo_MSArt0)

# If you want to print the results
for key, df in RSresultado_MSArt0.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rl x 1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rf:
          Estatística F Valor p  \
Accuracy         63.907     0.0   
Precision      182.2016     0.0   
Recall           63.907     0.0   
F1-score       107.7233     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rl x 1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metric

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [71]:
pergunta1_rl = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl",
                         "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl")
pergunta1_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,18.621,0.0,There is a significant difference between the averages.
Precision,108.655,0.0,There is a significant difference between the averages.
Recall,18.621,0.0,There is a significant difference between the averages.
F1-score,68.365,0.0,There is a significant difference between the averages.


In [72]:
pergunta1_rf = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rf",
                         "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rf")
pergunta1_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,453.109,0.0,There is a significant difference between the averages.
Precision,396.144,0.0,There is a significant difference between the averages.
Recall,453.109,0.0,There is a significant difference between the averages.
F1-score,449.351,0.0,There is a significant difference between the averages.


In [73]:
pergunta1_svm = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_svm",
                          "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_svm")
pergunta1_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,9.208,0.003,There is a significant difference between the averages.
Precision,3.494,0.063,There is no significant difference between the means.
Recall,9.208,0.003,There is a significant difference between the averages.
F1-score,5.759,0.017,There is a significant difference between the averages.


In [74]:
pergunta1_gboost = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_gboost",
                             "1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_gboost")
pergunta1_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,409.689,0.0,There is a significant difference between the averages.
Precision,387.506,0.0,There is a significant difference between the averages.
Recall,409.689,0.0,There is a significant difference between the averages.
F1-score,410.949,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [19]:
pergunta2_rl = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rl",
                         "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rl")
pergunta2_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0132,0.9086,There is no significant difference between the means.
Precision,0.0066,0.9356,There is no significant difference between the means.
Recall,0.0132,0.9086,There is no significant difference between the means.
F1-score,0.0104,0.919,There is no significant difference between the means.


In [20]:
pergunta2_rf = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rf",
                         "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rf")
pergunta2_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,9.387,0.0025,There is a significant difference between the averages.
Precision,0.578,0.448,There is no significant difference between the means.
Recall,9.387,0.0025,There is a significant difference between the averages.
F1-score,2.492,0.116,There is no significant difference between the means.


In [21]:
pergunta2_svm = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_svm",
                          "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_svm")
pergunta2_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.3441,0.5581,There is no significant difference between the means.
Precision,0.2163,0.6424,There is no significant difference between the means.
Recall,0.3441,0.5581,There is no significant difference between the means.
F1-score,0.4324,0.5116,There is no significant difference between the means.


In [22]:
pergunta2_gboost = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_gboost",
                             "1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_gboost")
pergunta2_gboost 

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2.9541,0.0872,There is no significant difference between the means.
Precision,1.1002,0.2955,There is no significant difference between the means.
Recall,2.9541,0.0872,There is no significant difference between the means.
F1-score,1.408,0.2368,There is no significant difference between the means.


## Structured data with aggravated risk stratification for patients exposed to pesticides

### Ministry of Health

In [20]:
RStabela_MS_alt = tabela("1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rl",
                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rf",
                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_svm",
                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_gboost")
RStabela_MS_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,71.7% (63.93% - 81.15%),70.79% (60.75% - 80.02%),71.7% (63.93% - 81.15%),70.72% (61.62% - 80.49%)
RF,69.65% (57.38% - 77.05%),70.29% (57.63% - 79.7%),69.65% (57.38% - 77.05%),69.57% (59.37% - 77.6%)
SVM,66.22% (58.2% - 75.41%),43.98% (33.87% - 56.87%),66.22% (58.2% - 75.41%),52.82% (42.82% - 64.84%)
GBOOST,69.87% (61.48% - 77.87%),68.89% (61.02% - 76.62%),69.87% (61.48% - 77.87%),68.96% (60.15% - 76.97%)


In [21]:
# Saving the tables obtained
#tabela_RStabela_MS_alt = RStabela_MS_alt.to_excel("tabela_metricas_RStabela_MS_alt.xlsx")

In [22]:
RShiperp_MS_alt = tabela_hiperparametros("1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
RShiperp_MS_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=0.001, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [23]:
# Saving the tables obtained
tabela_RShiperp_MS_alt = RShiperp_MS_alt.to_excel("tabela_metricas_RShiperp_MS_alt.xlsx")

In [46]:
RSgrupo_MS_alt = ["1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rl",
                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rf",
                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_svm",
                        "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_gboost"]
RSresultado_MS_alt = comparacao_entre_modelos(RSgrupo_MS_alt)

# If you want to print the results
for key, df in RSresultado_MS_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rf:
          Estatística F Valor p  \
Accuracy        17.5502     0.0   
Precision        0.8257  0.3646   
Recall          17.5502     0.0   
F1-score         5.2623  0.0228   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision    There is no significant difference between the means.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_svm:
      

### Ministry of Health + Variables with significance

In [24]:
RStabela_MSArt_alt = tabela("1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_gboost")
RStabela_MSArt_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,89.34% (82.76% - 96.55%),87.89% (79.91% - 95.42%),89.34% (82.76% - 96.55%),88.36% (81.31% - 95.96%)
RF,90.53% (81.61% - 95.4%),89.06% (79.15% - 96.48%),90.53% (81.61% - 95.4%),89.67% (80.34% - 95.89%)
SVM,89.69% (82.76% - 96.55%),88.09% (81.11% - 96.55%),89.69% (82.76% - 96.55%),88.7% (82.96% - 96.53%)
GBOOST,89.13% (81.61% - 95.4%),88.38% (80.47% - 94.35%),89.13% (81.61% - 95.4%),88.56% (81.17% - 94.8%)


In [25]:
# Saving the tables obtained
tabela_RStabela_MSArt_alt = RStabela_MSArt_alt.to_excel("tabela_metricas_RStabela_MSArt_alt.xlsx")

In [24]:
RShiperp_MSArt_alt = tabela_hiperparametros("1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_RL", 
                                           "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_RF",
                                           "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_SVM",
                                           "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/melhor_modelo_GBoost")
RShiperp_MSArt_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=50, subsample=0.8"


In [25]:
# Saving the tables obtained
tabela_RShiperp_MSArt_alt = RShiperp_MSArt_alt.to_excel("tabela_metricas_RShiperp_MSArt_alt.xlsx")

In [78]:
RSgrupo_MSArt_alt = ["1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_gboost"]
RSresultado_MSArt_alt = comparacao_entre_modelos(RSgrupo_MSArt_alt)

# If you want to print the results
for key, df in RSresultado_MSArt_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rf:
          Estatística F Valor p  \
Accuracy         9.9276  0.0019   
Precision        6.2437  0.0133   
Recall           9.9276  0.0019   
F1-score         9.8411   0.002   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + art

### Variables with significance

In [26]:
RStabela_Art_alt = tabela("1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_gboost")
RStabela_Art_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,87.38% (78.41% - 93.18%),86.43% (74.5% - 93.18%),87.38% (78.41% - 93.18%),86.74% (76.34% - 93.18%)
RF,78.61% (67.05% - 87.5%),86.57% (78.46% - 95.25%),78.61% (67.05% - 87.5%),81.05% (68.53% - 87.44%)
SVM,88.32% (82.95% - 96.59%),87.55% (80.73% - 96.66%),88.32% (82.95% - 96.59%),87.73% (81.49% - 96.61%)
GBOOST,85.76% (78.41% - 93.18%),85.15% (76.36% - 93.18%),85.76% (78.41% - 93.18%),85.23% (77.34% - 93.18%)


In [27]:
# Saving the tables obtained
tabela_RStabela_Art_alt = RStabela_Art_alt.to_excel("tabela_metricas_RStabela_Art_alt.xlsx")

In [26]:
RShiperp_Art_alt = tabela_hiperparametros("1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/melhor_modelo_RL",
                                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/melhor_modelo_RF",
                                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/melhor_modelo_SVM",
                                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/melhor_modelo_GBoost")
RShiperp_Art_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=4, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [27]:
# Saving the tables obtained
tabela_RShiperp_Art_alt = RShiperp_Art_alt.to_excel("tabela_metricas_RShiperp_Art_alt.xlsx")

In [79]:
RSgrupo_Art_alt = ["1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_gboost"]
RSresultado_Art_alt = comparacao_entre_modelos(RSgrupo_Art_alt)

# If you want to print the results
for key, df in RSresultado_Art_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rf:
          Estatística F Valor p  \
Accuracy        262.423     0.0   
Precision         0.073  0.7873   
Recall          262.423     0.0   
F1-score       139.9323     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision    There is no significant difference between the means.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_svm:
          Estatística F Valor p  \
Accuracy   

### Ministry of Health with Oversampling

In [28]:
RStabela_MS0_alt = tabela("1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_gboost")
RStabela_MS0_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,73.86% (67.9% - 79.01%),73.67% (67.21% - 79.4%),73.86% (67.9% - 79.01%),72.93% (66.64% - 78.25%)
RF,82.86% (77.37% - 87.65%),82.81% (76.93% - 87.65%),82.86% (77.37% - 87.65%),82.67% (77.09% - 87.44%)
SVM,75.61% (69.55% - 80.66%),75.98% (69.01% - 81.15%),75.61% (69.55% - 80.66%),75.01% (68.51% - 80.33%)
GBOOST,82.29% (77.78% - 88.48%),82.25% (77.22% - 88.44%),82.29% (77.78% - 88.48%),82.03% (77.34% - 88.38%)


In [29]:
# Saving the tables obtained
tabela_RStabela_MS0_alt = RStabela_MS0_alt.to_excel("tabela_metricas_RStabela_MS0_alt.xlsx")

In [80]:
RShiperp_MS0_alt = tabela_hiperparametros("1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_gboost")
RShiperp_MS0_alt

AttributeError: 'DataFrame' object has no attribute 'penalty'

In [29]:
# Saving the tables obtained
tabela_RShiperp_MS0_alt = RShiperp_MS0_alt.to_excel("tabela_metricas_RShiperp_MS0_alt.xlsx")

In [81]:
RSgrupo_MS0_alt = ["1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rf",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_svm",
                         "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_gboost"]
RSresultado_MS0_alt = comparacao_entre_modelos(RSgrupo_MS0_alt)

# If you want to print the results
for key, df in RSresultado_MS0_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rf:
          Estatística F Valor p  \
Accuracy       785.5202     0.0   
Precision       776.242     0.0   
Recall         785.5202     0.0   
F1-score       864.7525     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health over

### Ministry of Health + Variables with significance with Oversampling

In [30]:
RStabela_MSArt0_alt = tabela("1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rl",
                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rf",
                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_svm",
                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_gboost")
RStabela_MSArt0_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,89.36% (80.79% - 95.48%),90.02% (84.0% - 95.48%),89.36% (80.79% - 95.48%),88.99% (79.58% - 95.42%)
RF,95.41% (91.53% - 98.31%),95.52% (91.6% - 98.38%),95.41% (91.53% - 98.31%),95.41% (91.53% - 98.31%)
SVM,92.4% (87.57% - 96.05%),92.53% (88.03% - 96.15%),92.4% (87.57% - 96.05%),92.32% (87.16% - 96.05%)
GBOOST,94.56% (88.7% - 98.31%),94.67% (88.94% - 98.31%),94.56% (88.7% - 98.31%),94.54% (88.56% - 98.3%)


In [31]:
# Saving the tables obtained
tabela_RStabela_MSArt0_alt = RStabela_MSArt0_alt.to_excel("tabela_metricas_RStabela_MSArt0_alt.xlsx")

In [30]:
RShiperp_MSArt0_alt = tabela_hiperparametros("1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/melhor_modelo_RL",
                                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/melhor_modelo_RF",
                                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/melhor_modelo_SVM",
                                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/melhor_modelo_GBoost")
RShiperp_MSArt0_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=0.8"


In [31]:
# Saving the tables obtained
tabela_RShiperp_MSArt0_alt = RShiperp_MSArt0_alt.to_excel("tabela_metricas_RShiperp_MSArt0_alt.xlsx")

In [82]:
RSgrupo_MSArt0_alt = ["1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rl",
                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rf",
                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_svm",
                            "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_gboost"]
RSresultado_MSArt0_alt = comparacao_entre_modelos(RSgrupo_MSArt0_alt)

# If you want to print the results
for key, df in RSresultado_MSArt0_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rl x 1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rf:
          Estatística F Valor p  \
Accuracy        395.286     0.0   
Precision      415.3291     0.0   
Recall          395.286     0.0   
F1-score       404.9341     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rl x 1- Risk stratification/7- Analise_dados_alt

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [50]:
pergunta1_alt_rl = comparacao("1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl")
pergunta1_alt_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,26.62,0.0,There is a significant difference between the averages.
Precision,39.6014,0.0,There is a significant difference between the averages.
Recall,26.62,0.0,There is a significant difference between the averages.
F1-score,26.1074,0.0,There is a significant difference between the averages.


In [51]:
pergunta1_alt_rf = comparacao("1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rf")
pergunta1_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1032.6937,0.0,There is a significant difference between the averages.
Precision,785.0849,0.0,There is a significant difference between the averages.
Recall,1032.6937,0.0,There is a significant difference between the averages.
F1-score,993.7158,0.0,There is a significant difference between the averages.


In [52]:
pergunta1_alt_svm = comparacao("1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_svm")
pergunta1_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,489.9482,0.0,There is a significant difference between the averages.
Precision,3664.9358,0.0,There is a significant difference between the averages.
Recall,489.9482,0.0,There is a significant difference between the averages.
F1-score,1876.1154,0.0,There is a significant difference between the averages.


In [53]:
pergunta1_alt_gboost = comparacao("1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_gboost")
pergunta1_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1015.9877,0.0,There is a significant difference between the averages.
Precision,1002.3327,0.0,There is a significant difference between the averages.
Recall,1015.9877,0.0,There is a significant difference between the averages.
F1-score,1030.6439,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [54]:
pergunta2_alt_rl = comparacao("1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rl")
pergunta2_alt_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.002,0.964,There is no significant difference between the means.
Precision,28.7113,0.0,There is a significant difference between the averages.
Recall,0.002,0.964,There is no significant difference between the means.
F1-score,2.3534,0.1266,There is no significant difference between the means.


In [55]:
pergunta2_alt_rf = comparacao("1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rf")
pergunta2_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,246.7603,0.0,There is a significant difference between the averages.
Precision,317.4017,0.0,There is a significant difference between the averages.
Recall,246.7603,0.0,There is a significant difference between the averages.
F1-score,299.5542,0.0,There is a significant difference between the averages.


In [56]:
pergunta2_alt_svm = comparacao("1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_svm")
pergunta2_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,70.3392,0.0,There is a significant difference between the averages.
Precision,162.6237,0.0,There is a significant difference between the averages.
Recall,70.3392,0.0,There is a significant difference between the averages.
F1-score,112.7122,0.0,There is a significant difference between the averages.


In [57]:
pergunta2_alt_gboost = comparacao("1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_gboost")
pergunta2_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,236.3977,0.0,There is a significant difference between the averages.
Precision,312.2122,0.0,There is a significant difference between the averages.
Recall,236.3977,0.0,There is a significant difference between the averages.
F1-score,285.8072,0.0,There is a significant difference between the averages.


### Is there a difference between the models predicted with structured data and the models with restratified data?

#### MS

In [58]:
pergunta3_alt_rlMS = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rl")
pergunta3_alt_rlMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,662.7305,0.0,There is a significant difference between the averages.
Precision,199.0909,0.0,There is a significant difference between the averages.
Recall,662.7305,0.0,There is a significant difference between the averages.
F1-score,393.5744,0.0,There is a significant difference between the averages.


In [59]:
pergunta3_alt_rf = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_rf")
pergunta3_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1765.0379,0.0,There is a significant difference between the averages.
Precision,1449.0008,0.0,There is a significant difference between the averages.
Recall,1765.0379,0.0,There is a significant difference between the averages.
F1-score,1689.5336,0.0,There is a significant difference between the averages.


In [60]:
pergunta3_alt_svm = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_svm")
pergunta3_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1988.9869,0.0,There is a significant difference between the averages.
Precision,6149.4654,0.0,There is a significant difference between the averages.
Recall,1988.9869,0.0,There is a significant difference between the averages.
F1-score,3924.4144,0.0,There is a significant difference between the averages.


In [61]:
pergunta3_alt_gboost = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_alt_gboost")
pergunta3_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1283.5887,0.0,There is a significant difference between the averages.
Precision,1359.9767,0.0,There is a significant difference between the averages.
Recall,1283.5887,0.0,There is a significant difference between the averages.
F1-score,1263.5355,0.0,There is a significant difference between the averages.


#### MS + Art

In [62]:
pergunta3_alt_rl = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rl")
pergunta3_alt_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,191.0392,0.0,There is a significant difference between the averages.
Precision,209.9463,0.0,There is a significant difference between the averages.
Recall,191.0392,0.0,There is a significant difference between the averages.
F1-score,209.0122,0.0,There is a significant difference between the averages.


In [63]:
pergunta3_alt_rf = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_rf")
pergunta3_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,121.6068,0.0,There is a significant difference between the averages.
Precision,15.9634,0.0001,There is a significant difference between the averages.
Recall,121.6068,0.0,There is a significant difference between the averages.
F1-score,66.7508,0.0,There is a significant difference between the averages.


In [64]:
pergunta3_alt_svm = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_svm")
pergunta3_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,119.5139,0.0,There is a significant difference between the averages.
Precision,32.7776,0.0,There is a significant difference between the averages.
Recall,119.5139,0.0,There is a significant difference between the averages.
F1-score,83.615,0.0,There is a significant difference between the averages.


In [65]:
pergunta3_alt_gboost = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/RStabela_metricasMSart_alt_gboost")
pergunta3_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,7.8902,0.0055,There is a significant difference between the averages.
Precision,0.7474,0.3884,There is no significant difference between the means.
Recall,7.8902,0.0055,There is a significant difference between the averages.
F1-score,7.4494,0.0069,There is a significant difference between the averages.


#### Art

In [66]:
pergunta3_alt_rl = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rl")
pergunta3_alt_rl
7 - Classe_modelos/2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1532.9733,0.0,There is a significant difference between the averages.
Precision,755.485,0.0,There is a significant difference between the averages.
Recall,1532.9733,0.0,There is a significant difference between the averages.
F1-score,1733.5666,0.0,There is a significant difference between the averages.


In [67]:
pergunta3_alt_rf = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_rf")
pergunta3_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1446.5796,0.0,There is a significant difference between the averages.
Precision,1478.0742,0.0,There is a significant difference between the averages.
Recall,1446.5796,0.0,There is a significant difference between the averages.
F1-score,1855.687,0.0,There is a significant difference between the averages.


In [68]:
pergunta3_alt_svm = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_svm")
pergunta3_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1331.2362,0.0,There is a significant difference between the averages.
Precision,919.3489,0.0,There is a significant difference between the averages.
Recall,1331.2362,0.0,There is a significant difference between the averages.
F1-score,1493.0514,0.0,There is a significant difference between the averages.


In [70]:
pergunta3_alt_gboost = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/3.1- Article - Quimio/RStabela_metricasART_alt_gboost")
pergunta3_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1236.8097,0.0,There is a significant difference between the averages.
Precision,1148.647,0.0,There is a significant difference between the averages.
Recall,1236.8097,0.0,There is a significant difference between the averages.
F1-score,1370.1096,0.0,There is a significant difference between the averages.


#### MS Oversampling

In [71]:
pergunta3_alt_rl = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rl")
pergunta3_alt_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1210.1055,0.0,There is a significant difference between the averages.
Precision,1291.2629,0.0,There is a significant difference between the averages.
Recall,1210.1055,0.0,There is a significant difference between the averages.
F1-score,1223.7954,0.0,There is a significant difference between the averages.


In [72]:
pergunta3_alt_rf = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_rf")
pergunta3_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1689.6027,0.0,There is a significant difference between the averages.
Precision,1655.7673,0.0,There is a significant difference between the averages.
Recall,1689.6027,0.0,There is a significant difference between the averages.
F1-score,1692.6599,0.0,There is a significant difference between the averages.


In [73]:
pergunta3_alt_svm = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_svm")
pergunta3_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1327.9637,0.0,There is a significant difference between the averages.
Precision,1228.4426,0.0,There is a significant difference between the averages.
Recall,1327.9637,0.0,There is a significant difference between the averages.
F1-score,1346.6893,0.0,There is a significant difference between the averages.


In [74]:
pergunta3_alt_gboost = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_alt_gboost")
pergunta3_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1454.8371,0.0,There is a significant difference between the averages.
Precision,1441.9779,0.0,There is a significant difference between the averages.
Recall,1454.8371,0.0,There is a significant difference between the averages.
F1-score,1475.7662,0.0,There is a significant difference between the averages.


#### MS + Art Oversampling

In [75]:
pergunta3_alt_rl = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rl",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_rl")
pergunta3_alt_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,204.6102,0.0,There is a significant difference between the averages.
Precision,454.626,0.0,There is a significant difference between the averages.
Recall,204.6102,0.0,There is a significant difference between the averages.
F1-score,269.0939,0.0,There is a significant difference between the averages.


In [14]:
pergunta3_alt_rf = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rf",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_alt_rf")
pergunta3_alt_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,498.0968,0.0,There is a significant difference between the averages.
Precision,396.9709,0.0,There is a significant difference between the averages.
Recall,498.0968,0.0,There is a significant difference between the averages.
F1-score,488.8955,0.0,There is a significant difference between the averages.


In [76]:
pergunta3_alt_svm = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_svm",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_svm")
pergunta3_alt_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,260.4452,0.0,There is a significant difference between the averages.
Precision,222.2681,0.0,There is a significant difference between the averages.
Recall,260.4452,0.0,There is a significant difference between the averages.
F1-score,258.1818,0.0,There is a significant difference between the averages.


In [77]:
pergunta3_alt_gboost = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_gboost",
                               "1- Risk stratification/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/RStabela_metricasMSart0_alt_gboost")
pergunta3_alt_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,356.6082,0.0,There is a significant difference between the averages.
Precision,286.1099,0.0,There is a significant difference between the averages.
Recall,356.6082,0.0,There is a significant difference between the averages.
F1-score,351.4091,0.0,There is a significant difference between the averages.


## Data with variable selection by correlation

### Ministry of Health

In [32]:
RStabela_MS_corr = tabela("1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rl",
                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rf",
                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_svm",
                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_gboost")
RStabela_MS_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,66.94% (58.2% - 74.59%),65.7% (53.3% - 76.65%),66.94% (58.2% - 74.59%),63.46% (54.36% - 72.93%)
RF,65.51% (58.2% - 74.59%),66.6% (58.31% - 74.72%),65.51% (58.2% - 74.59%),65.26% (56.71% - 73.52%)
SVM,67.75% (59.02% - 77.05%),69.41% (57.82% - 82.15%),67.75% (59.02% - 77.05%),64.23% (52.93% - 73.96%)
GBOOST,65.05% (54.92% - 72.95%),65.2% (53.89% - 72.89%),65.05% (54.92% - 72.95%),64.15% (53.09% - 71.83%)


In [33]:
# Saving the tables obtained
tabela_RStabela_MS_corr = RStabela_MS_corr.to_excel("tabela_metricas_RStabela_MS_corr.xlsx")

In [32]:
RShiperp_MS_corr = tabela_hiperparametros("1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/melhor_modelo_RL",
                                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/melhor_modelo_RF",
                                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/melhor_modelo_SVM",
                                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/melhor_modelo_GBoost")
RShiperp_MS_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=50, subsample=1.0"


In [33]:
# Saving the tables obtained
tabela_RShiperp_MS_corr = RShiperp_MS_corr.to_excel("tabela_metricas_RShiperp_MS_corr.xlsx")

In [23]:
RSgrupo_MS_corr = ["1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rl",
                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rf",
                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_svm",
                         "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_gboost"]
RSresultado_MS_corr = comparacao_entre_modelos(RSgrupo_MS_corr)

# If you want to print the results
for key, df in RSresultado_MS_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rl x 1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rf:
          Estatística F Valor p  \
Accuracy         8.4939   0.004   
Precision        2.1916  0.1404   
Recall           8.4939   0.004   
F1-score        10.5544  0.0014   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision    There is no significant difference between the means.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rl x 1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_svm:
          Estatística F Valor p  \
Accuracy          2.601  0.1084

### Ministry of Health + Variables with significance

In [12]:
RStabela_MSArt_corr = tabela("1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rl",
                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rf",
                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_svm",
                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_gboost")
RStabela_MSArt_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,77.13% (66.67% - 83.91%),74.15% (61.96% - 84.73%),77.13% (66.67% - 83.91%),74.93% (63.09% - 83.44%)
RF,78.97% (70.11% - 86.21%),80.95% (70.17% - 88.71%),78.97% (70.11% - 86.21%),79.19% (69.94% - 86.71%)
SVM,78.6% (68.97% - 87.36%),79.21% (69.86% - 89.16%),78.6% (68.97% - 87.36%),77.87% (68.51% - 87.92%)
GBOOST,77.26% (66.67% - 85.06%),77.63% (62.71% - 86.51%),77.26% (66.67% - 85.06%),76.55% (64.39% - 84.92%)


In [35]:
# Saving the tables obtained
tabela_RStabela_MSArt_corr = RStabela_MSArt_corr.to_excel("tabela_metricas_RStabela_MSArt_corr.xlsx")

In [34]:
RShiperp_MSArt_corr = tabela_hiperparametros("1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/melhor_modelo_RL",
                                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/melhor_modelo_RF",
                                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/melhor_modelo_SVM",
                                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/melhor_modelo_GBoost")
RShiperp_MSArt_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=4, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=50, subsample=0.8"


In [35]:
# Saving the tables obtained
tabela_RShiperp_MSArt_corr = RShiperp_MSArt_corr.to_excel("tabela_metricas_RShiperp_MSArt_corr.xlsx")

In [13]:
RSgrupo_MSArt_corr = ["1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rl",
                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rf",
                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_svm",
                            "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_gboost"]
RSresultado_MSArt_corr = comparacao_entre_modelos(RSgrupo_MSArt_corr)

# If you want to print the results
for key, df in RSresultado_MSArt_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rl x 1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rf:
          Estatística F Valor p  \
Accuracy        12.9306  0.0004   
Precision      150.1228     0.0   
Recall          12.9306  0.0004   
F1-score        60.6048     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rl x 1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_svm:
          Esta

### Variables with significance

In [36]:
RStabela_Art_corr = tabela("1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rl",
                          "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rf",
                          "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_svm",
                          "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_gboost")
RStabela_Art_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.45% (46.59% - 64.77%),46.26% (21.71% - 70.15%),56.45% (46.59% - 64.77%),47.02% (29.62% - 61.48%)
RF,33.9% (18.18% - 47.73%),50.3% (15.74% - 67.28%),33.9% (18.18% - 47.73%),37.11% (16.56% - 53.8%)
SVM,58.64% (48.86% - 69.32%),34.55% (23.88% - 48.05%),58.64% (48.86% - 69.32%),43.43% (32.08% - 56.76%)
GBOOST,55.09% (34.09% - 63.64%),49.53% (27.03% - 61.2%),55.09% (34.09% - 63.64%),49.58% (31.72% - 61.84%)


In [37]:
# Saving the tables obtained
tabela_RStabela_Art_corr = RStabela_Art_corr.to_excel("tabela_metricas_RStabela_Art_corr.xlsx")

In [38]:
RShiperp_Art_corr = tabela_hiperparametros("1- Risk stratification/8- Analise_correlacao/3- Article/melhor_modelo_RL",
                                          "1- Risk stratification/8- Analise_correlacao/3- Article/melhor_modelo_RF",
                                          "1- Risk stratification/8- Analise_correlacao/3- Article/melhor_modelo_SVM",
                                          "1- Risk stratification/8- Analise_correlacao/3- Article/melhor_modelo_GBoost")
RShiperp_Art_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=4, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=0.001, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=0.8"


In [39]:
# Saving the tables obtained
tabela_RShiperp_Art_corr = RShiperp_Art_corr.to_excel("tabela_metricas_RShiperp_Art_corr.xlsx")

In [14]:
RSgrupo_Art_corr = ["1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rl",
                          "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rf",
                          "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_svm",
                          "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_gboost"]
RSresultado_Art_corr = comparacao_entre_modelos(RSgrupo_Art_corr)

# If you want to print the results
for key, df in RSresultado_Art_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rl x 1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rf:
          Estatística F Valor p  \
Accuracy       688.2343     0.0   
Precision         9.056   0.003   
Recall         688.2343     0.0   
F1-score        83.8848     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rl x 1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_svm:
          Estatística F Valor p  \
Accuracy        14.0596  0.0002   
Precision       95.7706     0.0   
Recal

### Ministry of Health with Oversampling

In [46]:
RStabela_MS0_corr = tabela("1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rl",
                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rf",
                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_svm",
                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_gboost")
RStabela_MS0_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,66.97% (59.9% - 74.4%),66.4% (58.3% - 73.91%),66.97% (59.9% - 74.4%),66.01% (57.45% - 73.65%)
RF,78.35% (71.01% - 84.54%),78.76% (70.76% - 84.54%),78.35% (71.01% - 84.54%),78.19% (70.43% - 84.39%)
SVM,69.29% (61.84% - 75.36%),70.3% (62.18% - 78.02%),69.29% (61.84% - 75.36%),68.81% (60.86% - 75.13%)
GBOOST,78.24% (70.05% - 84.54%),78.55% (70.36% - 85.59%),78.24% (70.05% - 84.54%),78.02% (69.38% - 84.55%)


In [47]:
# Saving the tables obtained
tabela_RStabela_MS0_corr = RStabela_MS0_corr.to_excel("tabela_metricas_RStabela_MS0_corr.xlsx")

In [44]:
RShiperp_MS0_corr = tabela_hiperparametros("1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/melhor_modelo_RL",
                                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/melhor_modelo_RF",
                                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/melhor_modelo_SVM",
                                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/melhor_modelo_GBoost")
RShiperp_MS0_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=100, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0"


In [45]:
# Saving the tables obtained
tabela_RShiperp_MS0_corr = RShiperp_MS0_corr.to_excel("tabela_metricas_RShiperp_MS0_corr.xlsx")

In [15]:
RSgrupo_MS0_corr = ["1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rl",
                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rf",
                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_svm",
                          "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_gboost"]
RSresultado_MS0_corr = comparacao_entre_modelos(RSgrupo_MS0_corr)

# If you want to print the results
for key, df in RSresultado_MS0_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rl x 1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rf:
          Estatística F Valor p  \
Accuracy       730.3055     0.0   
Precision      760.8449     0.0   
Recall         730.3055     0.0   
F1-score       721.1695     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rl x 1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_svm:
          

### Ministry of Health + Variables with significance with Oversampling

In [48]:
RStabela_MSArt0_corr = tabela("1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rl",
                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rf",
                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_svm",
                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_gboost")
RStabela_MSArt0_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,82.03% (75.33% - 90.67%),82.08% (74.64% - 91.11%),82.03% (75.33% - 90.67%),81.46% (74.72% - 90.41%)
RF,87.75% (82.0% - 92.67%),88.44% (82.18% - 92.92%),87.75% (82.0% - 92.67%),87.8% (82.01% - 92.73%)
SVM,84.83% (78.0% - 89.33%),85.02% (78.81% - 89.28%),84.83% (78.0% - 89.33%),84.72% (77.97% - 89.23%)
GBOOST,87.31% (80.67% - 94.0%),87.55% (82.1% - 94.18%),87.31% (80.67% - 94.0%),87.28% (81.06% - 94.0%)


In [49]:
# Saving the tables obtained
tabela_RStabela_MSArt0_corr = RStabela_MSArt0_corr.to_excel("tabela_metricas_RStabela_MSArt0_corr.xlsx")

In [50]:
RShiperp_MSArt0_corr = tabela_hiperparametros("1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/melhor_modelo_RL",
                                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/melhor_modelo_RF",
                                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/melhor_modelo_SVM",
                                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/melhor_modelo_GBoost")
RShiperp_MSArt0_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=400, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=0.8"


In [51]:
# Saving the tables obtained
tabela_RShiperp_MSArt0_corr = RShiperp_MSArt0_corr.to_excel("tabela_metricas_RShiperp_MSArt0_corr.xlsx")

In [16]:
RSgrupo_MSArt0_corr = ["1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rl",
                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rf",
                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_svm",
                             "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_gboost"]
RSresultado_MSArt0_corr = comparacao_entre_modelos(RSgrupo_MSArt0_corr)

# If you want to print the results
for key, df in RSresultado_MSArt0_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rl x 1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rf:
          Estatística F Valor p  \
Accuracy       217.7256     0.0   
Precision      269.4112     0.0   
Recall         217.7256     0.0   
F1-score       246.5214     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rl x 1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [17]:
pergunta1_corr_rl = comparacao("1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rl",
                               "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rl")
pergunta1_corr_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0024,0.9609,There is no significant difference between the means.
Precision,1.3249,0.2511,There is no significant difference between the means.
Recall,0.0024,0.9609,There is no significant difference between the means.
F1-score,20.8411,0.0,There is a significant difference between the averages.


In [18]:
pergunta1_corr_rf = comparacao("1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rf",
                              "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rf")
pergunta1_corr_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,869.3545,0.0,There is a significant difference between the averages.
Precision,737.1069,0.0,There is a significant difference between the averages.
Recall,869.3545,0.0,There is a significant difference between the averages.
F1-score,833.0966,0.0,There is a significant difference between the averages.


In [19]:
pergunta1_corr_svm = comparacao("1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_svm",
                               "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_svm")
pergunta1_corr_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,12.5109,0.0005,There is a significant difference between the averages.
Precision,2.2811,0.1326,There is no significant difference between the means.
Recall,12.5109,0.0005,There is a significant difference between the averages.
F1-score,80.7873,0.0,There is a significant difference between the averages.


In [20]:
pergunta1_corr_gboost = comparacao("1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_gboost",
                                  "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_gboost")
pergunta1_corr_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,961.4261,0.0,There is a significant difference between the averages.
Precision,818.4031,0.0,There is a significant difference between the averages.
Recall,961.4261,0.0,There is a significant difference between the averages.
F1-score,990.3875,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [21]:
pergunta2_corr_rl = comparacao("1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rl",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rl")
pergunta2_corr_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,112.8715,0.0,There is a significant difference between the averages.
Precision,220.1354,0.0,There is a significant difference between the averages.
Recall,112.8715,0.0,There is a significant difference between the averages.
F1-score,159.7006,0.0,There is a significant difference between the averages.


In [22]:
pergunta2_corr_rf = comparacao("1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rf",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rf")
pergunta2_corr_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,387.1915,0.0,There is a significant difference between the averages.
Precision,326.1736,0.0,There is a significant difference between the averages.
Recall,387.1915,0.0,There is a significant difference between the averages.
F1-score,381.006,0.0,There is a significant difference between the averages.


In [23]:
pergunta2_corr_svm = comparacao("1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_svm",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_svm")
pergunta2_corr_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,205.4264,0.0,There is a significant difference between the averages.
Precision,151.173,0.0,There is a significant difference between the averages.
Recall,205.4264,0.0,There is a significant difference between the averages.
F1-score,218.45,0.0,There is a significant difference between the averages.


In [24]:
pergunta2_corr_gboost = comparacao("1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_gboost",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_gboost")
pergunta2_corr_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,492.8415,0.0,There is a significant difference between the averages.
Precision,385.0605,0.0,There is a significant difference between the averages.
Recall,492.8415,0.0,There is a significant difference between the averages.
F1-score,493.2832,0.0,There is a significant difference between the averages.


### Is there a difference between the models predicted with all variables and with the variables selected by correlation?

#### MS

In [25]:
pergunta3_corr_rlMS = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rl",
                              "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rl")
pergunta3_corr_rlMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1208.9286,0.0,There is a significant difference between the averages.
Precision,387.5831,0.0,There is a significant difference between the averages.
Recall,1208.9286,0.0,There is a significant difference between the averages.
F1-score,913.75,0.0,There is a significant difference between the averages.


In [26]:
pergunta3_corr_rfMS = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_rf",
                              "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_rf")
pergunta3_corr_rfMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2937.8058,0.0,There is a significant difference between the averages.
Precision,2551.6967,0.0,There is a significant difference between the averages.
Recall,2937.8058,0.0,There is a significant difference between the averages.
F1-score,2766.1779,0.0,There is a significant difference between the averages.


In [27]:
pergunta3_corr_svmMS = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_svm",
                              "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_svm")
pergunta3_corr_svmMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1743.9678,0.0,There is a significant difference between the averages.
Precision,878.688,0.0,There is a significant difference between the averages.
Recall,1743.9678,0.0,There is a significant difference between the averages.
F1-score,1805.419,0.0,There is a significant difference between the averages.


In [28]:
pergunta3_corr_gboostMS = comparacao("1- Risk stratification/1- Ministry_of_Health/4- Metrics/RStabela_metricasMS_gboost",
                              "1- Risk stratification/8- Analise_correlacao/1- Ministry_of_Health/RStabela_metricasMS_corr_gboost")
pergunta3_corr_gboostMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2034.8049,0.0,There is a significant difference between the averages.
Precision,1805.9795,0.0,There is a significant difference between the averages.
Recall,2034.8049,0.0,There is a significant difference between the averages.
F1-score,1991.9896,0.0,There is a significant difference between the averages.


#### MS + ART

In [29]:
pergunta3_corr_rlMSArt = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rl",
                              "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rl")
pergunta3_corr_rlMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,144.1564,0.0,There is a significant difference between the averages.
Precision,67.5509,0.0,There is a significant difference between the averages.
Recall,144.1564,0.0,There is a significant difference between the averages.
F1-score,107.3115,0.0,There is a significant difference between the averages.


In [30]:
pergunta3_corr_rfMSArt = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_rf",
                              "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_rf")
pergunta3_corr_rfMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,157.1406,0.0,There is a significant difference between the averages.
Precision,123.5984,0.0,There is a significant difference between the averages.
Recall,157.1406,0.0,There is a significant difference between the averages.
F1-score,157.0591,0.0,There is a significant difference between the averages.


In [31]:
pergunta3_corr_svmMSArt = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_svm",
                              "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_svm")
pergunta3_corr_svmMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,178.6372,0.0,There is a significant difference between the averages.
Precision,120.3425,0.0,There is a significant difference between the averages.
Recall,178.6372,0.0,There is a significant difference between the averages.
F1-score,156.3254,0.0,There is a significant difference between the averages.


In [32]:
pergunta3_corr_gboostMSArt = comparacao("1- Risk stratification/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_alt_gboost",
                              "1- Risk stratification/8- Analise_correlacao/2- Ministry_of_Health + article/RStabela_metricasMSart_corr_gboost")
pergunta3_corr_gboostMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,462.3343,0.0,There is a significant difference between the averages.
Precision,319.682,0.0,There is a significant difference between the averages.
Recall,462.3343,0.0,There is a significant difference between the averages.
F1-score,395.4533,0.0,There is a significant difference between the averages.


#### ART

In [33]:
pergunta3_corr_rlArt = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rl",
                              "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rl")
pergunta3_corr_rlArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,570.2762,0.0,There is a significant difference between the averages.
Precision,356.2486,0.0,There is a significant difference between the averages.
Recall,570.2762,0.0,There is a significant difference between the averages.
F1-score,605.1274,0.0,There is a significant difference between the averages.


In [34]:
pergunta3_corr_rfArt = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_rf",
                              "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_rf")
pergunta3_corr_rfArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,295.3608,0.0,There is a significant difference between the averages.
Precision,260.0986,0.0,There is a significant difference between the averages.
Recall,295.3608,0.0,There is a significant difference between the averages.
F1-score,309.6913,0.0,There is a significant difference between the averages.


In [36]:
pergunta3_corr_svmArt = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_svm",
                              "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_svm")
pergunta3_corr_svmArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,397.6738,0.0,There is a significant difference between the averages.
Precision,4422.9358,0.0,There is a significant difference between the averages.
Recall,397.6738,0.0,There is a significant difference between the averages.
F1-score,895.2263,0.0,There is a significant difference between the averages.


In [37]:
pergunta3_corr_gboostArt = comparacao("1- Risk stratification/3.1- Article - Quimio/4- Metrics/RStabela_metricasART_gboost",
                              "1- Risk stratification/8- Analise_correlacao/3- Article/RStabela_metricasART_alt_gboost")
pergunta3_corr_gboostArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,456.3202,0.0,There is a significant difference between the averages.
Precision,389.0207,0.0,There is a significant difference between the averages.
Recall,456.3202,0.0,There is a significant difference between the averages.
F1-score,426.2284,0.0,There is a significant difference between the averages.


#### MS Oversampling

In [38]:
pergunta3_corr_rlMS0 = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rl",
                              "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rl")
pergunta3_corr_rlMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2314.6965,0.0,There is a significant difference between the averages.
Precision,2216.0192,0.0,There is a significant difference between the averages.
Recall,2314.6965,0.0,There is a significant difference between the averages.
F1-score,2048.4633,0.0,There is a significant difference between the averages.


In [39]:
pergunta3_corr_rfMS0 = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_rf",
                              "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_rf")
pergunta3_corr_rfMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2214.3615,0.0,There is a significant difference between the averages.
Precision,2054.1599,0.0,There is a significant difference between the averages.
Recall,2214.3615,0.0,There is a significant difference between the averages.
F1-score,2175.2411,0.0,There is a significant difference between the averages.


In [40]:
pergunta3_corr_svmMS0 = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_svm",
                              "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_svm")
pergunta3_corr_svmMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2799.779,0.0,There is a significant difference between the averages.
Precision,2111.4067,0.0,There is a significant difference between the averages.
Recall,2799.779,0.0,There is a significant difference between the averages.
F1-score,2709.7876,0.0,There is a significant difference between the averages.


In [41]:
pergunta3_corr_gboostMS0 = comparacao("1- Risk stratification/4- Ministry_of_Health oversampling/4- Metrics/RStabela_metricasMSo_gboost",
                              "1- Risk stratification/8- Analise_correlacao/4- Ministry_of_Health oversampling/RStabela_metricasMSo_corr_gboost")
pergunta3_corr_gboostMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2351.0224,0.0,There is a significant difference between the averages.
Precision,2197.6408,0.0,There is a significant difference between the averages.
Recall,2351.0224,0.0,There is a significant difference between the averages.
F1-score,2326.2838,0.0,There is a significant difference between the averages.


#### MS + ART Oversampling

In [42]:
pergunta3_corr_rlMSArt0 = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rl",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rl")
pergunta3_corr_rlMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,5.9576,0.0155,There is a significant difference between the averages.
Precision,22.6875,0.0,There is a significant difference between the averages.
Recall,5.9576,0.0155,There is a significant difference between the averages.
F1-score,1.1216,0.2909,There is no significant difference between the means.


In [43]:
pergunta3_corr_rfMSArt0 = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_rf",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_rf")
pergunta3_corr_rfMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2.8562,0.0926,There is no significant difference between the means.
Precision,5.826,0.0167,There is a significant difference between the averages.
Recall,2.8562,0.0926,There is no significant difference between the means.
F1-score,7.4197,0.007,There is a significant difference between the averages.


In [44]:
pergunta3_corr_svmMSArt0 = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_svm",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_svm")
pergunta3_corr_svmMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.5668,0.2122,There is no significant difference between the means.
Precision,1.3369,0.249,There is no significant difference between the means.
Recall,1.5668,0.2122,There is no significant difference between the means.
F1-score,0.1347,0.714,There is no significant difference between the means.


In [45]:
pergunta3_corr_gboostMSArt0 = comparacao("1- Risk stratification/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/RStabela_metricasMSart0_gboost",
                              "1- Risk stratification/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/RStabela_metricasMSart0_corr_gboost")
pergunta3_corr_gboostMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.2311,0.6313,There is no significant difference between the means.
Precision,0.105,0.7462,There is no significant difference between the means.
Recall,0.2311,0.6313,There is no significant difference between the means.
F1-score,1.621,0.2044,There is no significant difference between the means.


## Unitary addition of the parameters that showed significance in the Ministry of Health variables

## Applying the ANOVA test

# Pesticide exposure prediction

## Structured data

### Ministry of Health

In [52]:
EXPtabela_MS = tabela("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl",
                     "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rf",
                     "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_svm",
                     "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_gboost")
EXPtabela_MS

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.03% (41.57% - 65.17%),57.52% (44.23% - 69.77%),56.03% (41.57% - 65.17%),55.56% (39.96% - 65.25%)
RF,64.27% (53.93% - 76.4%),64.88% (54.84% - 77.28%),64.27% (53.93% - 76.4%),64.17% (53.83% - 76.36%)
SVM,68.08% (55.06% - 76.4%),80.8% (75.29% - 84.2%),68.08% (55.06% - 76.4%),65.01% (48.31% - 75.09%)
GBOOST,69.4% (57.3% - 79.78%),70.39% (58.9% - 79.85%),69.4% (57.3% - 79.78%),69.32% (57.56% - 79.77%)


In [53]:
# Saving the tables obtained
tabela_EXPtabela_MS = EXPtabela_MS.to_excel("tabela_metricas_EXPtabela_MS.xlsx")

In [54]:
EXPhiperp_MS = tabela_hiperparametros("2- Exposure to pesticides/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                     "2- Exposure to pesticides/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                     "2- Exposure to pesticides/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                     "2- Exposure to pesticides/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MS

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=400, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=10"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=50, subsample=0.8"


In [55]:
# Saving the tables obtained
tabela_EXPhiperp_MS = EXPhiperp_MS.to_excel("tabela_metricas_EXPhiperp_MS.xlsx")

In [14]:
EXPgrupo_MS = ["2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl",
                     "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rf",
                     "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_svm",
                     "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_gboost"]
EXPresultado_MS = comparacao_entre_modelos(EXPgrupo_MS)

# If you want to print the results
for key, df in EXPresultado_MS.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl x 2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rf:
          Estatística F Valor p  \
Accuracy       141.4178     0.0   
Precision       109.758     0.0   
Recall         141.4178     0.0   
F1-score       143.1044     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl x 2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_svm:
          Estatística F Valor p  \
Accuracy        379.547     0.0   
Precision     2095.1929     0.0   
Recall   

### Variables with significance

In [56]:
EXPtabela_Art = tabela("2- Exposure to pesticides/3.1- Article - Quimio/EXPtabela_metricasArt_rl",
                      "2- Exposure to pesticides/3.1- Article - Quimio/EXPtabela_metricasArt_rf",
                      "2- Exposure to pesticides/3.1- Article - Quimio/EXPtabela_metricasArt_svm",
                      "2- Exposure to pesticides/3.1- Article - Quimio/EXPtabela_metricasArt_gboost")
EXPtabela_Art

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.6% (43.84% - 69.86%),57.61% (44.85% - 70.27%),56.6% (43.84% - 69.86%),56.16% (36.47% - 69.85%)
RF,63.82% (52.05% - 73.97%),64.51% (52.5% - 73.9%),63.82% (52.05% - 73.97%),63.58% (51.66% - 73.92%)
SVM,66.15% (47.95% - 75.34%),67.34% (56.15% - 76.96%),66.15% (47.95% - 75.34%),66.02% (46.67% - 75.34%)
GBOOST,63.51% (50.68% - 72.6%),64.59% (53.13% - 72.7%),63.51% (50.68% - 72.6%),63.17% (48.81% - 72.41%)


In [57]:
# Saving the tables obtained
tabela_EXPtabela_Art = EXPtabela_Art.to_excel("tabela_metricas_EXPtabela_Art.xlsx")

In [58]:
EXPhiperp_Art = tabela_hiperparametros("2- Exposure to pesticides/3.1- Article - Quimio/melhor_modelo_RL",
                                      "2- Exposure to pesticides/3.1- Article - Quimio/melhor_modelo_RF",
                                      "2- Exposure to pesticides/3.1- Article - Quimio/melhor_modelo_SVM",
                                      "2- Exposure to pesticides/3.1- Article - Quimio/melhor_modelo_GBoost")
EXPhiperp_Art

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=1, gamma=10"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=100, subsample=0.8"


In [59]:
# Saving the tables obtained
tabela_EXPhiperp_Art = EXPhiperp_Art.to_excel("tabela_metricas_EXPhiperp_Art.xlsx")

In [17]:
EXPgrupo_Art = ["2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl",
                      "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf",
                      "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm",
                      "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_gboost"]
EXPresultado_Art = comparacao_entre_modelos(EXPgrupo_Art)

# If you want to print the results
for key, df in EXPresultado_Art.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl x 2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf:
          Estatística F Valor p  \
Accuracy        98.6215     0.0   
Precision        83.068     0.0   
Recall          98.6215     0.0   
F1-score        94.3514     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl x 2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm:
          Estatística F Valor p  \
Accuracy       169.8063     0.0   
Precision      167.8759     0.0   
Recal

### Ministry of Health + Variables with significance

In [61]:
EXPtabela_MSArt = tabela("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl",
                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rf",
                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_svm",
                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_gboost")
EXPtabela_MSArt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.72% (43.66% - 67.61%),58.29% (43.89% - 68.23%),56.72% (43.66% - 67.61%),56.43% (43.46% - 67.59%)
RF,62.86% (46.48% - 76.06%),63.69% (48.83% - 76.14%),62.86% (46.48% - 76.06%),62.73% (45.41% - 76.09%)
SVM,65.58% (50.7% - 78.87%),80.07% (65.63% - 85.34%),65.58% (50.7% - 78.87%),61.88% (40.43% - 78.1%)
GBOOST,64.35% (50.7% - 77.46%),65.41% (50.57% - 77.57%),64.35% (50.7% - 77.46%),64.24% (50.62% - 77.46%)


In [62]:
# Saving the tables obtained
tabela_EXPtabela_MSArt = EXPtabela_MSArt.to_excel("tabela_metricas_EXPtabela_MSArt.xlsx")

In [63]:
EXPhiperp_MSArt = tabela_hiperparametros("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RL",
                                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RF",
                                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_SVM",
                                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MSArt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=4, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=10"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=1.0"


In [64]:
# Saving the tables obtained
tabela_EXPhiperp_MSArt = EXPhiperp_MSArt.to_excel("tabela_metricas_EXPhiperp_MSArt.xlsx")

In [15]:
EXPgrupo_MSArt = ["2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl",
                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rf",
                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_svm",
                        "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_gboost"]
EXPresultado_MSArt = comparacao_entre_modelos(EXPgrupo_MSArt)

# If you want to print the results
for key, df in EXPresultado_MSArt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl x 2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rf:
          Estatística F Valor p  \
Accuracy        74.1211     0.0   
Precision       56.6608     0.0   
Recall          74.1211     0.0   
F1-score        75.3324     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl x 2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_svm:
          Estatística 

### Ministry of Health with Oversampling

In [65]:
EXPtabela_MS0 = tabela("2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl",
                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rf",
                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_svm",
                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_gboost")
EXPtabela_MS0

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.71% (48.95% - 63.64%),57.72% (49.84% - 71.16%),56.71% (48.95% - 63.64%),56.25% (47.58% - 63.44%)
RF,63.62% (55.94% - 71.33%),64.07% (55.89% - 71.7%),63.62% (55.94% - 71.33%),63.51% (55.89% - 71.33%)
SVM,60.13% (42.66% - 72.03%),68.51% (46.22% - 78.32%),60.13% (42.66% - 72.03%),55.53% (33.64% - 70.69%)
GBOOST,63.06% (55.94% - 70.63%),63.6% (57.37% - 70.64%),63.06% (55.94% - 70.63%),62.97% (55.94% - 70.63%)


In [66]:
# Saving the tables obtained
tabela_EXPtabela_MS0 = EXPtabela_MS0.to_excel("tabela_metricas_EXPtabela_MS0.xlsx")

In [67]:
EXPhiperp_MS0 = tabela_hiperparametros("2- Exposure to pesticides/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MS0

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=10, min_samples_split=15, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=1.0"


In [68]:
# Saving the tables obtained
tabela_EXPhiperp_MS0 = EXPhiperp_MS0.to_excel("tabela_metricas_EXPhiperp_MS0.xlsx")

In [18]:
EXPgrupo_MS0 = ["2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl",
                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rf",
                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_svm",
                      "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_gboost"]
EXPresultado_MS0 = comparacao_entre_modelos(EXPgrupo_MS0)

# If you want to print the results
for key, df in EXPresultado_MS0.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl x 2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rf:
          Estatística F Valor p  \
Accuracy       215.8501     0.0   
Precision       160.985     0.0   
Recall         215.8501     0.0   
F1-score       231.8109     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl x 2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_svm:
          Estatística F Valor p  \
Accuracy        22.2332

### Ministry of Health + Variables with significance with Oversampling

In [69]:
EXPtabela_MSArt0 = tabela("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl",
                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf",
                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm",
                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost")
EXPtabela_MSArt0

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.16% (46.6% - 64.08%),56.99% (47.24% - 66.47%),56.16% (46.6% - 64.08%),55.9% (44.98% - 63.87%)
RF,60.86% (52.43% - 70.87%),61.71% (52.4% - 73.65%),60.86% (52.43% - 70.87%),60.66% (51.61% - 70.82%)
SVM,55.03% (41.75% - 65.05%),66.67% (38.1% - 79.04%),55.03% (41.75% - 65.05%),47.14% (28.25% - 62.14%)
GBOOST,58.43% (44.66% - 67.96%),59.2% (45.51% - 72.14%),58.43% (44.66% - 67.96%),58.26% (44.88% - 67.99%)


In [70]:
# Saving the tables obtained
tabela_EXPtabela_MSArt0 = EXPtabela_MSArt0.to_excel("tabela_metricas_EXPtabela_MSArt0.xlsx")

In [71]:
EXPhiperp_MSArt0 = tabela_hiperparametros("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RL",
                                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RF",
                                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_SVM",
                                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MSArt0

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [72]:
# Saving the tables obtained
tabela_EXPhiperp_MSArt0 = EXPhiperp_MSArt0.to_excel("tabela_metricas_EXPhiperp_MSArt0.xlsx")

In [19]:
EXPgrupo_MSArt0 = ["2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl",
                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf",
                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm",
                         "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost"]
EXPresultado_MSArt0 = comparacao_entre_modelos(EXPgrupo_MSArt0)

# If you want to print the results
for key, df in EXPresultado_MSArt0.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl x 2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf:
          Estatística F Valor p  \
Accuracy        64.8521     0.0   
Precision       60.5895     0.0   
Recall          64.8521     0.0   
F1-score        64.6189     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl x 2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [20]:
pergunta1_rl = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl")
pergunta1_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.3757,0.2422,There is no significant difference between the means.
Precision,0.107,0.7439,There is no significant difference between the means.
Recall,1.3757,0.2422,There is no significant difference between the means.
F1-score,1.3083,0.2541,There is no significant difference between the means.


In [21]:
pergunta1_rf = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rf",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rf")
pergunta1_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.1508,0.2847,There is no significant difference between the means.
Precision,1.7385,0.1889,There is no significant difference between the means.
Recall,1.1508,0.2847,There is no significant difference between the means.
F1-score,1.1513,0.2846,There is no significant difference between the means.


In [22]:
pergunta1_svm = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_svm",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_svm")
pergunta1_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,108.5679,0.0,There is a significant difference between the averages.
Precision,291.5889,0.0,There is a significant difference between the averages.
Recall,108.5679,0.0,There is a significant difference between the averages.
F1-score,90.8783,0.0,There is a significant difference between the averages.


In [23]:
pergunta1_gboost = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_gboost",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_gboost")
pergunta1_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,122.7721,0.0,There is a significant difference between the averages.
Precision,143.1465,0.0,There is a significant difference between the averages.
Recall,122.7721,0.0,There is a significant difference between the averages.
F1-score,120.9124,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [24]:
pergunta2_rl = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl",
                          "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl")
pergunta2_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.8483,0.3582,There is no significant difference between the means.
Precision,4.3174,0.039,There is a significant difference between the averages.
Recall,0.8483,0.3582,There is no significant difference between the means.
F1-score,0.7315,0.3934,There is no significant difference between the means.


In [25]:
pergunta2_rf = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rf",
                          "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf")
pergunta2_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,8.344,0.0043,There is a significant difference between the averages.
Precision,8.0617,0.005,There is a significant difference between the averages.
Recall,8.344,0.0043,There is a significant difference between the averages.
F1-score,8.7831,0.0034,There is a significant difference between the averages.


In [26]:
pergunta2_svm = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_svm",
                           "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm")
pergunta2_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,196.0294,0.0,There is a significant difference between the averages.
Precision,183.3417,0.0,There is a significant difference between the averages.
Recall,196.0294,0.0,There is a significant difference between the averages.
F1-score,192.7684,0.0,There is a significant difference between the averages.


In [27]:
pergunta2_gboost = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_gboost",
                              "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost")
pergunta2_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,63.503,0.0,There is a significant difference between the averages.
Precision,68.9631,0.0,There is a significant difference between the averages.
Recall,63.503,0.0,There is a significant difference between the averages.
F1-score,63.9493,0.0,There is a significant difference between the averages.


## Structured data with aggravated risk stratification for patients exposed to pesticides

### Ministry of Health

In [73]:
EXPtabela_MS_alt = tabela("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rl",
                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rf",
                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_svm",
                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_gboost")
EXPtabela_MS_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.27% (43.82% - 70.79%),57.88% (44.47% - 72.53%),56.27% (43.82% - 70.79%),55.57% (43.42% - 70.75%)
RF,72.93% (58.43% - 82.02%),73.89% (58.24% - 84.43%),72.93% (58.43% - 82.02%),72.88% (58.27% - 82.18%)
SVM,68.28% (59.55% - 78.65%),79.5% (71.01% - 85.19%),68.28% (59.55% - 78.65%),65.74% (54.08% - 77.8%)
GBOOST,73.62% (59.55% - 86.52%),74.35% (60.6% - 88.05%),73.62% (59.55% - 86.52%),73.57% (59.59% - 86.59%)


In [74]:
# Saving the tables obtained
tabela_EXPtabela_MS_alt = EXPtabela_MS_alt.to_excel("tabela_metricas_EXPtabela_MS_alt.xlsx")

In [75]:
EXPhiperp_MS_alt = tabela_hiperparametros("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MS_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=400, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=100, subsample=1.0"


In [76]:
# Saving the tables obtained
tabela_EXPhiperp_MS_alt = EXPhiperp_MS_alt.to_excel("tabela_metricas_EXPhiperp_MS_alt.xlsx")

In [28]:
EXPgrupo_MS_alt = ["2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rl",
                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rf",
                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_svm",
                         "2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_gboost"]
EXPresultado_MS_alt = comparacao_entre_modelos(EXPgrupo_MS_alt)

# If you want to print the results
for key, df in EXPresultado_MS_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rf:
          Estatística F Valor p  \
Accuracy       608.5998     0.0   
Precision      487.5485     0.0   
Recall         608.5998     0.0   
F1-score       635.3294     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS

### Variables with significance

In [77]:
EXPtabela_Art_alt = tabela("2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf", 
                           "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_gboost")
EXPtabela_Art_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,67.07% (56.16% - 82.19%),68.43% (56.02% - 84.08%),67.07% (56.16% - 82.19%),66.82% (56.05% - 82.38%)
RF,66.01% (54.79% - 73.97%),66.87% (54.85% - 74.6%),66.01% (54.79% - 73.97%),65.75% (53.87% - 73.95%)
SVM,67.62% (54.79% - 76.71%),68.91% (54.61% - 80.79%),67.62% (54.79% - 76.71%),67.31% (53.85% - 76.64%)
GBOOST,61.55% (46.58% - 72.6%),62.65% (46.73% - 72.57%),61.55% (46.58% - 72.6%),61.01% (46.58% - 72.53%)


In [78]:
# Saving the tables obtained
tabela_EXPtabela_Art_alt = EXPtabela_Art_alt.to_excel("tabela_metricas_EXPtabela_Art_alt.xlsx")

In [80]:
EXPhiperp_Art_alt = tabela_hiperparametros("2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_RL",
                                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_RF",
                                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_SVM",
                                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_GBoost")
EXPhiperp_Art_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=4, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=1.0"


In [81]:
# Saving the tables obtained
tabela_EXPhiperp_Art_alt = EXPhiperp_Art_alt.to_excel("tabela_metricas_EXPhiperp_Art_alt.xlsx")

In [29]:
EXPgrupo_Art_alt = ["2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf", 
                           "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_gboost"]
EXPresultado_Art_alt = comparacao_entre_modelos(EXPgrupo_Art_alt)

# If you want to print the results
for key, df in EXPresultado_Art_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf:
          Estatística F Valor p  \
Accuracy         2.6354  0.1061   
Precision        5.3659  0.0216   
Recall           2.6354  0.1061   
F1-score         2.6609  0.1044   

                                                          Mensagem  
Accuracy     There is no significant difference between the means.  
Precision  There is a significant difference between the averages.  
Recall       There is no significant difference between the means.  
F1-score     There is no significant difference between the means.   

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm:
  

### Ministry of Health + Variables with significance

In [82]:
EXPtabela_MSArt_alt = tabela("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rl",
                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rf",
                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_svm",
                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_gboost")
EXPtabela_MSArt_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,82.9% (74.65% - 90.14%),84.15% (75.74% - 91.15%),82.9% (74.65% - 90.14%),82.8% (74.19% - 90.2%)
RF,86.86% (77.46% - 94.37%),88.13% (77.68% - 94.37%),86.86% (77.46% - 94.37%),86.78% (76.89% - 94.37%)
SVM,83.66% (69.01% - 94.37%),84.96% (69.01% - 94.54%),83.66% (69.01% - 94.37%),83.55% (69.01% - 94.38%)
GBOOST,86.23% (69.01% - 94.37%),86.75% (75.04% - 94.49%),86.23% (69.01% - 94.37%),86.2% (69.27% - 94.35%)


In [83]:
# Saving the tables obtained
tabela_EXPtabela_MSArt_alt = EXPtabela_MSArt_alt.to_excel("tabela_metricas_EXPtabela_MSArt_alt.xlsx")

In [84]:
EXPhiperp_MSArt_alt = tabela_hiperparametros("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RL",
                                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RF",
                                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_SVM",
                                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MSArt_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=10, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=100, subsample=0.8"


In [85]:
# Saving the tables obtained
tabela_EXPhiperp_MSArt_alt = EXPhiperp_MSArt_alt.to_excel("tabela_metricas_EXPhiperp_MSArt_alt.xlsx")

In [30]:
EXPgrupo_MSArt_alt = ["2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rl",
                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rf",
                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_svm",
                            "2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_gboost"]
EXPresultado_MSArt_alt = comparacao_entre_modelos(EXPgrupo_MSArt_alt)

# If you want to print the results
for key, df in EXPresultado_MSArt_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rf:
          Estatística F Valor p  \
Accuracy        58.9461     0.0   
Precision       65.2152     0.0   
Recall          58.9461     0.0   
F1-score        57.7708     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rl x 2- Exposure to pesticides/6- Analise_dados_alt

### Ministry of Health with Oversampling

In [86]:
EXPtabela_MS0_alt = tabela("2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rl",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rf",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_svm",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_gboost")
EXPtabela_MS0_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,55.82% (47.55% - 65.03%),57.23% (49.03% - 65.79%),55.82% (47.55% - 65.03%),54.95% (44.73% - 64.51%)
RF,64.41% (55.24% - 74.83%),65.11% (55.68% - 75.57%),64.41% (55.24% - 74.83%),64.26% (54.57% - 74.66%)
SVM,64.53% (46.15% - 77.62%),71.98% (51.96% - 81.32%),64.53% (46.15% - 77.62%),61.2% (32.9% - 77.17%)
GBOOST,64.36% (55.94% - 70.63%),64.85% (56.05% - 71.1%),64.36% (55.94% - 70.63%),64.29% (55.97% - 70.65%)


In [87]:
# Saving the tables obtained
tabela_EXPtabela_MS0_alt = EXPtabela_MS0_alt.to_excel("tabela_metricas_EXPtabela_MS0_alt.xlsx")

In [88]:
EXPhiperp_MS0_alt = tabela_hiperparametros("2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MS0_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=400, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=0.8"


In [89]:
# Saving the tables obtained
tabela_EXPhiperp_MS0_alt = EXPhiperp_MS0_alt.to_excel("tabela_metricas_EXPhiperp_MS0_alt.xlsx")

In [31]:
EXPgrupo_MS0_alt = ["2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rl",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rf",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_svm",
                          "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_gboost"]
EXPresultado_MS0_alt = comparacao_entre_modelos(EXPgrupo_MS0_alt)

# If you want to print the results
for key, df in EXPresultado_MS0_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rf:
          Estatística F Valor p  \
Accuracy       288.7763     0.0   
Precision      241.1205     0.0   
Recall         288.7763     0.0   
F1-score       298.0528     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry

### Ministry of Health + Variables with significance with Oversampling

In [90]:
EXPtabela_MSArt0_alt = tabela("2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl",
                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf",
                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm",
                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost")
EXPtabela_MSArt0_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,85.58% (78.64% - 92.23%),86.11% (79.8% - 92.85%),85.58% (78.64% - 92.23%),85.54% (78.46% - 92.23%)
RF,85.97% (80.58% - 92.23%),86.63% (80.72% - 92.44%),85.97% (80.58% - 92.23%),85.91% (80.53% - 92.23%)
SVM,85.43% (77.67% - 91.26%),86.61% (79.99% - 91.28%),85.43% (77.67% - 91.26%),85.34% (77.56% - 91.26%)
GBOOST,83.93% (75.73% - 89.32%),84.38% (76.04% - 89.98%),83.93% (75.73% - 89.32%),83.92% (75.71% - 89.32%)


In [91]:
# Saving the tables obtained
tabela_EXPtabela_MSArt0_alt = EXPtabela_MSArt0_alt.to_excel("tabela_metricas_EXPtabela_MSArt0_alt.xlsx")

In [92]:
EXPhiperp_MSArt0_alt = tabela_hiperparametros("2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RL",
                                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RF",
                                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_SVM",
                                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MSArt0_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=0.8"


In [93]:
# Saving the tables obtained
tabela_EXPhiperp_MSArt0_alt = EXPhiperp_MSArt0_alt.to_excel("tabela_metricas_EXPhiperp_MSArt0_alt.xlsx")

In [32]:
EXPgrupo_MSArt0_alt = ["2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl",
                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf",
                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm",
                             "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost"]
EXPresultado_MSArt0_alt = comparacao_entre_modelos(EXPgrupo_MSArt0_alt)

# If you want to print the results
for key, df in EXPresultado_MSArt0_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl x 2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf:
          Estatística F Valor p  \
Accuracy         1.1081  0.2938   
Precision        2.1147  0.1475   
Recall           1.1081  0.2938   
F1-score         0.9969  0.3193   

                                                        Mensagem  
Accuracy   There is no significant difference between the means.  
Precision  There is no significant difference between the means.  
Recall     There is no significant difference between the means.  
F1-score   There is no significant difference between the means.   

Results for 2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl x 2- Exposure to pesti

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [33]:
EXPpergunta1_rl_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rl",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rl")
EXPpergunta1_rl_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.5473,0.4603,There is no significant difference between the means.
Precision,0.9297,0.3361,There is no significant difference between the means.
Recall,0.5473,0.4603,There is no significant difference between the means.
F1-score,0.9156,0.3398,There is no significant difference between the means.


In [34]:
EXPpergunta1_rf_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rf",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rf")
EXPpergunta1_rf_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,214.4077,0.0,There is a significant difference between the averages.
Precision,229.2692,0.0,There is a significant difference between the averages.
Recall,214.4077,0.0,There is a significant difference between the averages.
F1-score,213.6041,0.0,There is a significant difference between the averages.


In [35]:
EXPpergunta1_svm_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_svm",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_svm")
EXPpergunta1_svm_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,27.1096,0.0,There is a significant difference between the averages.
Precision,144.6793,0.0,There is a significant difference between the averages.
Recall,27.1096,0.0,There is a significant difference between the averages.
F1-score,23.2077,0.0,There is a significant difference between the averages.


In [36]:
EXPpergunta1_gboost_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_gboost",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_gboost")
EXPpergunta1_gboost_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,239.6918,0.0,There is a significant difference between the averages.
Precision,246.5106,0.0,There is a significant difference between the averages.
Recall,239.6918,0.0,There is a significant difference between the averages.
F1-score,238.407,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [37]:
EXPpergunta2_rl_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rl",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl")
EXPpergunta2_rl_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,35.1962,0.0,There is a significant difference between the averages.
Precision,19.7166,0.0,There is a significant difference between the averages.
Recall,35.1962,0.0,There is a significant difference between the averages.
F1-score,35.9325,0.0,There is a significant difference between the averages.


In [38]:
EXPpergunta2_rf_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rf",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf")
EXPpergunta2_rf_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,3.9932,0.0471,There is a significant difference between the averages.
Precision,12.7654,0.0004,There is a significant difference between the averages.
Recall,3.9932,0.0471,There is a significant difference between the averages.
F1-score,3.7025,0.0558,There is no significant difference between the means.


In [39]:
EXPpergunta2_svm_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_svm",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm")
EXPpergunta2_svm_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,11.5444,0.0008,There is a significant difference between the averages.
Precision,11.5292,0.0008,There is a significant difference between the averages.
Recall,11.5444,0.0008,There is a significant difference between the averages.
F1-score,11.4438,0.0009,There is a significant difference between the averages.


In [40]:
EXPpergunta2_gboost_alt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_gboost",
                                "2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost")
EXPpergunta2_gboost_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,19.0909,0.0,There is a significant difference between the averages.
Precision,22.6196,0.0,There is a significant difference between the averages.
Recall,19.0909,0.0,There is a significant difference between the averages.
F1-score,18.8699,0.0,There is a significant difference between the averages.


### Is there a difference between the models predicted with structured data and the models with restratified data?

#### MS

In [41]:
EXPpergunta3_alt_rlMS = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rl",
                              "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl")
EXPpergunta3_alt_rlMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.1239,0.7252,There is no significant difference between the means.
Precision,0.2356,0.6279,There is no significant difference between the means.
Recall,0.1239,0.7252,There is no significant difference between the means.
F1-score,0.0001,0.9924,There is no significant difference between the means.


In [42]:
EXPpergunta3_alt_rfMS = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_rf",
                              "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rf")
EXPpergunta3_alt_rfMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,154.2105,0.0,There is a significant difference between the averages.
Precision,166.7365,0.0,There is a significant difference between the averages.
Recall,154.2105,0.0,There is a significant difference between the averages.
F1-score,152.2442,0.0,There is a significant difference between the averages.


In [43]:
EXPpergunta3_alt_svmMS = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_svm",
                              "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_svm")
EXPpergunta3_alt_svmMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.1296,0.7193,There is no significant difference between the means.
Precision,13.0773,0.0004,There is a significant difference between the averages.
Recall,0.1296,0.7193,There is no significant difference between the means.
F1-score,1.1202,0.2912,There is no significant difference between the means.


In [44]:
EXPpergunta3_alt_gboostMS = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_alt_gboost",
                              "2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_gboost")
EXPpergunta3_alt_gboostMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,36.4956,0.0,There is a significant difference between the averages.
Precision,32.5158,0.0,There is a significant difference between the averages.
Recall,36.4956,0.0,There is a significant difference between the averages.
F1-score,36.7828,0.0,There is a significant difference between the averages.


#### MS + Art

In [45]:
EXPpergunta3_alt_rlMSArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rl",
                              "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl")
EXPpergunta3_alt_rlMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2015.2097,0.0,There is a significant difference between the averages.
Precision,1902.1075,0.0,There is a significant difference between the averages.
Recall,2015.2097,0.0,There is a significant difference between the averages.
F1-score,1964.2645,0.0,There is a significant difference between the averages.


In [46]:
EXPpergunta3_alt_rfMSArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_rf",
                              "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rf")
EXPpergunta3_alt_rfMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1326.268,0.0,There is a significant difference between the averages.
Precision,1476.2602,0.0,There is a significant difference between the averages.
Recall,1326.268,0.0,There is a significant difference between the averages.
F1-score,1298.6261,0.0,There is a significant difference between the averages.


In [47]:
EXPpergunta3_alt_svmMSArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_svm",
                              "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_svm")
EXPpergunta3_alt_svmMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,781.4508,0.0,There is a significant difference between the averages.
Precision,109.7073,0.0,There is a significant difference between the averages.
Recall,781.4508,0.0,There is a significant difference between the averages.
F1-score,822.8838,0.0,There is a significant difference between the averages.


In [48]:
EXPpergunta3_alt_gboostMSArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/EXPtabela_metricasMSart_gboost",
                              "2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_gboost")
EXPpergunta3_alt_gboostMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,888.7573,0.0,There is a significant difference between the averages.
Precision,904.7128,0.0,There is a significant difference between the averages.
Recall,888.7573,0.0,There is a significant difference between the averages.
F1-score,884.6861,0.0,There is a significant difference between the averages.


#### Art

In [49]:
EXPpergunta3_alt_rlArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl",
                              "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl")
EXPpergunta3_alt_rlArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,197.6532,0.0,There is a significant difference between the averages.
Precision,193.9585,0.0,There is a significant difference between the averages.
Recall,197.6532,0.0,There is a significant difference between the averages.
F1-score,189.7541,0.0,There is a significant difference between the averages.


In [50]:
EXPpergunta3_alt_rfArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf",
                              "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf")
EXPpergunta3_alt_rfArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,12.1172,0.0006,There is a significant difference between the averages.
Precision,13.0016,0.0004,There is a significant difference between the averages.
Recall,12.1172,0.0006,There is a significant difference between the averages.
F1-score,11.2631,0.0009,There is a significant difference between the averages.


In [51]:
EXPpergunta3_alt_svmArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm",
                              "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm")
EXPpergunta3_alt_svmArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,5.2242,0.0233,There is a significant difference between the averages.
Precision,5.6088,0.0188,There is a significant difference between the averages.
Recall,5.2242,0.0233,There is a significant difference between the averages.
F1-score,3.9623,0.0479,There is a significant difference between the averages.


In [52]:
EXPpergunta3_alt_gboostArt = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_gboost",
                              "2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_gboost")
EXPpergunta3_alt_gboostArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,7.7757,0.0058,There is a significant difference between the averages.
Precision,7.6614,0.0062,There is a significant difference between the averages.
Recall,7.7757,0.0058,There is a significant difference between the averages.
F1-score,8.9786,0.0031,There is a significant difference between the averages.


#### MS Oversampling

In [53]:
EXPpergunta3_alt_rlMS0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rl",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl")
EXPpergunta3_alt_rlMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,3.1291,0.0784,There is no significant difference between the means.
Precision,0.8436,0.3595,There is no significant difference between the means.
Recall,3.1291,0.0784,There is no significant difference between the means.
F1-score,5.8837,0.0162,There is a significant difference between the averages.


In [54]:
EXPpergunta3_alt_rfMS0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_rf",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rf")
EXPpergunta3_alt_rfMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2.782,0.0969,There is no significant difference between the means.
Precision,4.7604,0.0303,There is a significant difference between the averages.
Recall,2.782,0.0969,There is no significant difference between the means.
F1-score,2.4704,0.1176,There is no significant difference between the means.


In [55]:
EXPpergunta3_alt_svmMS0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_svm",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_svm")
EXPpergunta3_alt_svmMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,24.6666,0.0,There is a significant difference between the averages.
Precision,15.4703,0.0001,There is a significant difference between the averages.
Recall,24.6666,0.0,There is a significant difference between the averages.
F1-score,22.7757,0.0,There is a significant difference between the averages.


In [56]:
EXPpergunta3_alt_gboostMS0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_alt_gboost",
                              "2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_gboost")
EXPpergunta3_alt_gboostMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,8.4125,0.0041,There is a significant difference between the averages.
Precision,7.648,0.0062,There is a significant difference between the averages.
Recall,8.4125,0.0041,There is a significant difference between the averages.
F1-score,8.5806,0.0038,There is a significant difference between the averages.


#### MS + Art Oversampling

In [57]:
EXPpergunta3_alt_rlMSArt0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl",
                              "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl")
EXPpergunta3_alt_rlMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,3643.5894,0.0,There is a significant difference between the averages.
Precision,3561.6722,0.0,There is a significant difference between the averages.
Recall,3643.5894,0.0,There is a significant difference between the averages.
F1-score,3590.4602,0.0,There is a significant difference between the averages.


In [58]:
EXPpergunta3_alt_rfMSArt0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf",
                              "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf")
EXPpergunta3_alt_rfMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2622.8823,0.0,There is a significant difference between the averages.
Precision,2404.9014,0.0,There is a significant difference between the averages.
Recall,2622.8823,0.0,There is a significant difference between the averages.
F1-score,2613.3563,0.0,There is a significant difference between the averages.


In [59]:
EXPpergunta3_alt_svmMSArt0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm",
                              "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm")
EXPpergunta3_alt_svmMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2205.3353,0.0,There is a significant difference between the averages.
Precision,398.8083,0.0,There is a significant difference between the averages.
Recall,2205.3353,0.0,There is a significant difference between the averages.
F1-score,1750.1592,0.0,There is a significant difference between the averages.


In [60]:
EXPpergunta3_alt_gboostMSArt0 = comparacao("2- Exposure to pesticides/6- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost",
                              "2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost")
EXPpergunta3_alt_gboostMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2243.3411,0.0,There is a significant difference between the averages.
Precision,2074.2724,0.0,There is a significant difference between the averages.
Recall,2243.3411,0.0,There is a significant difference between the averages.
F1-score,2268.1714,0.0,There is a significant difference between the averages.


## Data with variable selection by correlation¶

### Ministry of Health

In [94]:
EXPtabela_MS_corr = tabela("2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rl",
                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rf",
                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_svm",
                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_gboost")
EXPtabela_MS_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,53.06% (41.57% - 64.04%),54.44% (41.15% - 66.37%),53.06% (41.57% - 64.04%),52.07% (35.77% - 64.05%)
RF,67.79% (55.06% - 78.65%),68.53% (55.06% - 78.75%),67.79% (55.06% - 78.65%),67.7% (55.06% - 78.57%)
SVM,65.24% (53.93% - 74.16%),74.77% (65.31% - 81.31%),65.24% (53.93% - 74.16%),62.43% (49.16% - 73.12%)
GBOOST,70.74% (59.55% - 79.78%),71.49% (59.55% - 80.19%),70.74% (59.55% - 79.78%),70.66% (59.46% - 79.74%)


In [95]:
# Saving the tables obtained
tabela_EXPtabela_MS_corr = EXPtabela_MS_corr.to_excel("tabela_metricas_EXPtabela_MS_corr.xlsx")

In [96]:
EXPhiperp_MS_corr = tabela_hiperparametros("2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MS_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=100, subsample=0.8"


In [97]:
# Saving the tables obtained
tabela_EXPhiperp_MS_corr = EXPhiperp_MS_corr.to_excel("tabela_metricas_EXPhiperp_MS_corr.xlsx")

In [89]:
EXPgrupo_MS_corr = ["2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rl",
                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rf",
                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_svm",
                          "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_gboost"]
EXPresultado_MS_corr = comparacao_entre_modelos(EXPgrupo_MS_corr)

# If you want to print the results
for key, df in EXPresultado_MS_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rf:
          Estatística F Valor p  \
Accuracy       508.0312     0.0   
Precision      452.6052     0.0   
Recall         508.0312     0.0   
F1-score       485.6177     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_svm:
      

### Variables with significance

In [102]:
EXPtabela_Art_corr = tabela("2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rl",
                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rf",
                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_svm",
                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_gboost")
EXPtabela_Art_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,45.97% (32.88% - 53.42%),35.88% (10.81% - 63.43%),45.97% (32.88% - 53.42%),36.36% (16.27% - 51.67%)
RF,47.74% (35.62% - 57.53%),47.05% (34.5% - 58.82%),47.74% (35.62% - 57.53%),44.42% (27.55% - 57.78%)
SVM,46.49% (36.99% - 49.32%),21.7% (13.68% - 24.32%),46.49% (36.99% - 49.32%),29.57% (19.97% - 32.58%)
GBOOST,46.27% (36.99% - 54.79%),43.49% (14.71% - 60.37%),46.27% (36.99% - 54.79%),40.58% (21.27% - 54.45%)


In [103]:
# Saving the tables obtained
tabela_EXPtabela_Art_corr = EXPtabela_Art_corr.to_excel("tabela_metricas_EXPtabela_Art_corr.xlsx")

In [104]:
EXPhiperp_Art_corr = tabela_hiperparametros("2- Exposure to pesticides/7- Analise_correlacao/3- Article/5- Best model/melhor_modelo_RL",
                                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/5- Best model/melhor_modelo_RF",
                                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/5- Best model/melhor_modelo_SVM",
                                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/5- Best model/melhor_modelo_GBoost")
EXPhiperp_Art_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=10, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=0.001, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [105]:
# Saving the tables obtained
tabela_EXPhiperp_Art_corr = EXPhiperp_Art_corr.to_excel("tabela_metricas_EXPhiperp_Art_corr.xlsx")

In [90]:
EXPgrupo_Art_corr = ["2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rl",
                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rf",
                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_svm",
                           "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_gboost"]
EXPresultado_Art_corr = comparacao_entre_modelos(EXPgrupo_Art_corr)

# If you want to print the results
for key, df in EXPresultado_Art_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rf:
          Estatística F Valor p  \
Accuracy         8.0941  0.0049   
Precision       50.8511     0.0   
Recall           8.0941  0.0049   
F1-score        56.3181     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_svm:
          Estatística F Valor p  \
Accuracy   

### Ministry of Health + Variables with significance

In [98]:
EXPtabela_MSArt_corr = tabela("2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rl",
                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rf",
                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_svm",
                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_gboost")
EXPtabela_MSArt_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,56.93% (47.89% - 66.2%),58.03% (48.25% - 69.18%),56.93% (47.89% - 66.2%),56.6% (47.26% - 66.2%)
RF,68.86% (54.93% - 80.28%),69.99% (54.91% - 82.75%),68.86% (54.93% - 80.28%),68.73% (53.72% - 80.32%)
SVM,71.46% (52.11% - 83.1%),80.43% (68.17% - 87.51%),71.46% (52.11% - 83.1%),69.54% (47.77% - 82.73%)
GBOOST,68.18% (53.52% - 84.51%),69.4% (55.84% - 84.55%),68.18% (53.52% - 84.51%),68.06% (52.8% - 84.51%)


In [99]:
# Saving the tables obtained
tabela_EXPtabela_MSArt_corr = EXPtabela_MSArt_corr.to_excel("tabela_metricas_EXPtabela_MSArt_corr.xlsx")

In [100]:
EXPhiperp_MSArt_corr = tabela_hiperparametros("2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_RL",
                                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_RF",
                                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_SVM",
                                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MSArt_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0"


In [101]:
# Saving the tables obtained
tabela_EXPhiperp_MSArt_corr = EXPhiperp_MSArt_corr.to_excel("tabela_metricas_EXPhiperp_MSArt_corr.xlsx")

In [91]:
EXPgrupo_MSArt_corr = ["2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rl",
                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rf",
                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_svm",
                             "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_gboost"]
EXPresultado_MSArt_corr = comparacao_entre_modelos(EXPgrupo_MSArt_corr)

# If you want to print the results
for key, df in EXPresultado_MSArt_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rf:
          Estatística F Valor p  \
Accuracy       293.9532     0.0   
Precision      284.2868     0.0   
Recall         293.9532     0.0   
F1-score       289.3561     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article

### Ministry of Health with Oversampling

In [106]:
EXPtabela_MS0_corr = tabela("2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rl",
                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rf",
                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_svm",
                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_gboost")
EXPtabela_MS0_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,53.63% (43.36% - 62.24%),54.76% (46.41% - 65.24%),53.63% (43.36% - 62.24%),52.76% (35.78% - 62.24%)
RF,62.92% (55.24% - 71.33%),63.52% (55.24% - 72.24%),62.92% (55.24% - 71.33%),62.8% (55.2% - 71.34%)
SVM,61.06% (46.15% - 69.93%),66.93% (52.46% - 76.94%),61.06% (46.15% - 69.93%),58.06% (36.22% - 68.29%)
GBOOST,63.28% (54.55% - 70.63%),63.93% (54.49% - 72.76%),63.28% (54.55% - 70.63%),63.22% (54.39% - 70.63%)


In [107]:
# Saving the tables obtained
tabela_EXPtabela_MS0_corr = EXPtabela_MS0_corr.to_excel("tabela_metricas_EXPtabela_MS0_corr.xlsx")

In [108]:
EXPhiperp_MS0_corr = tabela_hiperparametros("2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MS0_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=100, subsample=1.0"


In [109]:
# Saving the tables obtained
tabela_EXPhiperp_MS0_corr = EXPhiperp_MS0_corr.to_excel("tabela_metricas_EXPhiperp_MS0_corr.xlsx")

In [92]:
EXPgrupo_MS0_corr = ["2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rl",
                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rf",
                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_svm",
                           "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_gboost"]
EXPresultado_MS0_corr = comparacao_entre_modelos(EXPgrupo_MS0_corr)

# If you want to print the results
for key, df in EXPresultado_MS0_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rf:
          Estatística F Valor p  \
Accuracy       318.9354     0.0   
Precision      263.0842     0.0   
Recall         318.9354     0.0   
F1-score       309.4006     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversa

### Ministry of Health + Variables with significance with Oversampling

In [110]:
EXPtabela_MSArt0_corr = tabela("2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_gboost")
EXPtabela_MSArt0_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,53.17% (43.69% - 61.17%),54.05% (44.63% - 63.4%),53.17% (43.69% - 61.17%),52.83% (43.74% - 61.1%)
RF,60.51% (50.49% - 70.87%),61.13% (50.44% - 73.98%),60.51% (50.49% - 70.87%),60.39% (50.46% - 70.91%)
SVM,56.73% (41.75% - 66.99%),68.62% (38.03% - 79.22%),56.73% (41.75% - 66.99%),49.82% (26.23% - 64.25%)
GBOOST,58.57% (47.57% - 66.99%),59.46% (49.5% - 67.84%),58.57% (47.57% - 66.99%),58.46% (47.64% - 67.0%)


In [111]:
# Saving the tables obtained
tabela_EXPtabela_MSArt0_corr = EXPtabela_MSArt0_corr.to_excel("tabela_metricas_EXPtabela_MSArt0_corr.xlsx")

In [112]:
EXPhiperp_MSArt0_corr = tabela_hiperparametros("2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_RL",
                                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_RF",
                                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_SVM",
                                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_GBoost")
EXPhiperp_MSArt0_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=4, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=50, subsample=0.8"


In [113]:
# Saving the tables obtained
tabela_EXPhiperp_MSArt0_corr = EXPhiperp_MSArt0_corr.to_excel("tabela_metricas_EXPhiperp_MSArt0_corr.xlsx")

In [93]:
EXPgrupo_MSArt0_corr = ["2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_gboost"]
EXPresultado_MSArt0_corr = comparacao_entre_modelos(EXPgrupo_MSArt0_corr)

# If you want to print the results
for key, df in EXPresultado_MSArt0_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rl x 2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rf:
          Estatística F Valor p  \
Accuracy       183.5995     0.0   
Precision      155.7508     0.0   
Recall         183.5995     0.0   
F1-score       186.8614     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rl x 2- Exposure to pesticides/7- Analise_correl

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [61]:
EXPpergunta1_rl_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rl",
                                "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rl")
EXPpergunta1_rl_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.9639,0.3274,There is no significant difference between the means.
Precision,0.2847,0.5943,There is no significant difference between the means.
Recall,0.9639,0.3274,There is no significant difference between the means.
F1-score,1.0318,0.311,There is no significant difference between the means.


In [62]:
EXPpergunta1_rf_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rf",
                                "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rf")
EXPpergunta1_rf_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,66.5116,0.0,There is a significant difference between the averages.
Precision,69.8247,0.0,There is a significant difference between the averages.
Recall,66.5116,0.0,There is a significant difference between the averages.
F1-score,66.3262,0.0,There is a significant difference between the averages.


In [64]:
EXPpergunta1_svm_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_svm",
                                "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_svm")
EXPpergunta1_svm_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,37.5314,0.0,There is a significant difference between the averages.
Precision,179.6874,0.0,There is a significant difference between the averages.
Recall,37.5314,0.0,There is a significant difference between the averages.
F1-score,25.8305,0.0,There is a significant difference between the averages.


In [63]:
EXPpergunta1_gboost_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_gboost",
                                "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_gboost")
EXPpergunta1_gboost_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,180.8501,0.0,There is a significant difference between the averages.
Precision,173.0441,0.0,There is a significant difference between the averages.
Recall,180.8501,0.0,There is a significant difference between the averages.
F1-score,177.9433,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [66]:
EXPpergunta2_rl_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rl",
                                "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rl")
EXPpergunta2_rl_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,44.2831,0.0,There is a significant difference between the averages.
Precision,46.8631,0.0,There is a significant difference between the averages.
Recall,44.2831,0.0,There is a significant difference between the averages.
F1-score,41.35,0.0,There is a significant difference between the averages.


In [65]:
EXPpergunta2_rf_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rf",
                                "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rf")
EXPpergunta2_rf_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,151.6412,0.0,There is a significant difference between the averages.
Precision,161.3413,0.0,There is a significant difference between the averages.
Recall,151.6412,0.0,There is a significant difference between the averages.
F1-score,147.7856,0.0,There is a significant difference between the averages.


In [67]:
EXPpergunta2_svm_corr = comparacao("2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_svm",
                                "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_svm")
EXPpergunta2_svm_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,270.8268,0.0,There is a significant difference between the averages.
Precision,171.6108,0.0,There is a significant difference between the averages.
Recall,270.8268,0.0,There is a significant difference between the averages.
F1-score,289.4107,0.0,There is a significant difference between the averages.


In [68]:
EXPpergunta2_gboost_corr = comparacao("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost",
                                "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_gboost")
EXPpergunta2_gboost_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0549,0.8149,There is no significant difference between the means.
Precision,0.1705,0.6801,There is no significant difference between the means.
Recall,0.0549,0.8149,There is no significant difference between the means.
F1-score,0.1012,0.7508,There is no significant difference between the means.


### Is there a difference between the models predicted with all variables and with the variables selected by correlation?

#### MS

In [69]:
EXPpergunta3_corr_rlMS = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rl")
EXPpergunta3_corr_rlMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,21.8455,0.0,There is a significant difference between the averages.
Precision,22.1012,0.0,There is a significant difference between the averages.
Recall,21.8455,0.0,There is a significant difference between the averages.
F1-score,23.9067,0.0,There is a significant difference between the averages.


In [70]:
EXPpergunta3_corr_rfMS = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_rf")
EXPpergunta3_corr_rfMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,24.6911,0.0,There is a significant difference between the averages.
Precision,26.5542,0.0,There is a significant difference between the averages.
Recall,24.6911,0.0,There is a significant difference between the averages.
F1-score,24.3752,0.0,There is a significant difference between the averages.


In [71]:
EXPpergunta3_corr_svmMS = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_svm")
EXPpergunta3_corr_svmMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,23.4541,0.0,There is a significant difference between the averages.
Precision,211.0962,0.0,There is a significant difference between the averages.
Recall,23.4541,0.0,There is a significant difference between the averages.
F1-score,12.8932,0.0004,There is a significant difference between the averages.


In [72]:
EXPpergunta3_corr_gboostMS = comparacao("2- Exposure to pesticides/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_gboost",
                              "2- Exposure to pesticides/7- Analise_correlacao/1- Ministry_of_Health/4- Metrics/EXPtabela_metricasMS_corr_gboost")
EXPpergunta3_corr_gboostMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,4.1239,0.0436,There is a significant difference between the averages.
Precision,2.8186,0.0948,There is no significant difference between the means.
Recall,4.1239,0.0436,There is a significant difference between the averages.
F1-score,4.1245,0.0436,There is a significant difference between the averages.


#### MS + ART

In [73]:
EXPpergunta3_corr_rlMSArt = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rl")
EXPpergunta3_corr_rlMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.1116,0.7387,There is no significant difference between the means.
Precision,0.1614,0.6883,There is no significant difference between the means.
Recall,0.1116,0.7387,There is no significant difference between the means.
F1-score,0.0645,0.7997,There is no significant difference between the means.


In [75]:
EXPpergunta3_corr_rfMSArt = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_rf")
EXPpergunta3_corr_rfMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,60.7148,0.0,There is a significant difference between the averages.
Precision,68.0708,0.0,There is a significant difference between the averages.
Recall,60.7148,0.0,There is a significant difference between the averages.
F1-score,59.0086,0.0,There is a significant difference between the averages.


In [74]:
EXPpergunta3_corr_svmMSArt = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_svm")
EXPpergunta3_corr_svmMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,49.4802,0.0,There is a significant difference between the averages.
Precision,0.6457,0.4226,There is no significant difference between the means.
Recall,49.4802,0.0,There is a significant difference between the averages.
F1-score,57.2924,0.0,There is a significant difference between the averages.


In [76]:
EXPpergunta3_corr_gboostMSArt = comparacao("2- Exposure to pesticides/2.1- Ministry_of_Health + article - Quimio/4- Metrics/RStabela_metricasMSart_gboost",
                              "2- Exposure to pesticides/7- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/EXPtabela_metricasMSart_corr_gboost")
EXPpergunta3_corr_gboostMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,22.2655,0.0,There is a significant difference between the averages.
Precision,24.3199,0.0,There is a significant difference between the averages.
Recall,22.2655,0.0,There is a significant difference between the averages.
F1-score,21.7814,0.0,There is a significant difference between the averages.


#### ART

In [77]:
EXPpergunta3_corr_rlArt = comparacao("2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rl")
EXPpergunta3_corr_rlArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,223.9161,0.0,There is a significant difference between the averages.
Precision,187.5524,0.0,There is a significant difference between the averages.
Recall,223.9161,0.0,There is a significant difference between the averages.
F1-score,327.842,0.0,There is a significant difference between the averages.


In [78]:
EXPpergunta3_corr_rfArt = comparacao("2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_rf")
EXPpergunta3_corr_rfArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,631.435,0.0,There is a significant difference between the averages.
Precision,596.1925,0.0,There is a significant difference between the averages.
Recall,631.435,0.0,There is a significant difference between the averages.
F1-score,675.2712,0.0,There is a significant difference between the averages.


In [79]:
EXPpergunta3_corr_svmArt = comparacao("2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_svm")
EXPpergunta3_corr_svmArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1301.6177,0.0,There is a significant difference between the averages.
Precision,7495.7271,0.0,There is a significant difference between the averages.
Recall,1301.6177,0.0,There is a significant difference between the averages.
F1-score,4260.3492,0.0,There is a significant difference between the averages.


In [80]:
EXPpergunta3_corr_gboostArt = comparacao("2- Exposure to pesticides/3.1- Article - Quimio/4- Metrics/EXPtabela_metricasArt_gboost",
                              "2- Exposure to pesticides/7- Analise_correlacao/3- Article/4- Metrics/EXPtabela_metricasArt_corr_gboost")
EXPpergunta3_corr_gboostArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,767.9337,0.0,There is a significant difference between the averages.
Precision,307.7961,0.0,There is a significant difference between the averages.
Recall,767.9337,0.0,There is a significant difference between the averages.
F1-score,555.5478,0.0,There is a significant difference between the averages.


#### MS Oversampling

In [81]:
EXPpergunta3_corr_rlMS0 = comparacao("2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rl")
EXPpergunta3_corr_rlMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,35.8815,0.0,There is a significant difference between the averages.
Precision,28.3403,0.0,There is a significant difference between the averages.
Recall,35.8815,0.0,There is a significant difference between the averages.
F1-score,38.041,0.0,There is a significant difference between the averages.


In [82]:
EXPpergunta3_corr_rfMS0 = comparacao("2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_rf")
EXPpergunta3_corr_rfMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2.1476,0.1444,There is no significant difference between the means.
Precision,1.3253,0.251,There is no significant difference between the means.
Recall,2.1476,0.1444,There is no significant difference between the means.
F1-score,2.1374,0.1453,There is no significant difference between the means.


In [83]:
EXPpergunta3_corr_svmMS0 = comparacao("2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_svm")
EXPpergunta3_corr_svmMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.2129,0.2721,There is no significant difference between the means.
Precision,3.6068,0.059,There is no significant difference between the means.
Recall,1.2129,0.2721,There is no significant difference between the means.
F1-score,5.2945,0.0224,There is a significant difference between the averages.


In [84]:
EXPpergunta3_corr_gboostMS0 = comparacao("2- Exposure to pesticides/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_gboost",
                              "2- Exposure to pesticides/7- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/EXPtabela_metricasMS0_corr_gboost")
EXPpergunta3_corr_gboostMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.2327,0.63,There is no significant difference between the means.
Precision,0.4961,0.4821,There is no significant difference between the means.
Recall,0.2327,0.63,There is no significant difference between the means.
F1-score,0.292,0.5895,There is no significant difference between the means.


#### MS + ART Oversampling

In [85]:
EXPpergunta3_corr_rlMSArt0 = comparacao("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rl",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rl")
EXPpergunta3_corr_rlMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,30.4212,0.0,There is a significant difference between the averages.
Precision,28.6249,0.0,There is a significant difference between the averages.
Recall,30.4212,0.0,There is a significant difference between the averages.
F1-score,30.4278,0.0,There is a significant difference between the averages.


In [87]:
EXPpergunta3_corr_rfMSArt0 = comparacao("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_rf",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_rf")
EXPpergunta3_corr_rfMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.3559,0.5515,There is no significant difference between the means.
Precision,0.8733,0.3512,There is no significant difference between the means.
Recall,0.3559,0.5515,There is no significant difference between the means.
F1-score,0.215,0.6434,There is no significant difference between the means.


In [88]:
EXPpergunta3_corr_svmMSArt0 = comparacao("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_svm",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_svm")
EXPpergunta3_corr_svmMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,4.3151,0.0391,There is a significant difference between the averages.
Precision,2.3567,0.1263,There is no significant difference between the means.
Recall,4.3151,0.0391,There is a significant difference between the averages.
F1-score,4.9777,0.0268,There is a significant difference between the averages.


In [86]:
EXPpergunta3_corr_gboostMSArt0 = comparacao("2- Exposure to pesticides/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/EXPtabela_metricasMSart0_gboost",
                              "2- Exposure to pesticides/7- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/EXPtabela_metricasMSart0_corr_gboost")
EXPpergunta3_corr_gboostMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0549,0.8149,There is no significant difference between the means.
Precision,0.1705,0.6801,There is no significant difference between the means.
Recall,0.0549,0.8149,There is no significant difference between the means.
F1-score,0.1012,0.7508,There is no significant difference between the means.


# Prognosis prediction

## Structured data

### Ministry of Health

In [114]:
WPtabela_MS = tabela("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl",
                    "3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rf",
                    "3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_svm",
                    "3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_gboost")
WPtabela_MS

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,62.46% (51.47% - 76.47%),64.25% (51.5% - 78.47%),62.46% (51.47% - 76.47%),61.98% (50.47% - 76.43%)
RF,70.28% (58.82% - 80.88%),71.42% (59.37% - 81.37%),70.28% (58.82% - 80.88%),70.19% (58.57% - 80.97%)
SVM,67.79% (48.53% - 80.88%),80.06% (72.52% - 86.29%),67.79% (48.53% - 80.88%),64.6% (36.04% - 80.28%)
GBOOST,70.6% (57.35% - 80.88%),71.49% (57.13% - 81.72%),70.6% (57.35% - 80.88%),70.54% (56.77% - 80.86%)


In [115]:
# Saving the tables obtained
tabela_WPtabela_MS = WPtabela_MS.to_excel("tabela_metricas_WPtabela_MS.xlsx")

In [116]:
WPhiperp_MS = tabela_hiperparametros("3- Worst prognosis/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                    "3- Worst prognosis/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                    "3- Worst prognosis/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                    "3- Worst prognosis/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
WPhiperp_MS

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='gini', n_estimators=400, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=100, subsample=0.8"


In [117]:
# Saving the tables obtained
tabela_WPhiperp_MS = WPhiperp_MS.to_excel("tabela_metricas_WPhiperp_MS.xlsx")

In [94]:
WPgrupo_MS = ["3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl",
                    "3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rf",
                    "3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_svm",
                    "3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_gboost"]
WPresultado_MS = comparacao_entre_modelos(WPgrupo_MS)

# If you want to print the results
for key, df in WPresultado_MS.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl x 3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rf:
          Estatística F Valor p  \
Accuracy       113.6458     0.0   
Precision       90.3899     0.0   
Recall         113.6458     0.0   
F1-score       122.0985     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl x 3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_svm:
          Estatística F Valor p  \
Accuracy        41.6523     0.0   
Precision      593.0183     0.0   
Recall          41.6523     0.0   
F1-sco

### Variables with significance

In [118]:
WPtabela_Art = tabela("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rl",
                     "3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rf",
                      "3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_svm",
                     "3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_gboost")
WPtabela_Art

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,88.19% (81.03% - 93.1%),89.94% (82.76% - 93.99%),88.19% (81.03% - 93.1%),88.08% (80.85% - 93.1%)
RF,86.76% (75.86% - 94.83%),87.91% (76.7% - 94.87%),86.76% (75.86% - 94.83%),86.67% (75.92% - 94.82%)
SVM,88.1% (82.76% - 96.55%),90.12% (82.79% - 96.75%),88.1% (82.76% - 96.55%),87.99% (82.59% - 96.53%)
GBOOST,87.67% (79.31% - 94.83%),88.36% (79.31% - 95.36%),87.67% (79.31% - 94.83%),87.66% (79.06% - 94.84%)


In [119]:
# Saving the tables obtained
tabela_WPtabela_Art = WPtabela_Art.to_excel("tabela_metricas_WPtabela_Art.xlsx")

In [120]:
WPhiperp_Art = tabela_hiperparametros("3- Worst prognosis/3.1- Article - Quimio/5- Best model/melhor_modelo_RL",
                                     "3- Worst prognosis/3.1- Article - Quimio/5- Best model/melhor_modelo_RF",
                                     "3- Worst prognosis/3.1- Article - Quimio/5- Best model/melhor_modelo_SVM",
                                     "3- Worst prognosis/3.1- Article - Quimio/5- Best model/melhor_modelo_GBoost")
WPhiperp_Art

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=100, subsample=0.8"


In [121]:
# Saving the tables obtained
tabela_WPhiperp_Art = WPhiperp_Art.to_excel("tabela_metricas_WPhiperp_Art.xlsx")

In [95]:
WPgrupo_Art = ["3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rl",
                     "3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rf",
                      "3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_svm",
                     "3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_gboost"]
WPresultado_Art = comparacao_entre_modelos(WPgrupo_Art)

# If you want to print the results
for key, df in WPresultado_Art.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rl x 3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rf:
          Estatística F Valor p  \
Accuracy         8.1143  0.0049   
Precision       19.5668     0.0   
Recall           8.1143  0.0049   
F1-score          7.636  0.0063   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rl x 3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_svm:
          Estatística F Valor p  \
Accuracy         0.0396  0.8426   
Precision        0.2752  0.6005   
Recall           0.0396  0.8426   
F1

### Ministry of Health + Variables with significance

In [122]:
WPtabela_MSArt = tabela("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl",
                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rf",
                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_svm",
                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_gboost")
WPtabela_MSArt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,93.2% (85.71% - 100.0%),93.62% (85.86% - 100.0%),93.2% (85.71% - 100.0%),93.19% (85.66% - 100.0%)
RF,91.54% (85.71% - 98.21%),92.03% (85.86% - 98.3%),91.54% (85.71% - 98.21%),91.53% (85.68% - 98.22%)
SVM,91.8% (85.71% - 98.21%),92.47% (87.5% - 98.28%),91.8% (85.71% - 98.21%),91.78% (85.71% - 98.22%)
GBOOST,91.36% (85.71% - 96.43%),91.81% (85.71% - 96.7%),91.36% (85.71% - 96.43%),91.34% (85.43% - 96.44%)


In [123]:
# Saving the tables obtained
tabela_WPtabela_MSArt = WPtabela_MSArt.to_excel("tabela_metricas_WPtabela_MSArt.xlsx")

In [124]:
WPhiperp_MSArt = tabela_hiperparametros("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RL",
                                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RF",
                                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_SVM",
                                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSArt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=4, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=5, n_estimators=100, subsample=0.8"


In [125]:
# Saving the tables obtained
tabela_WPhiperp_MSArt = WPhiperp_MSArt.to_excel("tabela_metricas_WPhiperp_MSArt.xlsx")

In [96]:
WPgrupo_MSArt = ["3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl",
                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rf",
                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_svm",
                       "3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_gboost"]
WPresultado_MSArt = comparacao_entre_modelos(WPgrupo_MSArt)

# If you want to print the results
for key, df in WPresultado_MSArt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl x 3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rf:
          Estatística F Valor p  \
Accuracy        16.6382  0.0001   
Precision       18.6207     0.0   
Recall          16.6382  0.0001   
F1-score        16.5594  0.0001   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl x 3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_svm:
          Estatística F Valor p  \
Accuracy       

### Ministry of Health + Exposure to pesticides

In [126]:
WPtabela_MSExp = tabela("3- Worst prognosis/4- Ministry_of_Health + EXP/4- Metrics/WPtabela_metricasMSEXP_rl",
                       "3- Worst prognosis/4- Ministry_of_Health + EXP/4- Metrics/WPtabela_metricasMSEXP_rf",
                       "3- Worst prognosis/4- Ministry_of_Health + EXP/4- Metrics/WPtabela_metricasMSEXP_rl",
                       "3- Worst prognosis/4- Ministry_of_Health + EXP/4- Metrics/WPtabela_metricasMSEXP_gboost")
WPtabela_MSExp

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,57.88% (44.62% - 72.31%),59.69% (44.64% - 76.07%),57.88% (44.62% - 72.31%),57.42% (44.46% - 72.28%)
RF,69.46% (56.92% - 83.08%),70.55% (56.61% - 84.83%),69.46% (56.92% - 83.08%),69.33% (55.77% - 82.96%)
SVM,57.88% (44.62% - 72.31%),59.69% (44.64% - 76.07%),57.88% (44.62% - 72.31%),57.42% (44.46% - 72.28%)
GBOOST,70.91% (56.92% - 81.54%),72.16% (56.91% - 82.06%),70.91% (56.92% - 81.54%),70.8% (56.86% - 81.64%)


In [127]:
# Saving the tables obtained
tabela_WPtabela_MSExp = WPtabela_MSExp.to_excel("tabela_metricas_WPtabela_MSExp.xlsx")

In [128]:
WPhiperp_MSExp = tabela_hiperparametros("3- Worst prognosis/4- Ministry_of_Health + EXP/5- Best model/melhor_modelo_RL",
                                       "3- Worst prognosis/4- Ministry_of_Health + EXP/5- Best model/melhor_modelo_RF",
                                       "3- Worst prognosis/4- Ministry_of_Health + EXP/5- Best model/melhor_modelo_SVM",
                                       "3- Worst prognosis/4- Ministry_of_Health + EXP/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSExp

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=50, subsample=0.8"


In [129]:
# Saving the tables obtained
tabela_WPhiperp_MSExp = WPhiperp_MSExp.to_excel("tabela_metricas_WPhiperp_MSExp.xlsx")

In [None]:
WPgrupo_MS = []
WPresultado_MS = comparacao_entre_modelos(WPgrupo_MS)

# If you want to print the results
for key, df in WPresultado_MS.items():
    print(f"Results for {key}:")
    print(df, "\n")

### Ministry of Health with Oversampling

In [130]:
WPtabela_MS0 = tabela("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl",
                     "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rf",
                     "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_svm",
                     "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_gboost")
WPtabela_MS0

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,64.9% (55.11% - 71.59%),65.61% (55.38% - 72.52%),64.9% (55.11% - 71.59%),64.74% (54.94% - 71.61%)
RF,77.12% (70.45% - 83.52%),77.96% (71.03% - 84.95%),77.12% (70.45% - 83.52%),77.01% (70.37% - 83.44%)
SVM,64.8% (55.11% - 71.59%),65.45% (56.25% - 72.79%),64.8% (55.11% - 71.59%),64.61% (55.03% - 71.53%)
GBOOST,76.32% (69.89% - 82.39%),76.73% (69.9% - 82.98%),76.32% (69.89% - 82.39%),76.29% (69.89% - 82.38%)


In [131]:
# Saving the tables obtained
tabela_WPtabela_MS0 = WPtabela_MS0.to_excel("tabela_metricas_WPtabela_MS0.xlsx")

In [132]:
WPhiperp_MS0 = tabela_hiperparametros("3- Worst prognosis/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                     "3- Worst prognosis/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                     "3- Worst prognosis/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                     "3- Worst prognosis/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
WPhiperp_MS0

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=50, subsample=1.0"


In [133]:
# Saving the tables obtained
tabela_WPhiperp_MS0 = WPhiperp_MS0.to_excel("tabela_metricas_WPhiperp_MS0.xlsx")

In [97]:
WPgrupo_MS0 = ["3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl",
                     "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rf",
                     "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_svm",
                     "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_gboost"]
WPresultado_MS0 = comparacao_entre_modelos(WPgrupo_MS0)

# If you want to print the results
for key, df in WPresultado_MS0.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl x 3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rf:
          Estatística F Valor p  \
Accuracy       933.4939     0.0   
Precision      853.7312     0.0   
Recall         933.4939     0.0   
F1-score       933.4131     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl x 3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_svm:
          Estatística F Valor p  \
Accuracy         0.0455  0.8314   
Precision         0.

### Ministry of Health + Variables with significance with Oversampling

In [134]:
WPtabela_MSArt0 = tabela("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl",
                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rf",
                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm",
                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_gboost")
WPtabela_MSArt0

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,92.27% (88.03% - 96.58%),92.66% (88.03% - 96.8%),92.27% (88.03% - 96.58%),92.27% (87.96% - 96.58%)
RF,93.96% (88.03% - 98.29%),94.26% (88.03% - 98.35%),93.96% (88.03% - 98.29%),93.95% (88.03% - 98.29%)
SVM,92.08% (87.18% - 96.58%),92.83% (88.26% - 96.81%),92.08% (87.18% - 96.58%),92.05% (86.89% - 96.58%)
GBOOST,92.97% (87.18% - 97.44%),93.26% (87.28% - 97.56%),92.97% (87.18% - 97.44%),92.97% (87.17% - 97.44%)


In [135]:
# Saving the tables obtained
tabela_WPtabela_MSArt0 = WPtabela_MSArt0.to_excel("tabela_metricas_WPtabela_MSArt0.xlsx")

In [136]:
WPhiperp_MSArt0 = tabela_hiperparametros("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RL",
                                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RF",
                                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_SVM",
                                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSArt0

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0"


In [137]:
# Saving the tables obtained
tabela_WPhiperp_MSArt0 = WPhiperp_MSArt0.to_excel("tabela_metricas_WPhiperp_MSArt0.xlsx")

In [98]:
WPgrupo_MSArt0 = ["3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl",
                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rf",
                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm",
                        "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_gboost"]
WPresultado_MSArt0 = comparacao_entre_modelos(WPgrupo_MSArt0)

# If you want to print the results
for key, df in WPresultado_MSArt0.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl x 3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rf:
          Estatística F Valor p  \
Accuracy        36.8599     0.0   
Precision       35.8276     0.0   
Recall          36.8599     0.0   
F1-score        36.7096     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl x 3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm:
  

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [99]:
WPpergunta1_rl = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl",
                                "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl")
WPpergunta1_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,15.6777,0.0001,There is a significant difference between the averages.
Precision,4.2059,0.0416,There is a significant difference between the averages.
Recall,15.6777,0.0001,There is a significant difference between the averages.
F1-score,19.5752,0.0,There is a significant difference between the averages.


In [100]:
WPpergunta1_rf = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rf",
                                "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rf")
WPpergunta1_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,146.9971,0.0,There is a significant difference between the averages.
Precision,139.4692,0.0,There is a significant difference between the averages.
Recall,146.9971,0.0,There is a significant difference between the averages.
F1-score,144.3519,0.0,There is a significant difference between the averages.


In [101]:
WPpergunta1_svm = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_svm",
                                "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_svm")
WPpergunta1_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,17.5966,0.0,There is a significant difference between the averages.
Precision,1092.2174,0.0,There is a significant difference between the averages.
Recall,17.5966,0.0,There is a significant difference between the averages.
F1-score,0.0004,0.9832,There is no significant difference between the means.


In [102]:
WPpergunta1_gboost = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_gboost",
                                "3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_gboost")
WPpergunta1_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,111.7866,0.0,There is a significant difference between the averages.
Precision,88.7276,0.0,There is a significant difference between the averages.
Recall,111.7866,0.0,There is a significant difference between the averages.
F1-score,110.8949,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [103]:
WPpergunta2_rl = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl",
                                "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl")
WPpergunta2_rl

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,6.4576,0.0118,There is a significant difference between the averages.
Precision,8.2189,0.0046,There is a significant difference between the averages.
Recall,6.4576,0.0118,There is a significant difference between the averages.
F1-score,6.4517,0.0119,There is a significant difference between the averages.


In [104]:
WPpergunta2_rf = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rf",
                                "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rf")
WPpergunta2_rf

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,52.9376,0.0,There is a significant difference between the averages.
Precision,52.5121,0.0,There is a significant difference between the averages.
Recall,52.9376,0.0,There is a significant difference between the averages.
F1-score,52.7632,0.0,There is a significant difference between the averages.


In [105]:
WPpergunta2_svm = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_svm",
                                "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm")
WPpergunta2_svm

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.7618,0.3838,There is no significant difference between the means.
Precision,1.5667,0.2122,There is no significant difference between the means.
Recall,0.7618,0.3838,There is no significant difference between the means.
F1-score,0.7055,0.402,There is no significant difference between the means.


In [106]:
WPpergunta2_gboost = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_gboost",
                                "3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_gboost")
WPpergunta2_gboost

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,24.838,0.0,There is a significant difference between the averages.
Precision,21.151,0.0,There is a significant difference between the averages.
Recall,24.838,0.0,There is a significant difference between the averages.
F1-score,25.0295,0.0,There is a significant difference between the averages.


## Structured data with aggravated risk stratification for patients exposed to pesticides

### Ministry of Health

In [138]:
WPtabela_MS_alt = tabela("3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rl",
                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rf",
                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_svm",
                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_gboost")
WPtabela_MS_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,64.06% (50.0% - 75.0%),65.61% (50.17% - 76.54%),64.06% (50.0% - 75.0%),63.85% (49.91% - 74.96%)
RF,69.51% (58.82% - 82.35%),70.55% (59.04% - 83.04%),69.51% (58.82% - 82.35%),69.37% (58.1% - 82.41%)
SVM,72.71% (52.94% - 83.82%),81.93% (71.36% - 87.45%),72.71% (52.94% - 83.82%),70.46% (46.64% - 83.32%)
GBOOST,69.1% (50.0% - 83.82%),70.04% (50.0% - 83.85%),69.1% (50.0% - 83.82%),68.99% (50.0% - 83.82%)


In [139]:
# Saving the tables obtained
tabela_WPtabela_MS_alt = WPtabela_MS_alt.to_excel("tabela_metricas_WPtabela_MS_alt.xlsx")

In [140]:
WPhiperp_MS_alt = tabela_hiperparametros("3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
WPhiperp_MS_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=1.0"


In [141]:
# Saving the tables obtained
tabela_WPhiperp_MS_alt = WPhiperp_MS_alt.to_excel("tabela_metricas_WPhiperp_MS_alt.xlsx")

In [107]:
WPgrupo_MS_alt = ["3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rl",
                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rf",
                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_svm",
                        "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_gboost"]
WPresultado_MS_alt = comparacao_entre_modelos(WPgrupo_MS_alt)

# If you want to print the results
for key, df in WPresultado_MS_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rf:
          Estatística F Valor p  \
Accuracy        59.3914     0.0   
Precision       50.1063     0.0   
Recall          59.3914     0.0   
F1-score         59.449     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_svm:
          Estatística 

### Variables with significance

In [142]:
WPtabela_Art_alt = tabela("3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rl",
                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rf",
                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_svm",
                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_gboost")
WPtabela_Art_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,90.69% (84.48% - 98.28%),91.73% (85.03% - 98.33%),90.69% (84.48% - 98.28%),90.64% (84.05% - 98.27%)
RF,91.1% (82.76% - 96.55%),91.97% (83.0% - 96.55%),91.1% (82.76% - 96.55%),91.05% (82.78% - 96.55%)
SVM,91.09% (84.48% - 96.55%),92.24% (86.32% - 96.8%),91.09% (84.48% - 96.55%),91.03% (84.19% - 96.56%)
GBOOST,90.48% (79.31% - 96.55%),91.18% (79.16% - 96.55%),90.48% (79.31% - 96.55%),90.44% (78.82% - 96.55%)


In [143]:
# Saving the tables obtained
tabela_WPtabela_Art_alt = WPtabela_Art_alt.to_excel("tabela_metricas_WPtabela_Art_alt.xlsx")

In [144]:
WPhiperp_Art_alt = tabela_hiperparametros("3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_RL",
                                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_RF",
                                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_SVM",
                                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/5- Best model/melhor_modelo_GBoost")
WPhiperp_Art_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [145]:
# Saving the tables obtained
tabela_WPhiperp_Art_alt = WPhiperp_Art_alt.to_excel("tabela_metricas_WPhiperp_Art_alt.xlsx")

In [108]:
WPgrupo_Art_alt = ["3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rl",
                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rf",
                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_svm",
                         "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_gboost"]
WPresultado_Art_alt = comparacao_entre_modelos(WPgrupo_Art_alt)

# If you want to print the results
for key, df in WPresultado_Art_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rl x 3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rf:
          Estatística F Valor p  \
Accuracy         1.2045  0.2738   
Precision        0.5031   0.479   
Recall           1.2045  0.2738   
F1-score         1.1754  0.2796   

                                                        Mensagem  
Accuracy   There is no significant difference between the means.  
Precision  There is no significant difference between the means.  
Recall     There is no significant difference between the means.  
F1-score   There is no significant difference between the means.   

Results for 3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rl x 3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_svm:
          Estatística F 

### Ministry of Health + Exposed to pesticides

In [146]:
WPtabela_MSExp_alt = tabela("3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/4- Metrics/WPtabela_metricasMSEXP_alt_rl",
                           "3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/4- Metrics/WPtabela_metricasMSEXP_alt_rf",
                           "3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/4- Metrics/WPtabela_metricasMSEXP_alt_svm",
                           "3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/4- Metrics/WPtabela_metricasMSEXP_alt_gboost")
WPtabela_MSExp_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,59.29% (43.08% - 72.31%),61.48% (43.26% - 77.03%),59.29% (43.08% - 72.31%),58.87% (43.08% - 71.94%)
RF,70.74% (56.92% - 80.0%),71.75% (56.91% - 81.4%),70.74% (56.92% - 80.0%),70.67% (56.86% - 80.0%)
SVM,70.08% (46.15% - 83.08%),79.69% (56.78% - 85.56%),70.08% (46.15% - 83.08%),67.72% (38.95% - 82.55%)
GBOOST,71.85% (56.92% - 84.62%),73.01% (57.11% - 86.19%),71.85% (56.92% - 84.62%),71.78% (56.8% - 84.68%)


In [147]:
# Saving the tables obtained
tabela_WPtabela_MSExp_alt = WPtabela_MSExp_alt.to_excel("tabela_metricas_WPtabela_MSExp_alt.xlsx")

In [148]:
WPhiperp_MSExp_alt = tabela_hiperparametros("3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/5- Best model/melhor_modelo_RL",
                                           "3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/5- Best model/melhor_modelo_RF",
                                           "3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/5- Best model/melhor_modelo_SVM",
                                           "3- Worst prognosis/7- Analise_dados_alterados/4- MH + Exp/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSExp_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=100, subsample=0.8"


In [149]:
# Saving the tables obtained
tabela_WPhiperp_MSExp_alt = WPhiperp_MSExp_alt.to_excel("tabela_metricas_WPhiperp_MSExp_alt.xlsx")

### Ministry of Health + Variables with significance

In [163]:
WPtabela_MSArt_alt = tabela("3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rl",
                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rf",
                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_svm",
                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_gboost")
WPtabela_MSArt_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,89.91% (78.57% - 98.21%),90.66% (81.39% - 98.29%),89.91% (78.57% - 98.21%),89.89% (79.09% - 98.22%)
RF,91.57% (76.79% - 100.0%),92.14% (80.09% - 100.0%),91.57% (76.79% - 100.0%),91.56% (76.31% - 100.0%)
SVM,89.39% (73.21% - 98.21%),90.12% (73.28% - 98.28%),89.39% (73.21% - 98.21%),89.37% (73.22% - 98.22%)
GBOOST,91.84% (76.79% - 100.0%),92.29% (80.52% - 100.0%),91.84% (76.79% - 100.0%),91.83% (76.59% - 100.0%)


In [164]:
# Saving the tables obtained
tabela_WPtabela_MSArt_alt = WPtabela_MSArt_alt.to_excel("tabela_metricas_WPtabela_MSArt_alt.xlsx")

In [165]:
WPhiperp_MSArt_alt = tabela_hiperparametros("3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RL",
                                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_RF",
                                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_SVM",
                                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSArt_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0"


In [166]:
# Saving the tables obtained
tabela_WPhiperp_MSArt_alt = WPhiperp_MSArt_alt.to_excel("tabela_metricas_WPhiperp_MSArt_alt.xlsx")

In [109]:
WPgrupo_MSArt_alt = ["3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rl",
                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rf",
                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_svm",
                           "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_gboost"]
WPresultado_MSArt_alt = comparacao_entre_modelos(WPgrupo_MSArt_alt)

# If you want to print the results
for key, df in WPresultado_MSArt_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rf:
          Estatística F Valor p  \
Accuracy         9.0426   0.003   
Precision        8.8932  0.0032   
Recall           9.0426   0.003   
F1-score         9.0095   0.003   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministr

### Ministry of Health with Oversampling

In [151]:
WPtabela_MS0_alt = tabela("3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rl",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rf",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_svm",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_gboost")
WPtabela_MS0_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,60.87% (53.41% - 69.89%),61.49% (54.18% - 71.09%),60.87% (53.41% - 69.89%),60.68% (52.94% - 69.45%)
RF,78.47% (72.16% - 83.52%),78.94% (72.14% - 83.71%),78.47% (72.16% - 83.52%),78.41% (72.15% - 83.39%)
SVM,65.0% (49.43% - 77.84%),74.94% (61.53% - 82.27%),65.0% (49.43% - 77.84%),61.33% (39.64% - 76.93%)
GBOOST,76.47% (70.45% - 81.82%),76.75% (70.5% - 82.07%),76.47% (70.45% - 81.82%),76.45% (70.44% - 81.83%)


In [152]:
# Saving the tables obtained
tabela_WPtabela_MS0_alt = WPtabela_MS0_alt.to_excel("tabela_metricas_WPtabela_MS0_alt.xlsx")

In [153]:
WPhiperp_MS0_alt = tabela_hiperparametros("3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
WPhiperp_MS0_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=100, subsample=0.8"


In [154]:
# Saving the tables obtained
tabela_WPhiperp_MS0_alt = WPhiperp_MS0_alt.to_excel("tabela_metricas_WPhiperp_MS0_alt.xlsx")

In [110]:
WPgrupo_MS0_alt = ["3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rl",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rf",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_svm",
                                         "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_gboost"]
WPresultado_MS0_alt = comparacao_entre_modelos(WPgrupo_MS0_alt)

# If you want to print the results
for key, df in WPresultado_MS0_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rf:
          Estatística F Valor p  \
Accuracy      1648.1289     0.0   
Precision     1498.9779     0.0   
Recall        1648.1289     0.0   
F1-score      1676.8386     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metr

### Ministry of Health + Variables with significance with Oversampling

In [159]:
WPtabela_MSArt0_alt = tabela("3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rl",
                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rf",
                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_svm",
                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_gboost")
WPtabela_MSArt0_alt

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,93.54% (88.03% - 98.29%),93.84% (88.88% - 98.36%),93.54% (88.03% - 98.29%),93.53% (88.1% - 98.3%)
RF,94.72% (90.6% - 98.29%),95.1% (90.6% - 98.35%),94.72% (90.6% - 98.29%),94.71% (90.59% - 98.29%)
SVM,92.25% (85.47% - 97.44%),92.51% (85.45% - 97.45%),92.25% (85.47% - 97.44%),92.24% (85.46% - 97.44%)
GBOOST,93.2% (87.18% - 99.15%),93.42% (87.22% - 99.16%),93.2% (87.18% - 99.15%),93.19% (87.16% - 99.14%)


In [161]:
# Saving the tables obtained
tabela_WPtabela_MSArt0_alt = WPtabela_MSArt0_alt.to_excel("tabela_metricas_WPtabela_MSArt0_alt.xlsx")

In [160]:
WPhiperp_MSArt0_alt = tabela_hiperparametros("3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RL",
                                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_RF",
                                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_SVM",
                                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSArt0_alt

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=4, min_samples_split=15, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=100, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0"


In [162]:
# Saving the tables obtained
tabela_WPhiperp_MSArt0_alt = WPhiperp_MSArt0_alt.to_excel("tabela_metricas_WPhiperp_MSArt0_alt.xlsx")

In [112]:
WPgrupo_MSArt0_alt = ["3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rl",
                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rf",
                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_svm",
                           "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_gboost"]
WPresultado_MSArt0_alt = comparacao_entre_modelos(WPgrupo_MSArt0_alt)

# If you want to print the results
for key, df in WPresultado_MSArt0_alt.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rl x 3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rf:
          Estatística F Valor p  \
Accuracy        21.5371     0.0   
Precision       28.6654     0.0   
Recall          21.5371     0.0   
F1-score        21.2837     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rl x 3- Worst prognosis/7- 

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [113]:
WPpergunta1_rl_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rl",
                                "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rl")
WPpergunta1_rl_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,28.2674,0.0,There is a significant difference between the averages.
Precision,44.3401,0.0,There is a significant difference between the averages.
Recall,28.2674,0.0,There is a significant difference between the averages.
F1-score,27.8165,0.0,There is a significant difference between the averages.


In [114]:
WPpergunta1_rf_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rf",
                                "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rf")
WPpergunta1_rf_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,243.6202,0.0,There is a significant difference between the averages.
Precision,228.5081,0.0,There is a significant difference between the averages.
Recall,243.6202,0.0,There is a significant difference between the averages.
F1-score,241.6164,0.0,There is a significant difference between the averages.


In [115]:
WPpergunta1_svm_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_svm",
                                "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_svm")
WPpergunta1_svm_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,85.8823,0.0,There is a significant difference between the averages.
Precision,236.595,0.0,There is a significant difference between the averages.
Recall,85.8823,0.0,There is a significant difference between the averages.
F1-score,78.1428,0.0,There is a significant difference between the averages.


In [116]:
WPpergunta1_gboost_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_gboost",
                                "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_gboost")
WPpergunta1_gboost_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,144.6243,0.0,There is a significant difference between the averages.
Precision,119.6725,0.0,There is a significant difference between the averages.
Recall,144.6243,0.0,There is a significant difference between the averages.
F1-score,145.9069,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [117]:
WPpergunta2_rl_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rl",
                                "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rl")
WPpergunta2_rl_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,70.6969,0.0,There is a significant difference between the averages.
Precision,66.157,0.0,There is a significant difference between the averages.
Recall,70.6969,0.0,There is a significant difference between the averages.
F1-score,70.9899,0.0,There is a significant difference between the averages.


In [118]:
WPpergunta2_rf_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rf",
                                "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rf")
WPpergunta2_rf_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,53.9722,0.0,There is a significant difference between the averages.
Precision,59.1205,0.0,There is a significant difference between the averages.
Recall,53.9722,0.0,There is a significant difference between the averages.
F1-score,53.3868,0.0,There is a significant difference between the averages.


In [119]:
WPpergunta2_svm_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_svm",
                                "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_svm")
WPpergunta2_svm_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,32.5334,0.0,There is a significant difference between the averages.
Precision,27.213,0.0,There is a significant difference between the averages.
Recall,32.5334,0.0,There is a significant difference between the averages.
F1-score,32.4789,0.0,There is a significant difference between the averages.


In [120]:
WPpergunta2_gboost_alt = comparacao("3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_gboost",
                                "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_gboost")
WPpergunta2_gboost_alt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,8.2427,0.0045,There is a significant difference between the averages.
Precision,6.6727,0.0105,There is a significant difference between the averages.
Recall,8.2427,0.0045,There is a significant difference between the averages.
F1-score,8.2103,0.0046,There is a significant difference between the averages.


### Is there a difference between the models predicted with structured data and the models with restratified data?

#### MS

In [121]:
WPpergunta3_alt_rlMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl",
                              "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rl")
WPpergunta3_alt_rlMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,4.9182,0.0277,There is a significant difference between the averages.
Precision,3.1559,0.0772,There is no significant difference between the means.
Recall,4.9182,0.0277,There is a significant difference between the averages.
F1-score,6.5703,0.0111,There is a significant difference between the averages.


In [122]:
WPpergunta3_alt_rfMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rf",
                              "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_rf")
WPpergunta3_alt_rfMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.1303,0.289,There is no significant difference between the means.
Precision,1.6206,0.2045,There is no significant difference between the means.
Recall,1.1303,0.289,There is no significant difference between the means.
F1-score,1.2578,0.2634,There is no significant difference between the means.


In [123]:
WPpergunta3_alt_svmMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_svm",
                              "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_svm")
WPpergunta3_alt_svmMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,30.3438,0.0,There is a significant difference between the averages.
Precision,22.3791,0.0,There is a significant difference between the averages.
Recall,30.3438,0.0,There is a significant difference between the averages.
F1-score,29.0482,0.0,There is a significant difference between the averages.


In [124]:
WPpergunta3_alt_gboostMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_gboost",
                              "3- Worst prognosis/7- Analise_dados_alterados/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_alt_gboost")
WPpergunta3_alt_gboostMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,4.2562,0.0404,There is a significant difference between the averages.
Precision,3.868,0.0506,There is no significant difference between the means.
Recall,4.2562,0.0404,There is a significant difference between the averages.
F1-score,4.438,0.0364,There is a significant difference between the averages.


#### MS + ART

In [125]:
WPpergunta3_alt_rlMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl",
                              "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rl")
WPpergunta3_alt_rlMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,44.4446,0.0,There is a significant difference between the averages.
Precision,45.1762,0.0,There is a significant difference between the averages.
Recall,44.4446,0.0,There is a significant difference between the averages.
F1-score,44.5923,0.0,There is a significant difference between the averages.


In [127]:
WPpergunta3_alt_rfMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rf",
                              "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_rf")
WPpergunta3_alt_rfMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0056,0.9404,There is no significant difference between the means.
Precision,0.0611,0.8051,There is no significant difference between the means.
Recall,0.0056,0.9404,There is no significant difference between the means.
F1-score,0.0027,0.9589,There is no significant difference between the means.


In [126]:
WPpergunta3_alt_svmMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_svm",
                              "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_svm")
WPpergunta3_alt_svmMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,24.1268,0.0,There is a significant difference between the averages.
Precision,28.6326,0.0,There is a significant difference between the averages.
Recall,24.1268,0.0,There is a significant difference between the averages.
F1-score,23.8403,0.0,There is a significant difference between the averages.


In [128]:
WPpergunta3_alt_gboostMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_gboost",
                              "3- Worst prognosis/7- Analise_dados_alterados/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_alt_gboost")
WPpergunta3_alt_gboostMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.9373,0.3341,There is no significant difference between the means.
Precision,1.1148,0.2923,There is no significant difference between the means.
Recall,0.9373,0.3341,There is no significant difference between the means.
F1-score,0.9711,0.3256,There is no significant difference between the means.


#### ART

In [129]:
WPpergunta3_alt_rlArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rl",
                              "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rl")
WPpergunta3_alt_rlArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,40.2801,0.0,There is a significant difference between the averages.
Precision,29.9656,0.0,There is a significant difference between the averages.
Recall,40.2801,0.0,There is a significant difference between the averages.
F1-score,41.1475,0.0,There is a significant difference between the averages.


In [130]:
WPpergunta3_alt_rfArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rf",
                              "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_rf")
WPpergunta3_alt_rfArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,78.8622,0.0,There is a significant difference between the averages.
Precision,77.7729,0.0,There is a significant difference between the averages.
Recall,78.8622,0.0,There is a significant difference between the averages.
F1-score,79.0223,0.0,There is a significant difference between the averages.


In [131]:
WPpergunta3_alt_svmArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_svm",
                              "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_svm")
WPpergunta3_alt_svmArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,50.8132,0.0,There is a significant difference between the averages.
Precision,37.6275,0.0,There is a significant difference between the averages.
Recall,50.8132,0.0,There is a significant difference between the averages.
F1-score,51.2876,0.0,There is a significant difference between the averages.


In [133]:
WPpergunta3_alt_gboostArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_gboost",
                              "3- Worst prognosis/7- Analise_dados_alterados/3.1- Article - Quimio/4- Metrics/WPtabela_metricas_alt_art_gboost")
WPpergunta3_alt_gboostArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,31.9327,0.0,There is a significant difference between the averages.
Precision,37.2107,0.0,There is a significant difference between the averages.
Recall,31.9327,0.0,There is a significant difference between the averages.
F1-score,30.9038,0.0,There is a significant difference between the averages.


#### MS Oversampling

In [132]:
WPpergunta3_alt_rlMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl",
                              "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rl")
WPpergunta3_alt_rlMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,74.5162,0.0,There is a significant difference between the averages.
Precision,71.0395,0.0,There is a significant difference between the averages.
Recall,74.5162,0.0,There is a significant difference between the averages.
F1-score,75.8544,0.0,There is a significant difference between the averages.


In [134]:
WPpergunta3_alt_rfMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rf",
                              "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_rf")
WPpergunta3_alt_rfMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,14.0422,0.0002,There is a significant difference between the averages.
Precision,6.7079,0.0103,There is a significant difference between the averages.
Recall,14.0422,0.0002,There is a significant difference between the averages.
F1-score,14.9707,0.0001,There is a significant difference between the averages.


In [136]:
WPpergunta3_alt_svmMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_svm",
                              "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_svm")
WPpergunta3_alt_svmMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0975,0.7551,There is no significant difference between the means.
Precision,367.6287,0.0,There is a significant difference between the averages.
Recall,0.0975,0.7551,There is no significant difference between the means.
F1-score,17.5203,0.0,There is a significant difference between the averages.


In [137]:
WPpergunta3_alt_gboostMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_gboost",
                              "3- Worst prognosis/7- Analise_dados_alterados/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_alt_gboost")
WPpergunta3_alt_gboostMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.1564,0.693,There is no significant difference between the means.
Precision,0.005,0.9435,There is no significant difference between the means.
Recall,0.1564,0.693,There is no significant difference between the means.
F1-score,0.188,0.665,There is no significant difference between the means.


#### MS + ART Oversampling

In [138]:
WPpergunta3_alt_rlMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl",
                              "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rl")
WPpergunta3_alt_rlMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,21.2915,0.0,There is a significant difference between the averages.
Precision,19.5169,0.0,There is a significant difference between the averages.
Recall,21.2915,0.0,There is a significant difference between the averages.
F1-score,21.3454,0.0,There is a significant difference between the averages.


In [139]:
WPpergunta3_alt_rfMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rf",
                              "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_rf")
WPpergunta3_alt_rfMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,8.7202,0.0035,There is a significant difference between the averages.
Precision,12.5325,0.0005,There is a significant difference between the averages.
Recall,8.7202,0.0035,There is a significant difference between the averages.
F1-score,8.6081,0.0037,There is a significant difference between the averages.


In [140]:
WPpergunta3_alt_svmMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm",
                              "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_svm")
WPpergunta3_alt_svmMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.2712,0.6031,There is no significant difference between the means.
Precision,1.0124,0.3156,There is no significant difference between the means.
Recall,0.2712,0.6031,There is no significant difference between the means.
F1-score,0.3449,0.5577,There is no significant difference between the means.


In [141]:
WPpergunta3_alt_gboostMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_gboost",
                              "3- Worst prognosis/7- Analise_dados_alterados/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_alt_gboost")
WPpergunta3_alt_gboostMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.6112,0.4353,There is no significant difference between the means.
Precision,0.3243,0.5697,There is no significant difference between the means.
Recall,0.6112,0.4353,There is no significant difference between the means.
F1-score,0.61,0.4357,There is no significant difference between the means.


## Data with variable selection by correlation

### Ministry of Health

In [167]:
WPtabela_MS_corr = tabela("3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rl",
                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rf",
                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_svm",
                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_gboost")
WPtabela_MS_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,60.03% (41.18% - 75.0%),62.56% (49.73% - 75.0%),60.03% (41.18% - 75.0%),59.26% (37.28% - 74.98%)
RF,69.74% (54.41% - 82.35%),70.54% (54.51% - 82.35%),69.74% (54.41% - 82.35%),69.65% (54.16% - 82.35%)
SVM,78.57% (52.94% - 89.71%),83.75% (69.06% - 90.44%),78.57% (52.94% - 89.71%),77.71% (44.57% - 89.56%)
GBOOST,75.44% (60.29% - 88.24%),76.52% (60.17% - 88.24%),75.44% (60.29% - 88.24%),75.35% (60.11% - 88.24%)


In [168]:
# Saving the tables obtained
tabela_WPtabela_MS_corr = WPtabela_MS_corr.to_excel("tabela_metricas_WPtabela_MS_corr.xlsx")

In [169]:
WPhiperp_MS_corr = tabela_hiperparametros("3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_RL",
                                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_RF",
                                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_SVM",
                                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/5- Best model/melhor_modelo_GBoost")
WPhiperp_MS_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=50, subsample=0.8"


In [170]:
# Saving the tables obtained
tabela_WPhiperp_MS_corr = WPhiperp_MS_corr.to_excel("tabela_metricas_WPhiperp_MS_corr_alt.xlsx")

In [142]:
WPgrupo_MS_corr = ["3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rl",
                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rf",
                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_svm",
                         "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_gboost"]
WPresultado_MS_corr = comparacao_entre_modelos(WPgrupo_MS_corr)

# If you want to print the results
for key, df in WPresultado_MS_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rl x 3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rf:
          Estatística F Valor p  \
Accuracy        145.243     0.0   
Precision      105.4179     0.0   
Recall          145.243     0.0   
F1-score       147.8754     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rl x 3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_svm:
          Estatística F Valor p  \
Acc

### Variables with significance

In [171]:
WPtabela_Art_corr = tabela("3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rl",
                          "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rf",
                          "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_svm",
                          "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_gboost")
WPtabela_Art_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,89.1% (81.03% - 98.28%),91.12% (83.04% - 98.33%),89.1% (81.03% - 98.28%),88.96% (80.19% - 98.27%)
RF,87.28% (77.59% - 94.83%),89.09% (77.7% - 95.4%),87.28% (77.59% - 94.83%),87.13% (77.62% - 94.86%)
SVM,88.78% (82.76% - 98.28%),90.98% (86.96% - 98.32%),88.78% (82.76% - 98.28%),88.61% (82.0% - 98.27%)
GBOOST,87.59% (79.31% - 94.83%),89.44% (79.6% - 95.23%),87.59% (79.31% - 94.83%),87.5% (79.36% - 94.74%)


In [172]:
# Saving the tables obtained
tabela_WPtabela_Art_corr = WPtabela_Art_corr.to_excel("tabela_metricas_WPtabela_Art_corr.xlsx")

In [173]:
WPhiperp_Art_corr = tabela_hiperparametros("3- Worst prognosis/8- Analise_correlacao/3- Article/5- Best model/melhor_modelo_RL",
                                          "3- Worst prognosis/8- Analise_correlacao/3- Article/5- Best model/melhor_modelo_RF",
                                          "3- Worst prognosis/8- Analise_correlacao/3- Article/5- Best model/melhor_modelo_SVM",
                                          "3- Worst prognosis/8- Analise_correlacao/3- Article/5- Best model/melhor_modelo_GBoost")
WPhiperp_Art_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=100, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=5, n_estimators=100, subsample=0.8"


In [174]:
# Saving the tables obtained
tabela_WPhiperp_Art_corr = WPhiperp_Art_corr.to_excel("tabela_metricas_WPhiperp_Art_corr.xlsx")

In [143]:
WPgrupo_Art_corr = ["3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rl",
                          "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rf",
                          "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_svm",
                          "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_gboost"]
WPresultado_Art_corr = comparacao_entre_modelos(WPgrupo_Art_corr)

# If you want to print the results
for key, df in WPresultado_Art_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rl x 3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rf:
          Estatística F Valor p  \
Accuracy        13.5767  0.0003   
Precision       22.0769     0.0   
Recall          13.5767  0.0003   
F1-score        13.2901  0.0003   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rl x 3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_svm:
          Estatística F Valor p  \
Accuracy         0.4543  0.5011   
Precisio

### Ministry of Health + Variables with significance

In [175]:
WPtabela_MSArt_corr = tabela("3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rl",
                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rf",
                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_svm",
                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_gboost")
WPtabela_MSArt_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,92.52% (80.36% - 100.0%),93.1% (80.86% - 100.0%),92.52% (80.36% - 100.0%),92.51% (80.38% - 100.0%)
RF,94.5% (85.71% - 100.0%),94.78% (86.46% - 100.0%),94.5% (85.71% - 100.0%),94.5% (85.64% - 100.0%)
SVM,92.23% (80.36% - 98.21%),92.66% (80.88% - 98.28%),92.23% (80.36% - 98.21%),92.22% (79.93% - 98.21%)
GBOOST,94.16% (87.5% - 100.0%),94.45% (87.85% - 100.0%),94.16% (87.5% - 100.0%),94.16% (87.24% - 100.0%)


In [176]:
# Saving the tables obtained
tabela_WPtabela_MSArt_corr = WPtabela_MSArt_corr.to_excel("tabela_metricas_WPtabela_MSArt_corr.xlsx")

In [177]:
WPhiperp_MSArt_corr = tabela_hiperparametros("3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_RL",
                                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_RF",
                                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_SVM",
                                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSArt_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l2', solver='liblinear'"
RF,"criterion='entropy', n_estimators=100, max_depth=None, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8"


In [178]:
# Saving the tables obtained
tabela_WPhiperp_MSArt_corr = WPhiperp_MSArt_corr.to_excel("tabela_metricas_WPhiperp_MSArt_corr.xlsx")

In [144]:
WPgrupo_MSArt_corr = ["3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rl",
                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rf",
                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_svm",
                            "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_gboost"]
WPresultado_MSArt_corr = comparacao_entre_modelos(WPgrupo_MSArt_corr)

# If you want to print the results
for key, df in WPresultado_MSArt_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rl x 3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rf:
          Estatística F Valor p  \
Accuracy         15.008  0.0001   
Precision       13.0628  0.0004   
Recall           15.008  0.0001   
F1-score        14.9535  0.0001   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rl x 3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMS

### Ministry of Health with Oversampling

In [179]:
WPtabela_MS0_corr = tabela("3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rl",
                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rf",
                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_svm",
                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_gboost")
WPtabela_MS0_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,62.15% (54.55% - 67.61%),62.91% (56.25% - 67.85%),62.15% (54.55% - 67.61%),61.9% (52.16% - 67.65%)
RF,74.22% (67.61% - 81.82%),74.58% (67.76% - 82.1%),74.22% (67.61% - 81.82%),74.18% (67.58% - 81.79%)
SVM,65.12% (51.14% - 73.86%),70.32% (56.61% - 77.09%),65.12% (51.14% - 73.86%),63.07% (44.26% - 73.66%)
GBOOST,74.1% (64.77% - 80.68%),74.61% (65.64% - 81.58%),74.1% (64.77% - 80.68%),74.07% (64.66% - 80.44%)


In [180]:
# Saving the tables obtained
tabela_WPtabela_MS0_corr = WPtabela_MS0_corr.to_excel("tabela_metricas_WPtabela_MS0_corr.xlsx")

In [181]:
WPhiperp_MS0_corr = tabela_hiperparametros("3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RL",
                                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_RF",
                                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_SVM",
                                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/5- Best model/melhor_modelo_GBoost")
WPhiperp_MS0_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='entropy', n_estimators=200, max_depth=None, min_samples_split=15, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='rbf', C=10, gamma=1"
GBOOST,"max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0"


In [182]:
# Saving the tables obtained
tabela_WPhiperp_MS0_corr = WPhiperp_MS0_corr.to_excel("tabela_metricas_WPhiperp_MS0_corr.xlsx")

In [145]:
WPgrupo_MS0_corr = ["3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rl",
                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rf",
                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_svm",
                          "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_gboost"]
WPresultado_MS0_corr = comparacao_entre_modelos(WPgrupo_MS0_corr)

# If you want to print the results
for key, df in WPresultado_MS0_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rl x 3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rf:
          Estatística F Valor p  \
Accuracy       932.4412     0.0   
Precision      899.6112     0.0   
Recall         932.4412     0.0   
F1-score       908.1332     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rl x 3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metr

### Ministry of Health + Variables with significance with Oversampling

In [183]:
WPtabela_MSArt0_corr = tabela("3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rl",
                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rf",
                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_svm",
                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_gboost")
WPtabela_MSArt0_corr

Unnamed: 0,Accuracy,Precision,Recall,F1-score
LR,91.67% (85.47% - 95.73%),92.24% (86.49% - 96.04%),91.67% (85.47% - 95.73%),91.64% (85.4% - 95.73%)
RF,93.74% (87.18% - 97.44%),94.17% (87.22% - 97.58%),93.74% (87.18% - 97.44%),93.72% (87.19% - 97.44%)
SVM,90.68% (84.62% - 95.73%),91.7% (84.63% - 96.08%),90.68% (84.62% - 95.73%),90.64% (84.43% - 95.73%)
GBOOST,92.96% (87.18% - 97.44%),93.23% (87.19% - 97.45%),92.96% (87.18% - 97.44%),92.95% (87.15% - 97.43%)


In [184]:
# Saving the tables obtained
tabela_WPtabela_MSArt0_corr = WPtabela_MSArt0_corr.to_excel("tabela_metricas_WPtabela_MSArt0_corr.xlsx")

In [185]:
WPhiperp_MSArt0_corr = tabela_hiperparametros("3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_RL",
                                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_RF",
                                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_SVM",
                                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/5- Best model/melhor_modelo_GBoost")
WPhiperp_MSArt0_corr

Unnamed: 0,Hyperparameters
LR,"penalty='l1', solver='liblinear'"
RF,"criterion='gini', n_estimators=200, max_depth=10, min_samples_split=5, max_features='log2', class_weight='balanced_subsample'"
SVM,"kernel='linear', C=1, gamma=1"
GBOOST,"max_depth=4, min_samples_split=2, n_estimators=100, subsample=0.8"


In [187]:
# Saving the tables obtained
tabela_WPhiperp_MSArt0_corr = WPhiperp_MSArt0_corr.to_excel("tabela_metricas_WPhiperp_MSArt0_corr.xlsx")

In [146]:
WPgrupo_MSArt0_corr = ["3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rl",
                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rf",
                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_svm",
                             "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_gboost"]
WPresultado_MSArt0_corr = comparacao_entre_modelos(WPgrupo_MSArt0_corr)

# If you want to print the results
for key, df in WPresultado_MSArt0_corr.items():
    print(f"Results for {key}:")
    print(df, "\n")

Results for 3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rl x 3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rf:
          Estatística F Valor p  \
Accuracy        52.0051     0.0   
Precision       60.9257     0.0   
Recall          52.0051     0.0   
F1-score        51.9664     0.0   

                                                          Mensagem  
Accuracy   There is a significant difference between the averages.  
Precision  There is a significant difference between the averages.  
Recall     There is a significant difference between the averages.  
F1-score   There is a significant difference between the averages.   

Results for 3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rl x 3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+arti

### Are the models that use the Ministry of Health and Ministry of Health Oversampling variables the same?

In [147]:
WPpergunta1_rl_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rl",
                                "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rl")
WPpergunta1_rl_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2483.3918,0.0,There is a significant difference between the averages.
Precision,2523.6133,0.0,There is a significant difference between the averages.
Recall,2483.3918,0.0,There is a significant difference between the averages.
F1-score,2207.0034,0.0,There is a significant difference between the averages.


In [148]:
WPpergunta1_rf_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rf",
                                "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rf")
WPpergunta1_rf_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1757.0385,0.0,There is a significant difference between the averages.
Precision,1762.6833,0.0,There is a significant difference between the averages.
Recall,1757.0385,0.0,There is a significant difference between the averages.
F1-score,1720.0004,0.0,There is a significant difference between the averages.


In [149]:
WPpergunta1_svm_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_svm",
                                "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_svm")
WPpergunta1_svm_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,265.9525,0.0,There is a significant difference between the averages.
Precision,307.6744,0.0,There is a significant difference between the averages.
Recall,265.9525,0.0,There is a significant difference between the averages.
F1-score,248.6699,0.0,There is a significant difference between the averages.


In [150]:
WPpergunta1_gboost_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_gboost",
                                "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_gboost")
WPpergunta1_gboost_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,883.4957,0.0,There is a significant difference between the averages.
Precision,783.3462,0.0,There is a significant difference between the averages.
Recall,883.4957,0.0,There is a significant difference between the averages.
F1-score,886.4486,0.0,There is a significant difference between the averages.


### Are the models that use the Ministry of Health + Article and Ministry of Health + Article Oversampling variables the same?

In [151]:
WPpergunta2_rl_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rl",
                                "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rl")
WPpergunta2_rl_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,3951.2343,0.0,There is a significant difference between the averages.
Precision,4721.9061,0.0,There is a significant difference between the averages.
Recall,3951.2343,0.0,There is a significant difference between the averages.
F1-score,3839.1163,0.0,There is a significant difference between the averages.


In [152]:
WPpergunta2_rf_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rf",
                                "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rf")
WPpergunta2_rf_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2230.2072,0.0,There is a significant difference between the averages.
Precision,2340.3274,0.0,There is a significant difference between the averages.
Recall,2230.2072,0.0,There is a significant difference between the averages.
F1-score,2231.5976,0.0,There is a significant difference between the averages.


In [153]:
WPpergunta2_svm_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_svm",
                                "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_svm")
WPpergunta2_svm_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2240.9372,0.0,There is a significant difference between the averages.
Precision,2108.1656,0.0,There is a significant difference between the averages.
Recall,2240.9372,0.0,There is a significant difference between the averages.
F1-score,1964.2267,0.0,There is a significant difference between the averages.


In [154]:
WPpergunta2_gboost_corr = comparacao("3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_gboost",
                                "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_gboost")
WPpergunta2_gboost_corr

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2034.113,0.0,There is a significant difference between the averages.
Precision,2104.1609,0.0,There is a significant difference between the averages.
Recall,2034.113,0.0,There is a significant difference between the averages.
F1-score,2033.0348,0.0,There is a significant difference between the averages.


### Is there a difference between the models predicted with all variables and with the variables selected by correlation?

#### MS

In [155]:
WPpergunta3_corr_rlMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rl",
                              "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rl")
WPpergunta3_corr_rlMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,9.1749,0.0028,There is a significant difference between the averages.
Precision,4.3938,0.0373,There is a significant difference between the averages.
Recall,9.1749,0.0028,There is a significant difference between the averages.
F1-score,10.1651,0.0017,There is a significant difference between the averages.


In [156]:
WPpergunta3_corr_rfMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_rf",
                              "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_rf")
WPpergunta3_corr_rfMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.5428,0.4622,There is no significant difference between the means.
Precision,1.502,0.2218,There is no significant difference between the means.
Recall,0.5428,0.4622,There is no significant difference between the means.
F1-score,0.5308,0.4671,There is no significant difference between the means.


In [157]:
WPpergunta3_corr_svmMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_svm",
                              "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_svm")
WPpergunta3_corr_svmMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,128.6216,0.0,There is a significant difference between the averages.
Precision,52.6475,0.0,There is a significant difference between the averages.
Recall,128.6216,0.0,There is a significant difference between the averages.
F1-score,138.2934,0.0,There is a significant difference between the averages.


In [158]:
WPpergunta3_corr_gboostMS = comparacao("3- Worst prognosis/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_gboost",
                              "3- Worst prognosis/8- Analise_correlacao/1- Ministry_of_Health/4- Metrics/WPtabela_metricasMS_corr_gboost")
WPpergunta3_corr_gboostMS

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,44.3907,0.0,There is a significant difference between the averages.
Precision,45.8495,0.0,There is a significant difference between the averages.
Recall,44.3907,0.0,There is a significant difference between the averages.
F1-score,43.2523,0.0,There is a significant difference between the averages.


#### MS + ART

In [159]:
WPpergunta3_corr_rlMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rl",
                              "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rl")
WPpergunta3_corr_rlMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.8078,0.1803,There is no significant difference between the means.
Precision,1.329,0.2504,There is no significant difference between the means.
Recall,1.8078,0.1803,There is no significant difference between the means.
F1-score,1.7936,0.182,There is no significant difference between the means.


In [160]:
WPpergunta3_corr_rfMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_rf",
                              "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_rf")
WPpergunta3_corr_rfMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,50.8394,0.0,There is a significant difference between the averages.
Precision,50.5638,0.0,There is a significant difference between the averages.
Recall,50.8394,0.0,There is a significant difference between the averages.
F1-score,50.704,0.0,There is a significant difference between the averages.


In [161]:
WPpergunta3_corr_svmMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_svm",
                              "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_svm")
WPpergunta3_corr_svmMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,1.0464,0.3076,There is no significant difference between the means.
Precision,0.2582,0.612,There is no significant difference between the means.
Recall,1.0464,0.3076,There is no significant difference between the means.
F1-score,1.0555,0.3055,There is no significant difference between the means.


In [162]:
WPpergunta3_corr_gboostMSArt = comparacao("3- Worst prognosis/2.1- Ministry_of_Health + article - Quimio/4- Metrics/WPtabela_metricasMSart_gboost",
                              "3- Worst prognosis/8- Analise_correlacao/2- Ministry_of_Health + article/4- Metrics/WPtabela_metricasMSart_coor_gboost")
WPpergunta3_corr_gboostMSArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,47.9279,0.0,There is a significant difference between the averages.
Precision,48.7633,0.0,There is a significant difference between the averages.
Recall,47.9279,0.0,There is a significant difference between the averages.
F1-score,48.0556,0.0,There is a significant difference between the averages.


#### ART

In [163]:
WPpergunta3_corr_rlArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rl",
                              "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rl")
WPpergunta3_corr_rlArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,3.8704,0.0505,There is no significant difference between the means.
Precision,11.3684,0.0009,There is a significant difference between the averages.
Recall,3.8704,0.0505,There is no significant difference between the means.
F1-score,3.5263,0.0619,There is no significant difference between the means.


In [164]:
WPpergunta3_corr_rfArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_rf",
                              "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_rf")
WPpergunta3_corr_rfArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.9465,0.3318,There is no significant difference between the means.
Precision,5.1392,0.0245,There is a significant difference between the averages.
Recall,0.9465,0.3318,There is no significant difference between the means.
F1-score,0.7462,0.3887,There is no significant difference between the means.


In [165]:
WPpergunta3_corr_svmArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_svm",
                              "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_svm")
WPpergunta3_corr_svmArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,2.1698,0.1423,There is no significant difference between the means.
Precision,6.423,0.012,There is a significant difference between the averages.
Recall,2.1698,0.1423,There is no significant difference between the means.
F1-score,1.7556,0.1867,There is no significant difference between the means.


In [166]:
WPpergunta3_corr_gboostArt = comparacao("3- Worst prognosis/3.1- Article - Quimio/4- Metrics/WPtabela_metricasart_gboost",
                              "3- Worst prognosis/8- Analise_correlacao/3- Article/4- Metrics/WPtabela_metricasart_corr_gboost")
WPpergunta3_corr_gboostArt

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.0332,0.8555,There is no significant difference between the means.
Precision,6.0356,0.0149,There is a significant difference between the averages.
Recall,0.0332,0.8555,There is no significant difference between the means.
F1-score,0.1119,0.7383,There is no significant difference between the means.


#### MS Oversampling

In [167]:
WPpergunta3_corr_rlMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rl",
                              "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rl")
WPpergunta3_corr_rlMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,44.9423,0.0,There is a significant difference between the averages.
Precision,43.1565,0.0,There is a significant difference between the averages.
Recall,44.9423,0.0,There is a significant difference between the averages.
F1-score,45.2343,0.0,There is a significant difference between the averages.


In [168]:
WPpergunta3_corr_rfMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_rf",
                              "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_rf")
WPpergunta3_corr_rfMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,56.7745,0.0,There is a significant difference between the averages.
Precision,71.2999,0.0,There is a significant difference between the averages.
Recall,56.7745,0.0,There is a significant difference between the averages.
F1-score,53.7351,0.0,There is a significant difference between the averages.


In [169]:
WPpergunta3_corr_svmMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_svm",
                              "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_svm")
WPpergunta3_corr_svmMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.3265,0.5684,There is no significant difference between the means.
Precision,96.4042,0.0,There is a significant difference between the averages.
Recall,0.3265,0.5684,There is no significant difference between the means.
F1-score,5.5438,0.0195,There is a significant difference between the averages.


In [170]:
WPpergunta3_corr_gboostMS0 = comparacao("3- Worst prognosis/5- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_gboost",
                              "3- Worst prognosis/8- Analise_correlacao/4- Ministry_of_Health oversampling/4- Metrics/WPtabela_metricasMS0_corr_gboost")
WPpergunta3_corr_gboostMS0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,29.8154,0.0,There is a significant difference between the averages.
Precision,25.394,0.0,There is a significant difference between the averages.
Recall,29.8154,0.0,There is a significant difference between the averages.
F1-score,29.6249,0.0,There is a significant difference between the averages.


#### MS + ART Oversampling

In [173]:
WPpergunta3_corr_rlMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rl",
                              "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rl")
WPpergunta3_corr_rlMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,4.3525,0.0382,There is a significant difference between the averages.
Precision,2.4446,0.1195,There is no significant difference between the means.
Recall,4.3525,0.0382,There is a significant difference between the averages.
F1-score,4.5406,0.0343,There is a significant difference between the averages.


In [174]:
WPpergunta3_corr_rfMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_rf",
                              "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_rf")
WPpergunta3_corr_rfMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,0.6622,0.4168,There is no significant difference between the means.
Precision,0.1374,0.7113,There is no significant difference between the means.
Recall,0.6622,0.4168,There is no significant difference between the means.
F1-score,0.6871,0.4082,There is no significant difference between the means.


In [175]:
WPpergunta3_corr_svmMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm",
                              "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_svm")
WPpergunta3_corr_svmMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,22.9563,0.0,There is a significant difference between the averages.
Precision,20.3995,0.0,There is a significant difference between the averages.
Recall,22.9563,0.0,There is a significant difference between the averages.
F1-score,22.7731,0.0,There is a significant difference between the averages.


In [172]:
WPpergunta3_corr_gboostMSArt0 = comparacao("3- Worst prognosis/5.1- Ministry_of_Health+article oversampling - Quimio/4- Metrics/WPtabela_metricasMSart0_svm",
                              "3- Worst prognosis/8- Analise_correlacao/5- Ministry_of_Health+article oversampling/4- Metrics/WPtabela_metricasMSart0_corr_gboost")
WPpergunta3_corr_gboostMSArt0

Unnamed: 0,F-statistic,p-value,Interpretation
Accuracy,9.5312,0.0023,There is a significant difference between the averages.
Precision,2.2723,0.1333,There is no significant difference between the means.
Recall,9.5312,0.0023,There is a significant difference between the averages.
F1-score,9.8832,0.0019,There is a significant difference between the averages.
