In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.model_selection import KFold


num_folds = 5


def _read_fold_csv(fold, name):
    """Load one CSV of a fold and drop its ``"Unnamed: 0"`` column.

    The file read is ``./K Folds Cross Validation/<fold>/<name>_<fold>``.
    The dropped column is presumably the index saved alongside the data
    when the folds were written -- confirm against the export code.
    """
    frame = pd.read_csv(f'./K Folds Cross Validation/{fold}/{name}_{fold}')
    return frame.drop("Unnamed: 0", axis=1)


# os.listdir returns entries in arbitrary order and includes everything in the
# directory, so keep only real fold directories (skipping hidden entries such
# as notebook checkpoint folders) and sort them for a reproducible order.
_fold_names = sorted(
    entry
    for entry in os.listdir("./K Folds Cross Validation/")
    if not entry.startswith(".")
    and os.path.isdir(os.path.join("./K Folds Cross Validation/", entry))
)

# Maps each fold directory name to a dictionary holding that fold's data:
# "x_train"/"x_val" are feature DataFrames, "y_train"/"y_val" are the
# 'weather' column extracted as a Series.
k_folds = {
    fold: {
        "x_train": _read_fold_csv(fold, "x_train"),
        "x_val": _read_fold_csv(fold, "x_val"),
        "y_train": _read_fold_csv(fold, "y_train")['weather'],
        "y_val": _read_fold_csv(fold, "y_val")['weather'],
    }
    for fold in _fold_names
}

# Bare expression kept from the original cell for interactive inspection.
k_folds

# Lists that collect the metric values of each fold, in evaluation order.
accuracy_scores = []
recall_scores = []
f1_scores = []


def _weather_labels(targets):
    """Return the 'weather' labels whether *targets* is a DataFrame or a Series.

    The fold dictionary may already hold the 'weather' column as a Series;
    indexing a Series with ``['weather']`` would be a label lookup on its
    index and raise ``KeyError``, so the column is selected only when a
    DataFrame is given.
    """
    if isinstance(targets, pd.DataFrame):
        return targets['weather']
    return targets


# k_folds is keyed by the fold directory names (strings), not by integers, so
# iterate over its actual keys. Sorting gives a stable order and the slice
# evaluates at most ``num_folds`` folds.
for fold_number, fold_key in enumerate(sorted(k_folds)[:num_folds], start=1):
    # Fetch the datasets of the current fold.
    fold_data = k_folds[fold_key]

    x_train = fold_data['x_train']
    y_train = _weather_labels(fold_data['y_train'])

    x_val = fold_data['x_val']
    y_val = _weather_labels(fold_data['y_val'])

    # Initialise the Random Forest (fixed seed so runs are repeatable).
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train the model.
    rf_model.fit(x_train, y_train)

    # Predict on the validation set.
    y_pred = rf_model.predict(x_val)

    # Evaluate the metrics; 'weighted' averaging is used for the multiclass
    # recall and F1.
    accuracy = accuracy_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    accuracy_scores.append(accuracy)
    recall_scores.append(recall)
    f1_scores.append(f1)

    # NOTE: the "Precisão" label reports the value of accuracy_score.
    print(f"Fold {fold_number} - Precisão: {accuracy}, Recall: {recall}, F1: {f1}")

# Aggregate the per-fold metrics: mean and standard deviation (NumPy's
# default population std) of accuracy, recall and F1 across the folds.
mean_accuracy, mean_recall, mean_f1 = (
    np.mean(fold_values)
    for fold_values in (accuracy_scores, recall_scores, f1_scores)
)

std_accuracy, std_recall, std_f1 = (
    np.std(fold_values)
    for fold_values in (accuracy_scores, recall_scores, f1_scores)
)

# Report both summaries; the "Precisão" label carries the accuracy values.
print(f"\nMédia - Precisão: {mean_accuracy}, Recall: {mean_recall}, F1: {mean_f1}")
print(f"Desvio Padrão - Precisão: {std_accuracy}, Recall: {std_recall}, F1: { std_f1}")


Fold 1 - Precisão: 0.02, Recall: 0.02, F1: 0.02
Fold 2 - Precisão: 0.12, Recall: 0.12, F1: 0.12
Fold 3 - Precisão: 0.11, Recall: 0.11, F1: 0.12
Fold 4 - Precisão: 0.07, Recall: 0.07, F1: 0.07
Fold 5 - Precisão: 0.10, Recall: 0.10, F1: 0.08

Média - Precisão: 0.09, Recall: 0.09, F1: 0.08
Desvio Padrão - Precisão: 0.04, Recall: 0.04, F1: 0.04
