## 1. Importación de Librerías

In [14]:
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

import os

import joblib

## 2. Preparación de Datos, Directorios y Módulos

In [15]:
# Dataset variant names: each base preprocessing appears once without a
# suffix and once with each of the "_PCA80" / "_PCA95" suffixes, grouped by
# base name.
DATASETS_DIRS = [
    f"{base}{suffix}"
    for base in ("estandarizado", "normalizado", "original")
    for suffix in ("", "_PCA80", "_PCA95")
]

# Number of training folds per dataset variant.
K_FOLDS = 5

# Input datasets live in ../data (a path relative to the current working
# directory).
DATA_DIR = os.path.join("..", "data")

# Fail fast: without the input data there is nothing to train on.
if not os.path.exists(DATA_DIR):
    raise FileNotFoundError(f"Data directory '{DATA_DIR}' does not exist. Please ensure the data is available.")

# Trained models are written under ../models.
MODELS_DIR = os.path.join("..", "models")

# exist_ok=True creates the directory only when it is missing, which avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(MODELS_DIR, exist_ok=True)

# Single registry of supported models. Each value is a zero-argument factory,
# so an estimator is only constructed when it is actually requested.
_MODEL_FACTORIES = {
    "KNN": lambda: KNeighborsClassifier(),
    "SVM": lambda: SVC(probability=True),
    "NaiveBayes": lambda: GaussianNB(),
    "RandomForest": lambda: RandomForestClassifier(),
}


def get_model(model_name):
    """Create a fresh instance of the specified model.

    Only the requested estimator is constructed; every call returns a new
    object, so instances are never shared between callers.

    Args:
        model_name: One of "KNN", "SVM", "NaiveBayes" or "RandomForest".

    Returns:
        A newly constructed estimator for ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not a supported model name.
    """
    try:
        factory = _MODEL_FACTORIES[model_name]
    except KeyError:
        # Same exception type as a plain dict lookup, but the message lists
        # the valid choices.
        raise KeyError(
            f"Unknown model '{model_name}'. Available models: {list(_MODEL_FACTORIES)}"
        ) from None
    return factory()


# Derived from the registry so the list of names cannot drift from the
# models get_model() can build. Order follows the registry's insertion order.
MODEL_NAMES = list(_MODEL_FACTORIES)

## 3. Entrenamiento y Almacenamiento de Modelos

In [None]:
def train_and_save_model(X_train, y_train, model, data_type, model_name, model_iteration, model_dir):
    """Fit ``model`` on the given training data and persist it with joblib.

    The fitted model is written to
    ``<model_dir>/<data_type>/<model_name>/model_fold_<model_iteration>.pkl``.
    Missing parent directories are created.

    Args:
        X_train: Training features, passed to ``model.fit``.
        y_train: Training targets, passed to ``model.fit``.
        model: Object exposing ``fit(X, y)``; it is fitted in place.
        data_type: Dataset variant name, used as the first sub-directory.
        model_name: Model name, used as the second sub-directory.
        model_iteration: Identifier embedded in the output file name.
        model_dir: Root directory under which models are stored.

    Returns:
        None.
    """
    # Fit first: if training fails, no directory or file is created.
    model.fit(X_train, y_train)

    filename = f"model_fold_{model_iteration}.pkl"
    destination = os.path.join(model_dir, f"{data_type}", f"{model_name}", filename)

    parent_dir = os.path.dirname(destination)
    os.makedirs(parent_dir, exist_ok=True)

    joblib.dump(model, destination)
    

# Train and persist one model per (dataset variant, fold, model name).
for data_type in DATASETS_DIRS:
    data_path = os.path.join(DATA_DIR, data_type)

    # A missing dataset variant is reported and skipped, not treated as fatal.
    if not os.path.exists(data_path):
        print(f"Data path '{data_path}' does not exist. Skipping...")
        continue

    for fold in range(K_FOLDS):
        # Fold files are numbered from 1, while ``fold`` counts from 0.
        fold_number = fold + 1
        fold_path = os.path.join(data_path, f"train_{fold_number}_{data_type}.csv")

        # Read the fold once and reuse the same arrays for every model.
        train_data = pd.read_csv(fold_path)
        fold_values = train_data.values
        # The last column is used as the target; all preceding columns are
        # used as features.
        X_train, y_train = fold_values[:, :-1], fold_values[:, -1]

        for model_name in MODEL_NAMES:
            # A new estimator per fold, so no fitted state carries over.
            model = get_model(model_name)
            train_and_save_model(
                X_train,
                y_train,
                model,
                data_type,
                model_name,
                fold_number,
                MODELS_DIR,
            )