In [1]:
from src.components.data.process_data import process_data
from src.components.data.read_dataset import read_dataset

In [2]:
from keras import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam


def create_model(input_shape, output_size):
    model = Sequential(
        [
            Input(shape=input_shape),
            Dense(64, activation="relu"),
            Dense(32, activation="relu"),
            Dense(output_size, activation="softmax"),
        ]
    )

    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss="categorical_crossentropy",  # Cambiar la función de pérdida según tu problema
        metrics=["accuracy"],
    )

    return model

2024-02-08 12:25:15.343414: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-08 12:25:15.538586: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-08 12:25:15.538675: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-08 12:25:15.561065: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-08 12:25:15.622981: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-08 12:25:15.624309: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [3]:
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

dataset = read_dataset("time_series_60min_singleindex.csv")
x_data, y_data = process_data(dataset)

# Normalizar los datos
x_data = MinMaxScaler().fit_transform(x_data)
y_temp = tf.one_hot(y_data, max(y_data) + 1).numpy()

# Crear el modelo
input_shape = x_data.shape[1:]
output_size = 4
model = create_model(input_shape, output_size)

2024-02-08 12:25:21.690347: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-08 12:25:21.690960: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [331]:
res = model.fit(
    x_data,
    y_temp,
    epochs=10,
    verbose=0,
)

In [336]:
loss = res.history["loss"]

In [None]:
result = model.evaluate(x_data, y_data)

In [4]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder


def label_to_one_hot(array, depth=4):
    return tf.one_hot(array, depth).numpy()


def one_hot_to_label(array):
    return np.argmax(array, axis=1)

In [5]:
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, zero_one_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


arch = [
    (  # Sequential model
        create_model,
        {
            "name": "Keras Sequential 1",
            "init_args": {"input_shape": input_shape, "output_size": output_size},
            "fit_args": {"epochs": 10, "verbose": 0},
            "predict_args": {"verbose": 0},
            "y_pre": label_to_one_hot,
            "y_post": one_hot_to_label,
        },
    ),
    (
        create_model,
        {
            "name": "Keras Sequential 2",
            "init_args": {"input_shape": input_shape, "output_size": output_size},
            "fit_args": {"epochs": 20, "verbose": 0},
            "predict_args": {"verbose": 0},
            "y_pre": label_to_one_hot,
            "y_post": one_hot_to_label,
        },
    ),
    # (
    #     XGBClassifier,
    #     {
    #         "init_args": {
    #             "n_estimators": 1000,
    #             # "early_stopping_rounds": 50,
    #             "learning_rate": 0.01,
    #             "colsample_bytree": 0.8,
    #             "max_depth": 7,
    #         },
    #     },
    # ),
    (LGBMClassifier, {"init_args": {"n_estimators": 1000, "verbose": -1}}),
    (KNeighborsClassifier, {"init_args": {"n_neighbors": 5, "weights": "uniform"}}),
    (RandomForestClassifier, {"init_args": {"max_depth": 4, "random_state": 0}}),
]

metrics = [
    # (roc_auc_score, {"name": "auc", "args": {"multi_class": "ovr"}}),
    (zero_one_loss, {"name": "loss"}),
    (accuracy_score, {"name": "acc"}),
    (f1_score, {"name": "f1", "args": {"average": "weighted"}}),
]

In [13]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

from automl.automl import _calculate_metrics, _get_split_data


def auto_ml(x_data, y_data, arch, metrics, cv=5):
    kfs = StratifiedKFold(n_splits=cv - 1, shuffle=True, random_state=42)
    split_indexes = list(kfs.split(x_data, y_data))

    models = []
    models_results = []
    for model_init, args in arch:
        model_name = args.get("name", model_init.__name__)
        y_test_list = []
        y_predict_list = []
        print("Running model:", model_name)

        for x_train, x_test, y_train, y_test in tqdm(
            _get_split_data(x_data, y_data, split_indexes, args), total=cv - 1
        ):
            # Train the model
            model = model_init(**args.get("init_args", {}))
            model.fit(x_train, y_train, **args.get("fit_args", {}))

            # Make predictions
            predictions = model.predict(x_test, **args.get("predict_args", {}))
            y_predict_list.append(args.get("y_post", lambda y: y)(predictions))
            y_test_list.append(y_test)

        model_metrics = _calculate_metrics(
            np.concatenate(y_test_list),
            np.concatenate(y_predict_list),
            metrics,
        )
        model_info = {"model": model_name} | model_metrics
        print(model_info, "\n")

        # Add results to the list
        models.append(model)
        models_results.append(model_info)

    return pd.DataFrame(models_results)

In [14]:
auto_ml(x_data, y_data, arch, metrics).sort_values("acc", ascending=False)

Running model: Keras Sequential 1


100%|██████████| 4/4 [00:04<00:00,  1.16s/it]


{'model': 'Keras Sequential 1', 'loss': 0.5080405002977963, 'acc': 0.4919594997022037, 'f1': 0.512385644388086} 

Running model: Keras Sequential 2


100%|██████████| 4/4 [00:06<00:00,  1.51s/it]


{'model': 'Keras Sequential 2', 'loss': 0.490172721858249, 'acc': 0.509827278141751, 'f1': 0.5227301992828707} 

Running model: LGBMClassifier


100%|██████████| 4/4 [00:07<00:00,  1.86s/it]


{'model': 'LGBMClassifier', 'loss': 0.32757593805836804, 'acc': 0.672424061941632, 'f1': 0.6729635178447443} 

Running model: KNeighborsClassifier


100%|██████████| 4/4 [00:00<00:00, 296.78it/s]


{'model': 'KNeighborsClassifier', 'loss': 0.3692674210839786, 'acc': 0.6307325789160214, 'f1': 0.6333632281559901} 

Running model: RandomForestClassifier


100%|██████████| 4/4 [00:01<00:00,  3.41it/s]

{'model': 'RandomForestClassifier', 'loss': 0.4782608695652174, 'acc': 0.5217391304347826, 'f1': 0.5237229781255656} 






Unnamed: 0,model,loss,acc,f1
2,LGBMClassifier,0.327576,0.672424,0.672964
3,KNeighborsClassifier,0.369267,0.630733,0.633363
4,RandomForestClassifier,0.478261,0.521739,0.523723
1,Keras Sequential 2,0.490173,0.509827,0.52273
0,Keras Sequential 1,0.508041,0.491959,0.512386
