# Modelo DNN

In [None]:
import pandas as pd
import numpy as np
import torch, time
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn import metrics
import os

In [None]:
# Load the sample-points dataset from the "dataset" folder that sits next to
# the current working directory, i.e. under the parent of the notebook folder.
# (The original note says this root folder was generated by TINTOlib.)
ruta_carpeta_actual = os.getcwd()
ruta_carpeta_raiz = os.path.split(ruta_carpeta_actual)[0]
csv_path = os.path.join(
    os.path.join(ruta_carpeta_raiz, "dataset"),
    "PuntosMuestra_CR_2023.csv",
)
df = pd.read_csv(csv_path)

## Limpieza

In [None]:
# Drop every sample labelled with CATEGORIA == 2.
df_filtrado = df[df["CATEGORIA"] != 2].copy()

# Re-index the remaining labels to contiguous integers 0..K-1.
# nn.CrossEntropyLoss expects class indices in [0, num_classes), and
# num_classes is later derived from the number of distinct labels, so the
# gap left by the removed category would otherwise produce out-of-range
# targets. Sorting keeps the original label order in the confusion matrix.
clases_activas = sorted(df_filtrado["CATEGORIA"].unique())
mapa_clases = {clase: idx for idx, clase in enumerate(clases_activas)}

# y stays a pandas Series aligned with X; only its values are re-indexed.
y = df_filtrado["CATEGORIA"].map(mapa_clases).astype("int64")
# Features: everything except the target, the coordinates and the year.
X = df_filtrado.drop(columns=["CATEGORIA", "lon", "lat", "year"])

## Preprocesamiento

In [None]:
# Standardize the features (the original note states this helps network
# training). The scaler is fitted first and then applied to the same matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

## Datos de entrenamiento y prueba

In [None]:
# --------------------------
# 2. Train/test split
# --------------------------
# Split the raw (unscaled) features first: the scaler must be fitted on the
# training portion only, otherwise the test set's mean/variance leak into
# the training data. The partition is the same one produced by splitting the
# scaled matrix, because it depends only on y, test_size and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit on train, then apply the same transformation to both partitions.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Convert to tensors: float32 features, int64 class-index targets.
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train.values, dtype=torch.long)
X_test_t  = torch.tensor(X_test, dtype=torch.float32)
y_test_t  = torch.tensor(y_test.values, dtype=torch.long)

train_ds = TensorDataset(X_train_t, y_train_t)
test_ds  = TensorDataset(X_test_t, y_test_t)

# Only the training loader shuffles; evaluation order is kept fixed.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl  = DataLoader(test_ds, batch_size=128)

Clase 02 eliminada (índice 1).
Clases activas: ['01', '03', '04', '06', '07', '10']
Mapping nuevo: {'01': 0, '03': 1, '04': 2, '06': 3, '07': 4, '10': 5}


## Definir modelo de entrenamiento

In [None]:
# --------------------------
# 3. MLP model definition
# --------------------------
# Network sizes are taken from the data: one output per distinct label and
# one input per feature column of the training matrix.
num_classes = y.unique().size
input_dim = X_train.shape[-1]

class MLP(nn.Module):
    """Fully connected classifier: Linear + ReLU hidden layers, linear output.

    Args:
        input_dim: number of input features per sample.
        num_classes: number of output logits (one per class).
        hidden_dims: widths of the hidden layers, in order. The default
            ``(64, 32)`` reproduces the original fixed architecture, so
            existing ``MLP(input_dim, num_classes)`` calls are unaffected.
    """

    def __init__(self, input_dim, num_classes, hidden_dims=(64, 32)):
        super().__init__()
        layers = []
        in_features = input_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Output layer emits raw logits; no softmax is applied here.
        layers.append(nn.Linear(in_features, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the layer stack for input ``x``."""
        return self.net(x)

# Seed the torch RNG before the model is built so that weight initialisation
# and the DataLoader shuffle order are reproducible. 42 matches the
# random_state used for the train/test split and the seed recorded in the log.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLP(input_dim, num_classes).to(device)

# Multi-class cross-entropy over raw logits, optimised with Adam.
crit = nn.CrossEntropyLoss()
opt  = torch.optim.Adam(model.parameters(), lr=1e-3)

## Entrenamiento y testeo

In [None]:
# --------------------------
# 4. Training
# --------------------------
epochs = 20
t0 = time.perf_counter()
for ep in range(epochs):
    model.train()
    for xb, yb in train_dl:
        # Move the mini-batch to the same device as the model.
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad()
        loss = crit(model(xb), yb)
        loss.backward()
        opt.step()
# CUDA kernels are launched asynchronously; wait for the queued work to
# finish so the measured time covers the whole training run.
if device.type == "cuda":
    torch.cuda.synchronize()
fit_s = time.perf_counter() - t0
print(f"⏱ Entrenamiento ({epochs} ep): {fit_s:.2f}s")

# --------------------------
# 5. Test
# --------------------------
# Run the model over the test loader without gradient tracking, collecting
# the true labels and the arg-max predictions as plain Python lists.
t0 = time.perf_counter()
model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for xb, yb in test_dl:
        xb = xb.to(device)
        logits = model(xb)
        preds = logits.argmax(dim=1).cpu().tolist()
        y_pred.extend(preds)
        y_true.extend(yb.tolist())
pred_s = time.perf_counter() - t0
# Average inference latency per sample, in milliseconds.
ms_per_sample = pred_s / len(y_true) * 1000
print(f"⏱ Test: {pred_s:.3f}s  ({ms_per_sample:.2f} ms/muestra)")

⏱ CNN/ViT – entrenamiento (3 ep): 287.68s
⏱ CNN/ViT – predicción: 30.064s  (3.43 ms/muestra)


## Resultados

In [None]:
# Overall accuracy, macro-averaged F1 and confusion matrix on the test set.
oa = metrics.accuracy_score(y_true, y_pred)
f1m = metrics.f1_score(y_true, y_pred, average="macro")
cm = metrics.confusion_matrix(y_true, y_pred)

print(f"OA={oa:.4f} | F1-macro={f1m:.4f}")
print("Matriz de confusión:\n", cm)

OA: 0.5234018264840182
F1 macro: 0.11452479080804295
Matriz de confusión:
 [[   0 2438    0    0    0    0]
 [   0 4585    0    0    0    0]
 [   0  329    0    0    0    0]
 [   0  357    0    0    0    0]
 [   0  415    0    0    0    0]
 [   0  636    0    0    0    0]]


## Registrar información

In [None]:
import sys
# Make the parent folder importable; guard against duplicate entries when
# this cell is re-run.
if ".." not in sys.path:
    sys.path.append("..")
import importlib, utils_log
# Reload so edits to utils_log are picked up without restarting the kernel.
importlib.reload(utils_log)
from utils_log import log_row

# os.path.basename handles the platform's path separator, unlike splitting
# on a hard-coded backslash.
carpeta_actual = os.path.basename(ruta_carpeta_actual)
dataset_utilizado = os.path.basename(csv_path)

# Record this run. n_train comes from the training labels (y_true holds the
# test labels), num_classes is already an int, and n_features is the model's
# input width.
log_row(
  script="20250901_PruebasEntrenamientoDNN.ipynb",
  algoritmo="DNN_MLP",
  dataset=dataset_utilizado,
  clases_removidas=[2],
  seed=42,
  n_train=len(y_train), n_test=len(y_true),
  n_features=input_dim, num_classes=num_classes,
  fit_seconds=fit_s, pred_seconds=pred_s,
  ms_per_sample=ms_per_sample,
  OA=oa, F1_macro=f1m,
  carpeta=carpeta_actual
)

  from .autonotebook import tqdm as notebook_tqdm
