In [1]:
import pandas as pd
import numpy as np
import torch, time
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn import metrics
import os

# Modelo DNN

In [2]:
# Single-channel preprocessing: force one grayscale channel, then convert the
# image to a tensor (simple normalization to [0, 1]).
# NOTE(review): `transform` is reassigned in the dataset-loading cell before any
# dataset is built, so this definition appears to be unused.
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]
)

In [None]:
# Locate the image dataset generated by TINTOlib. It is expected under
# <parent of the current working directory>/dataset/PuntosMuestra_CR_tinto_synthe.
ruta_carpeta_actual = os.getcwd()
ruta_carpeta_raiz = os.path.split(ruta_carpeta_actual)[0]
data_dir = os.path.join(
    ruta_carpeta_raiz,
    "dataset",
    "PuntosMuestra_CR_tinto_synthe",
)

# Preprocessing actually used by the dataset: grayscale emitted on 3 channels
# (original note: "in case you go to ViT"), resized to 224x224, then to tensor.
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# One sub-folder per class; labels are assigned by ImageFolder.
dataset = datasets.ImageFolder(data_dir, transform=transform)

In [4]:
# 1) Drop class "02": discard every sample labelled with that class index.
desc_cat = "02"
if desc_cat in dataset.class_to_idx:
    idx_c2 = dataset.class_to_idx[desc_cat]
    dataset.samples = [s for s in dataset.samples if s[1] != idx_c2]
    # The class name itself is removed from the mapping further below.
    print(f"Clase {desc_cat} eliminada (índice {idx_c2}).")
else:
    print(f"ℹ No existe carpeta {desc_cat} en dataset.class_to_idx !")

# 2) Remap the surviving labels to the contiguous range 0..K-1, which is what
#    nn.CrossEntropyLoss expects as class indices.
unique_old = sorted({t for _, t in dataset.samples})
old2new = {old: i for i, old in enumerate(unique_old)}

dataset.samples = [(p, old2new[t]) for (p, t) in dataset.samples]
# Derive targets from samples so the two lists can never drift apart.
dataset.targets = [t for _, t in dataset.samples]
# ImageFolder exposes `imgs` as an alias of `samples`; rebinding `samples`
# leaves the alias pointing at the old list, so refresh it explicitly.
dataset.imgs = dataset.samples

# Rebuild class_to_idx / classes so they agree with the new indices.
# Invert the current mapping to recover the folder name of each old index.
idx2class = {v: k for k, v in dataset.class_to_idx.items()}
dataset.class_to_idx = {idx2class[old]: new for old, new in old2new.items()}
# Sort by new index so that `classes[i]` is the name of label i.
dataset.classes = [
    c for c, _ in sorted(dataset.class_to_idx.items(), key=lambda kv: kv[1])
]

print("Clases activas:", dataset.classes)
print("Mapping nuevo:", dataset.class_to_idx)
num_classes = len(dataset.classes)


Clase 02 eliminada (índice 1).
Clases activas: ['01', '03', '04', '06', '07', '10']
Mapping nuevo: {'01': 0, '03': 1, '04': 2, '06': 3, '07': 4, '10': 5}


In [5]:
# Train/test split (70% / 30%).
# The logging cell records seed=42, so the split is actually seeded here to
# make that value meaningful and the run reproducible.
SEED = 42
# Seed the global RNG too: later weight initialisation and the shuffling of
# `train_dl` draw from it when no explicit generator is given.
torch.manual_seed(SEED)

n = len(dataset)
n_train = int(0.7 * n)
train_ds, test_ds = torch.utils.data.random_split(
    dataset,
    [n_train, n - n_train],
    generator=torch.Generator().manual_seed(SEED),
)
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=64)

## Modelo, entrenamiento y tiempos

In [6]:
class SmallCNN(nn.Module):
    """Compact convolutional classifier.

    Two 3x3 convolutions (16 and 32 filters) followed by global average
    pooling, so the network accepts any spatial input size, and a two-layer
    fully connected head that outputs one logit per class.

    Args:
        num_classes: number of output logits.
        in_ch: number of channels of the input images (default 1).
    """

    def __init__(self, num_classes, in_ch=1):
        super().__init__()
        conv_stack = [
            nn.Conv2d(in_ch, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            # Collapses HxW to 1x1, making the head independent of image size.
            nn.AdaptiveAvgPool2d(output_size=1),
        ]
        classifier = [
            nn.Flatten(),
            nn.Linear(32, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        ]
        self.feats = nn.Sequential(*conv_stack)
        self.head = nn.Sequential(*classifier)

    def forward(self, x):
        """Return the class logits for a batch of images `x`."""
        pooled = self.feats(x)
        return self.head(pooled)

# Number of distinct labels present in the (already remapped) samples.
num_classes = len({label for _, label in dataset.samples})
# in_ch=3 because the transform applied to the dataset uses
# Grayscale(num_output_channels=3).
model = SmallCNN(num_classes=num_classes, in_ch=3)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
crit = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-4)

# NOTE(review): the recorded run predicts the same class for every test sample
# (see the confusion matrix output); the epoch count / learning rate probably
# need tuning — confirm before relying on the reported metrics.

# --- training time (1-3 epochs as a quick trial) ---
epochs = 3
# CUDA kernels run asynchronously: synchronize before reading the clock so the
# measured interval covers exactly the work launched inside it.
if device.type == "cuda":
    torch.cuda.synchronize()
t0 = time.perf_counter()
for _ in range(epochs):
    model.train()
    for x, y in train_dl:
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = crit(model(x), y)
        loss.backward()
        opt.step()
if device.type == "cuda":
    torch.cuda.synchronize()
t1 = time.perf_counter()
fit_s = t1 - t0
print(f"⏱ CNN/ViT – entrenamiento ({epochs} ep): {fit_s:.2f}s")

# --- prediction time ---
if device.type == "cuda":
    torch.cuda.synchronize()
t0 = time.perf_counter()
y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for x, y in test_dl:
        x = x.to(device)
        logits = model(x)
        # argmax over the class dimension gives the predicted label per sample.
        preds = logits.argmax(1).cpu().tolist()
        y_pred += preds
        y_true += y.tolist()
if device.type == "cuda":
    torch.cuda.synchronize()
t1 = time.perf_counter()
pred_s = t1 - t0
print(f"⏱ CNN/ViT – predicción: {pred_s:.3f}s  ({pred_s/len(y_true)*1000:.2f} ms/muestra)")

⏱ CNN/ViT – entrenamiento (3 ep): 287.68s
⏱ CNN/ViT – predicción: 30.064s  (3.43 ms/muestra)


## Resultados

In [8]:
# Evaluation on the test predictions: overall accuracy (OA), macro-averaged F1
# and the confusion matrix.
oa = metrics.accuracy_score(y_true, y_pred)
f1m = metrics.f1_score(y_true, y_pred, average="macro")
cm = metrics.confusion_matrix(y_true, y_pred)

print("OA:", oa)
print("F1 macro:", f1m)
print("Matriz de confusión:\n", cm)


OA: 0.5234018264840182
F1 macro: 0.11452479080804295
Matriz de confusión:
 [[   0 2438    0    0    0    0]
 [   0 4585    0    0    0    0]
 [   0  329    0    0    0    0]
 [   0  357    0    0    0    0]
 [   0  415    0    0    0    0]
 [   0  636    0    0    0    0]]


# Registrar información

In [None]:
# Make the parent folder importable so the project-local `utils_log` module can
# be loaded; the guard avoids appending a duplicate entry on every re-run.
import sys
if ".." not in sys.path:
    sys.path.append("..")
import importlib, utils_log
# Reload so edits made to utils_log are picked up without restarting the kernel.
importlib.reload(utils_log)
from utils_log import log_row

# Last path component of the working folder and of the dataset folder.
# os.path handles the separator of the current OS (a literal "\\" split only
# works on Windows paths).
carpeta_actual = os.path.basename(os.path.normpath(ruta_carpeta_actual))
dataset_utilizado = os.path.basename(os.path.normpath(data_dir))

log_row(
    script="20250901_PruebasEntrenamientoDNN.ipynb",
    algoritmo="DNN",
    dataset=dataset_utilizado,
    clases_removidas=[2],
    seed=42,
    # Sizes of the actual splits (y_true / y_pred both come from the test set).
    n_train=len(train_ds), n_test=len(test_ds),
    # num_classes is already an integer count; len() on it would raise TypeError.
    n_features=None, num_classes=num_classes,
    fit_seconds=fit_s, pred_seconds=pred_s,
    ms_per_sample=(pred_s / len(y_pred)) * 1000,
    OA=oa, F1_macro=f1m,
    carpeta=carpeta_actual,
)

  from .autonotebook import tqdm as notebook_tqdm
