In [2]:
import os
import torch
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import timm
from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
import kornia.augmentation as K
from tqdm import tqdm
from sklearn.metrics import classification_report

In [4]:
# 1. Configuración
cudnn.benchmark = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2. Dataset solo ToTensor en CPU
data_dir = r'/home/pibezx/Downloads/archive/fruits-360_100x100/fruits-360'
to_tensor = transforms.ToTensor()
print("📁 Cargando datasets...")
print(data_dir)

📁 Cargando datasets...
/home/pibezx/Downloads/archive/fruits-360_100x100/fruits-360


In [6]:
train_ds = datasets.ImageFolder(os.path.join(data_dir, 'Training'),transform=to_tensor)
val_ds   = datasets.ImageFolder(os.path.join(data_dir, 'Test'),transform=to_tensor)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True,num_workers=1, pin_memory=True,persistent_workers=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False,num_workers=1, pin_memory=True, persistent_workers=True)

In [7]:
kornia_val = torch.nn.Sequential(
    K.Resize(size=(224, 224))
).to(device)

In [8]:
kornia_aug = torch.nn.Sequential(
    K.RandomHorizontalFlip(p=0.5),
    K.RandomRotation(degrees=15.0),
    K.Resize(size=(224, 224))
).to(device)

# Media y desviación de ImageNet en GPU
mean = torch.tensor([0.485, 0.456, 0.406], device=device).view(1,3,1,1)
std  = torch.tensor([0.229, 0.224, 0.225], device=device).view(1,3,1,1)

In [9]:
model = timm.create_model('vit_small_patch16_224', pretrained=True,
                          num_classes=len(train_ds.classes))
model.to(device)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False

In [10]:
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.1)
criterion = torch.nn.CrossEntropyLoss()
scaler    = GradScaler()


  scaler    = GradScaler()


In [None]:
epochs = 20
for epoch in range(1, epochs+1):
    # --- Entrenamiento ---
    model.train()
    running_loss = running_correct = total = 0
    pbar = tqdm(total=len(train_loader.dataset), unit='img',
                desc=f'Epoch {epoch}/{epochs}')
    
    for imgs, labels in train_loader:
        imgs   = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        
        # Aplicar augmentaciones en GPU
        imgs = kornia_aug(imgs)
        # Normalizar en GPU
        imgs = (imgs - mean) / std
        
        optimizer.zero_grad()
        with autocast():
            outputs = model(imgs)
            loss    = criterion(outputs, labels)
        
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()
        
        # Métricas de batch
        preds = outputs.argmax(dim=1)
        b     = imgs.size(0)
        running_correct += (preds == labels).sum().item()
        running_loss   += loss.item() * b
        total          += b
        
        pbar.update(b)
        pbar.set_postfix({
            'loss': f'{running_loss/total:.4f}',
            'acc':  f'{running_correct/total:.4f}'
        })
    pbar.close()

  with autocast():
Epoch 1/10: 100%|██████████| 105221/105221 [03:53<00:00, 450.42img/s, loss=0.2134, acc=0.9494]
Epoch 2/10: 100%|██████████| 105221/105221 [03:52<00:00, 452.19img/s, loss=0.0385, acc=0.9902]
Epoch 3/10: 100%|██████████| 105221/105221 [03:53<00:00, 451.11img/s, loss=0.0328, acc=0.9915]
Epoch 4/10: 100%|██████████| 105221/105221 [03:56<00:00, 445.51img/s, loss=0.0241, acc=0.9940]
Epoch 5/10:  36%|███▋      | 38144/105221 [01:27<02:37, 425.91img/s, loss=0.0228, acc=0.9934]

In [None]:
model.eval()
all_preds, all_labels = [], []

torch.cuda.empty_cache()

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs   = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
            
        imgs = kornia_val(imgs)
        imgs = (imgs - mean) / std
            
        out = model(imgs)
        all_preds.extend(out.argmax(dim=1).cpu().tolist())
        all_labels.extend(labels.cpu().tolist())
    

In [None]:
report = classification_report(
        all_labels, all_preds,
        target_names=train_ds.classes, digits=4, output_dict=True
    )
print(f"\n📊 Validación Epoch {epoch}")
for cls in train_ds.classes:
    r = report[cls]
    print(f"  {cls:15s} P:{r['precision']:.3f}  R:{r['recall']:.3f}  F1:{r['f1-score']:.3f}")
print(f"  Macro avg   P:{report['macro avg']['precision']:.3f}  "
          f"R:{report['macro avg']['recall']:.3f}  "
          f"F1:{report['macro avg']['f1-score']:.3f}\n")