In [None]:
!pip install -q torch torchvision mediapipe scikit-learn opencv-python

In [None]:
# CNN com Suavização Temporal e Exportação JIT — Versão Otimizada
# Este notebook ajustado visa reduzir gargalos de I/O e CPU/GPU, implementar Inferência com Suavização Temporal e acelerar a exportação JIT.
    
# 1. Importações e Configurações
import os, random, string
from collections import deque
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision
from torchvision import transforms, datasets
from torchvision.models import mobilenet_v3_small
from sklearn.metrics import precision_score, recall_score, f1_score

# Let cuDNN time several convolution algorithms and reuse the fastest one.
# The autotuning pays off when input shapes stay constant between iterations
# (every image here is resized/cropped to CFG.IMG_SIZE); with shapes that keep
# changing it re-benchmarks and can slow things down.
torch.backends.cudnn.benchmark = True

class CFG:
    """Notebook-wide constants: dataset location, hyper-parameters, device."""

    # Root folder handed to ImageFolder (one sub-directory per class).
    TRAIN_PATH = "/kaggle/input/aslamerican-sign-language-aplhabet-dataset/ASL_Alphabet_Dataset/asl_alphabet_train"

    # Side length, in pixels, of the square images fed to the network.
    IMG_SIZE = 224
    # Fraction of the dataset held out for validation.
    VAL_RATIO = 0.2

    # Optimisation settings.
    BATCH_SIZE = 64
    EPOCHS = 20
    LR = 3e-4
    WEIGHT_DECAY = 1e-4

    # Number of recent predictions averaged by the temporal smoother.
    SEQ_LEN = 5

    # Use the GPU when one is visible, otherwise fall back to the CPU.
    DEVICE = (
        torch.device("cuda")
        if torch.cuda.is_available()
        else torch.device("cpu")
    )

# 2. Transforms
# Per-channel mean / std used for input normalisation (the standard ImageNet
# statistics); shared by the training and validation pipelines.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: light geometric and colour augmentation, then tensor
# conversion and normalisation.
_train_steps = [
    transforms.RandomRotation(degrees=15),
    transforms.RandomResizedCrop(CFG.IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Validation pipeline: deterministic resize only, same normalisation.
_val_steps = [
    transforms.Resize((CFG.IMG_SIZE, CFG.IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
val_transform = transforms.Compose(_val_steps)

# 3. Dataset + split
# Two ImageFolder views over the same directory, one per transform pipeline.
# Subsets produced by random_split share a single underlying dataset object,
# so overwriting `val_ds.dataset.transform` would also replace the transform
# used by the training subset and silently disable augmentation. Keeping two
# separate dataset objects avoids that aliasing.
full_ds = datasets.ImageFolder(CFG.TRAIN_PATH, transform=train_transform)
_val_view = datasets.ImageFolder(CFG.TRAIN_PATH, transform=val_transform)

val_size = int(len(full_ds) * CFG.VAL_RATIO)
train_size = len(full_ds) - val_size

# A single random permutation is sliced into two disjoint index lists; both
# views enumerate the folder in the same order, so an index refers to the
# same image in either of them.
_split_perm = torch.randperm(len(full_ds)).tolist()
train_ds = torch.utils.data.Subset(full_ds, _split_perm[:train_size])
val_ds = torch.utils.data.Subset(_val_view, _split_perm[train_size:])

# 4. DataLoaders with multiple workers and pinned memory
loader_kwargs = dict(
    batch_size=CFG.BATCH_SIZE,
    # os.cpu_count() returns None when the count cannot be determined, and
    # DataLoader requires an integer; fall back to loading in the main process.
    num_workers=os.cpu_count() or 0,
    # Page-locked host memory only helps host-to-GPU copies, so request it
    # only when the target device is CUDA.
    pin_memory=CFG.DEVICE.type == "cuda",
)
# Shuffle the training data each epoch; keep validation order fixed.
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader   = DataLoader(val_ds, shuffle=False, **loader_kwargs)

# 5. CNN built on MobileNetV3
class ASLNet(nn.Module):
    """MobileNetV3-Small backbone followed by a single linear classifier.

    The backbone's stock classification head is replaced with an identity
    layer, so the backbone returns its feature vector and ``fc`` maps that
    vector to ``num_classes`` logits.

    Args:
        num_classes: Number of output classes. The default is evaluated once,
            when the class is defined, from ``full_ds.classes``.
    """

    def __init__(self, num_classes=len(full_ds.classes)):
        super().__init__()
        # NOTE(review): `pretrained=` is the legacy torchvision argument
        # (newer releases prefer `weights=`); kept as-is so the notebook runs
        # on whichever torchvision version is installed.
        self.backbone = mobilenet_v3_small(pretrained=True)
        # Read the feature width from the stock head before discarding it,
        # instead of hard-coding it, so the linear layer always matches the
        # backbone's output size.
        feat_dim = self.backbone.classifier[0].in_features
        self.backbone.classifier = nn.Identity()
        self.fc = nn.Linear(feat_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        feat = self.backbone(x)
        return self.fc(feat)

# Build the network and place its parameters on the configured device.
model = ASLNet()
model = model.to(CFG.DEVICE)

# 6. Mixed precision & training optimisation
# Loss, gradient scaler (used together with autocast in the training step)
# and optimizer over all model parameters.
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()
optimizer = optim.Adam(
    model.parameters(),
    lr=CFG.LR,
    weight_decay=CFG.WEIGHT_DECAY,
)

# 7. Temporal smoothing for inference
class TemporalSmoother:
    """Running mean over the most recent prediction tensors.

    Holds at most ``seq_len`` tensors; once full, adding a new one drops the
    oldest.
    """

    def __init__(self, seq_len):
        # A bounded deque discards its oldest entry automatically when full.
        self.queue = deque(maxlen=seq_len)

    def smooth(self, preds):
        """Store ``preds`` and return the element-wise mean of the window.

        ``preds`` is detached, moved to the CPU and cast to float32 before
        being stored. All tensors in the window must have the same shape,
        because they are stacked along a new leading dimension.
        """
        latest = preds.detach().cpu().float()
        self.queue.append(latest)
        window = torch.stack(tuple(self.queue), dim=0)
        return torch.mean(window, dim=0)

# Shared smoother whose window length comes from the configuration.
smoother = TemporalSmoother(seq_len=CFG.SEQ_LEN)

# 8. Funções de Treino, Avaliação e Loop

def train_epoch(loader):
    """Run one optimisation pass over ``loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``scaler``. The forward pass and loss run under autocast; the backward
    pass and optimizer step go through the gradient scaler.

    Returns:
        ``(mean_loss, accuracy)``: the sum of per-batch losses divided by the
        number of batches, and the number of correct predictions divided by
        ``len(loader.dataset)``.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    for batch_imgs, batch_labels in loader:
        batch_imgs = batch_imgs.to(CFG.DEVICE)
        batch_labels = batch_labels.to(CFG.DEVICE)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            logits = model(batch_imgs)
            batch_loss = criterion(logits, batch_labels)

        # Scale the loss before backprop; the scaler performs the optimizer
        # step and then updates its scale factor.
        scaler.scale(batch_loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += batch_loss.item()
        hits = logits.argmax(1) == batch_labels
        n_correct += hits.sum().item()

    mean_loss = running_loss / len(loader)
    accuracy = n_correct / len(loader.dataset)
    return mean_loss, accuracy

@torch.no_grad()
def eval_epoch(loader):
    """Evaluate the module-level ``model`` on ``loader`` without gradients.

    Returns:
        A dict with keys ``'loss'`` (sum of per-batch losses divided by the
        number of batches), ``'accuracy'``, and macro-averaged
        ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    model.eval()
    loss_sum = 0
    y_pred = []
    y_true = []
    for batch_imgs, batch_labels in loader:
        batch_imgs = batch_imgs.to(CFG.DEVICE)
        batch_labels = batch_labels.to(CFG.DEVICE)
        with torch.cuda.amp.autocast():
            logits = model(batch_imgs)
            batch_loss = criterion(logits, batch_labels)
        loss_sum += batch_loss.item()
        # Collect predictions and targets on the CPU for the sklearn metrics.
        y_pred.extend(logits.argmax(1).cpu().numpy())
        y_true.extend(batch_labels.cpu().numpy())

    # Every sklearn metric uses the same averaging options.
    macro_opts = {'average': 'macro', 'zero_division': 0}
    accuracy = np.mean(np.array(y_pred) == np.array(y_true))
    precision = precision_score(y_true, y_pred, **macro_opts)
    recall = recall_score(y_true, y_pred, **macro_opts)
    f1 = f1_score(y_true, y_pred, **macro_opts)

    return {
        'loss': loss_sum / len(loader),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Train for CFG.EPOCHS epochs, checkpointing whenever validation accuracy
# beats the best value seen so far.
best_val_acc = 0
for epoch in range(1, CFG.EPOCHS+1):
    tr_loss, tr_acc = train_epoch(train_loader)
    metrics = eval_epoch(val_loader)
    print(f"Epoch {epoch}/{CFG.EPOCHS} — Train loss {tr_loss:.4f}, acc {tr_acc:.4f} | Val loss {metrics['loss']:.4f}, acc {metrics['accuracy']:.4f}")

    # Only a strict improvement triggers a new checkpoint.
    improved = metrics['accuracy'] > best_val_acc
    if not improved:
        continue
    best_val_acc = metrics['accuracy']
    torch.save(model.state_dict(), 'best_model.pth')

# 9. Inference with temporal smoothing (usage example)
# assuming `frames` is an iterable of batches of video frames
# smoother = TemporalSmoother(CFG.SEQ_LEN)
# for imgs in frames:
#     imgs = imgs.to(CFG.DEVICE)
#     with torch.no_grad():
#         preds = torch.softmax(model(imgs), dim=1)
#     smooth = smoother.smooth(preds)
#     decisão = smooth.argmax(dim=1)

# 10. JIT export for C++
# Export the best checkpoint rather than whatever weights the final epoch
# left in `model`: the training loop saves the best-scoring weights to
# 'best_model.pth', so load them into a copy (leaving `model` untouched).
import copy

export_model = copy.deepcopy(model)
# NOTE(review): a 'best_model.pth' left over from an earlier run would also
# be picked up here if this run never saved one — confirm the working
# directory is clean.
if os.path.exists('best_model.pth'):
    best_state = torch.load('best_model.pth', map_location=CFG.DEVICE)
    export_model.load_state_dict(best_state)
# Script in eval mode so the exported module runs dropout/batch-norm layers
# in inference behaviour.
export_model.eval()

scripted = torch.jit.script(export_model)
scripted.save('asl_model_jit.pt')
print('Modelo JIT salvo em asl_model_jit.pt')
