## Imports e installs

In [2]:
#%pip install torch
%pip install InquirerPy
#%pip install kagglehub
#%pip install pandas
#%pip install torchvision
#%pip install ipywidgets
#%pip install scikit-learn seaborn
#%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Collecting InquirerPy
  Downloading InquirerPy-0.3.4-py3-none-any.whl.metadata (8.1 kB)
Collecting pfzy<0.4.0,>=0.3.1 (from InquirerPy)
  Downloading pfzy-0.3.4-py3-none-any.whl.metadata (4.9 kB)
Downloading InquirerPy-0.3.4-py3-none-any.whl (67 kB)
Downloading pfzy-0.3.4-py3-none-any.whl (8.5 kB)
Installing collected packages: pfzy, InquirerPy
Successfully installed InquirerPy-0.3.4 pfzy-0.3.4
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import re
import cv2
import time
import torch
import logging
import kagglehub
import numpy as np
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import matplotlib as ptl
import tqdm as notebook_tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
from torchvision import transforms
from torchvision.io import read_image
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18, ResNet18_Weights
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from torchvision.models import mobilenet_v2, MobileNet_V2_Weights

## Baixando Base de dados

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("davimedio01/v-librasil")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/davimedio01/v-librasil?dataset_version_number=1...


100%|██████████| 10.1G/10.1G [02:17<00:00, 78.8MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/davimedio01/v-librasil/versions/1


## Variáveis Globais


In [3]:
videos_path = "../../videos/"
csv_path = "../../videos/annotations.csv"
csv_path_features = "../../features/annotations.csv"
videos_path_teste = "../../features_teste/"
csv_path_teste = "../../features_teste/annotations.csv"
videos_path_val = "../../features_val/"
csv_path_val = "../../features_val/annotations.csv"
features_path = "../../features/"   

## Logger

In [4]:
logger = logging.getLogger("FeatureExtraction")
logger.setLevel(logging.DEBUG)

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
fh = logging.FileHandler("feature_extraction.log")
fh.setLevel(logging.DEBUG)

fmt = logging.Formatter(
    "%(asctime)s — %(name)s — %(levelname)s — %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)
ch.setFormatter(fmt)
fh.setFormatter(fmt)
logger.addHandler(ch)
logger.addHandler(fh)

# Treinamento CNN e RNN

## Dataloaders para CNN e RNN

In [None]:
class CNNDataset(Dataset):
  def __init__(self, annotations_file, videosDir, transform=None, target_transform=None):
    self.annotations_file = annotations_file
    self.labels = self.getLabels()
    classes_em_ordem = list(dict.fromkeys(self.labels["class"]))
    self.label2idx = {classe: i for i, classe in enumerate(classes_em_ordem)}
    self.idx2label = {i: classe for classe, i in self.label2idx.items()}
    self.videos_name = self.getVideosName()
    self.videosDir = videosDir
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    return len(self.labels)

  def __getitem__(self, idx):
    video = self.extractFrames(self.videosDir + self.videos_name.iloc[idx,0])
    label = self.label2idx[self.labels.iloc[idx,0]]
    video_name = self.videos_name.iloc[idx,0]
    frames_t = [self.transform(frame) for frame in video]  # cada frame→Tensor[C,H,W]
    videoT = torch.stack(frames_t, dim=0)
    if self.target_transform:
      label = self.target_transform(label)
    return videoT, label, video_name

  def getLabels(self):
    labels = pd.read_csv(self.annotations_file)
    return labels[["class"]]

  def getVideosName(self):
    videosName = pd.read_csv(self.annotations_file)
    return videosName[["video_name"]]

  def idxToLabel(self,idx):
    return self.idx2label[idx]

  def extractFrames(self,filepath):
    video = cv2.VideoCapture(filepath)
    frames = []
    while video.isOpened():
      sucess, frame = video.read()
      if not sucess:
        break
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      frames.append(frame)
    video.release()
    return frames

def collate_fn(batch):
  sequences,labels, video_name = zip(*batch)
  lengths = [seq.shape[0] for seq in sequences]
  padded_sequences = pad_sequence(sequences=sequences,batch_first=True, padding_value=0)
  labels = torch.tensor(labels, dtype=torch.long)
  return padded_sequences,labels,video_name,lengths

In [6]:
class RNNDataset(Dataset):
  def __init__(self, annotations_file,featuresDir, transform=None, target_transform=None):
    self.annotations_file = annotations_file
    self.labels = self.getLabels()
    classes_em_ordem = list(dict.fromkeys(self.labels["class"]))
    self.label2idx = {classe: i for i, classe in enumerate(classes_em_ordem)}
    self.idx2label = {i: classe for classe, i in self.label2idx.items()}
    self.features = self.getFeaturesNames()
    self.featuresDir = featuresDir
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    return len(self.labels)

  def __getitem__(self, idx):
    feature_data = self.extractFeature(self.featuresDir + self.features.iloc[idx, 0])
    features = feature_data['features']
    length = feature_data['length']  # comprimento real (sem padding)
    features = features[:length]  # remove o padding
    label = self.label2idx[self.labels.iloc[idx,0]]
    if self.transform:
      features = self.transform(features)
    if self.target_transform:
      label = self.target_transform(label)
    return features, label

  def getFeaturesNames(self):
    df = pd.read_csv(self.annotations_file)
    df["video_name"] = df["video_name"].str.replace("mp4", "pt", regex=False)
    return df[["video_name"]]

  def getLabels(self):
    df = pd.read_csv(self.annotations_file)
    return df[["class"]]
  
  
  def extractFeature(self, path):
    return torch.load(path)

def rnn_collate_fn(batch):
    sequences, labels = zip(*batch)
    lengths = [seq.shape[0] for seq in sequences]
    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0)
    labels = torch.tensor(labels, dtype=torch.long)
    return padded_sequences, labels, lengths





## Criação de modelos

### Modelo CNN

In [7]:


class CNNMobileNetV2(nn.Module):
    def __init__(self):
        super().__init__()
        # Carrega MobileNetV2 pré‑treinado
        backbone = mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1)
        # Congela todos os parâmetros
        for p in backbone.parameters():
            p.requires_grad = False
          # 3) Extrai apenas as camadas convolucionais
        #    backbone.features é um nn.Sequential que vai até antes do classifier
        self.features = backbone.features

        # 4) Define um pool adaptativo para reduzir [B,1280,H,W] → [B,1280,1,1]
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        x = self.features(x)           
        x = self.pool(x)              
        x = x.view(x.size(0), -1)       
        return x



### Modelo RNN

In [8]:
class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, num_classes, dropout = 0.3):
        super(GRUModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Camada GRU
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False
        )
        #self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, lengths):
        # Empacotar sequências para otimização
        packed = nn.utils.rnn.pack_padded_sequence(
            x,
            lengths=lengths,
            batch_first=True,
            enforce_sorted=False
        )
        
        packed_output, hidden = self.gru(packed)
        last_hidden = hidden[-1] 

        # Aplica dropout sobre esse vetor
        #last_hidden = self.dropout(last_hidden)

        # Classificador final
        out = self.fc(last_hidden)
        return out



## Extração de características das imagens

### Configuração de paralelismo

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"O Resnet irá trabalhar em GPU?{torch.cuda.is_available()}")


# Define quantas threads usar para intra-op parallelism (ex.: convoluções)
torch.set_num_threads(4)
torch.set_num_interop_threads(2)

num_workersCNN = 0
batch_sizeCNN = 8
pin_memory = True


### Configuração de Dataloader

In [10]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((112,112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

datasetCNN = CNNDataset(
    annotations_file=csv_path,
    videosDir=videos_path,
    transform= transform
)
loaderCNN = DataLoader(
    datasetCNN,
    batch_size=batch_sizeCNN,
    num_workers=num_workersCNN,          # menos processos
    pin_memory=pin_memory,       # desabilita se não for usar GPU
    collate_fn=collate_fn
)

### Executar CNN

In [None]:
numberDivTeste = 5
numberDivVal = 3


extrator = CNNMobileNetV2().to(device=device)
extrator.eval()
logger.info("Iniciando extração de features usando MobileNetV2")
logger.info('-' * 50)
with torch.no_grad():
    for batch_idx,(videos, labels, names,lengths) in enumerate(tqdm(loaderCNN, desc="Extraindo batches")):
        start_time = time.time()
        try:
            B,T,C,H,W = videos.shape
            logger.info(f"Batch {batch_idx}: shape vídeos = {videos.shape}")
            flat = videos.view(B*T,C,H,W).to(device)
            tqdm.write("frames extraídos")
            feats_flats = extrator(flat)
            D = feats_flats.size(1)
            feats = feats_flats.view(B,T,D).cpu()

            for i in range(B):
                class_name = datasetCNN.idxToLabel(labels[i].item())
                video_name = names[i].replace(".mp4", "")
                real_T = lengths[i] 
                x = re.search('-',video_name)
                chave = int(video_name[x.end()])
                if chave % numberDivVal == 0:
                    out_path = f"{videos_path_val}{video_name}.pt"
                    torch.save({
                        'features': feats[i],
                        'length': real_T
                    }, out_path)
                elif chave % numberDivTeste == 0:
                    out_path = f"{videos_path_teste}{video_name}.pt"
                    torch.save({
                        'features': feats[i],
                        'length': real_T
                    }, out_path)
                else:
                    out_path = f"{features_path}{video_name}.pt"
                    torch.save({
                        'features': feats[i],
                        'length': real_T
                    }, out_path)
            # métricas: tempo e throughput
            elapsed = time.time() - start_time
            videos_per_sec = B / elapsed
            frames_per_sec = (B * T) / elapsed
            logger.info(f"Batch {batch_idx} processado em {elapsed:.2f}s — ")
            logger.info(f"{videos_per_sec:.1f} vídeos/s, {frames_per_sec:.1f} frames/s")

        except Exception as e:
            logger.error(f"Erro no batch {batch_idx}: {e}", exc_info=True)
            

features_name = os.listdir(features_path)
features_name_teste = os.listdir(videos_path_teste)
features_name_val = os.listdir(videos_path_val)
feature_base = []
class_base = []
feature_teste = []
class_teste = []
numberDivTeste = 5
class_val = []
feature_val = []
numberDivVal = 3
for cl in features_name:
    feature_base.append(cl)
    pos = re.search('Sinalizador',cl) 
    class_base.append(cl[2:pos.start()])
df = pd.DataFrame({
    "video_name": feature_base,
    "class": class_base,
  })
df.to_csv(csv_path_features,index=False)   
 
for cl in features_name_teste:
    feature_teste.append(cl)
    pos = re.search('Sinalizador',cl) 
    class_teste.append(cl[2:pos.start()])
df = pd.DataFrame({
    "video_name": feature_teste,
    "class": class_teste,
  })
df.to_csv(csv_path_teste,index=False)   
 
for cl in features_name_val:
    feature_val.append(cl)
    pos = re.search('Sinalizador',cl) 
    class_val.append(cl[2:pos.start()])
    
df = pd.DataFrame({
    "video_name": feature_val,
    "class": class_val,
  })
df.to_csv(csv_path_val,index=False)    



## Treino do RNN

### Configuração de paralelismo

In [None]:
num_workersRNN = 0
batch_sizeRNN = 8

### Configuração de Dataloader

In [None]:
rnn_dataset = RNNDataset(
    annotations_file=csv_path_features,
    featuresDir=features_path
)

rnn_loader = DataLoader(
    rnn_dataset,
    batch_size = batch_sizeRNN,
    shuffle = True,
    pin_memory = pin_memory,
    collate_fn = rnn_collate_fn,
    num_workers = num_workersRNN
)
rnn_dataset_val = RNNDataset(
    annotations_file=csv_path_val,
    featuresDir=videos_path_val
)

rnn_loader_val = DataLoader(
    rnn_dataset_val,
    batch_size = batch_sizeRNN,
    shuffle = True,
    pin_memory= pin_memory,
    collate_fn = rnn_collate_fn,
    num_workers = num_workersRNN
)
rnn_dataset_teste = RNNDataset(
    annotations_file=csv_path_teste,
    featuresDir=videos_path_teste
)

rnn_loader_teste = DataLoader(
    rnn_dataset_val,
    batch_size = batch_sizeRNN,
    shuffle = True,
    pin_memory = pin_memory,
    collate_fn = rnn_collate_fn,
    num_workers = num_workersRNN
)

In [None]:
INPUT_DIM = 1280  # Dimensão dos features da MobileNetV2
HIDDEN_DIM = 512
NUM_LAYERS = 2
NUM_CLASSES = 20
DROPOUT = 0.3

# Criação do modelo
gru_model = GRUModel(
    input_dim=INPUT_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    num_classes=NUM_CLASSES,
    dropout= DROPOUT
).to(device)

def train_model(model, train_loader, val_loader, num_epochs=15):
    logger.info("Treinamento RNN")
    logger.info('-' * 50)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
    # Listas para armazenar métricas
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    
    
    for epoch in range(num_epochs):
        # Treino
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for sequences, labels, lengths in tqdm(train_loader, desc=f'Epoch {epoch+1}'):
            sequences = sequences.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model(sequences, lengths)
            loss = criterion(outputs, labels)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Estatísticas
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        # Métricas de treino
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100 * correct / total
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)
        
        # Validação
        model.eval()
        val_running_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for sequences, labels, lengths in val_loader:
                sequences = sequences.to(device)
                labels = labels.to(device)
                
                outputs = model(sequences, lengths)
                loss = criterion(outputs, labels)
                
                val_running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        # Métricas de validação
        epoch_val_loss = val_running_loss / len(val_loader)
        #scheduler.step(epoch_val_loss)
        epoch_val_acc = 100 * val_correct / val_total
        val_losses.append(epoch_val_loss)
        val_accs.append(epoch_val_acc)
        
        logger.info(f'Epoch {epoch+1}')
        logger.info(f'Train Loss: {epoch_train_loss:.4f} | Train Acc: {epoch_train_acc:.2f}%')
        logger.info(f'Val Loss: {epoch_val_loss:.4f} | Val Acc: {epoch_val_acc:.2f}%')
        logger.info('-' * 50)
    
    # Plotar curvas
    plt.figure(figsize=(12, 5))
    
    # Curva de Loss
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title('Loss Curves')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    
    # Curva de Acurácia
    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(val_accs, label='Val Accuracy')
    plt.title('Accuracy Curves')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    
    plt.tight_layout()
    plt.show()
    
    return model, train_losses, train_accs, val_losses, val_accs

# Iniciar treinamento
model_trained, train_loss, train_acc, val_loss, val_acc = train_model(gru_model,rnn_loader,rnn_loader_val,num_epochs=80)

In [13]:
metrics = {
    'train_loss': train_loss,
    'train_acc': train_acc,
    'val_loss': val_loss,
    'val_acc': val_acc
}
torch.save(metrics, 'training_metrics.pt')

In [17]:


def plot_confusion_matrix(model, dataloader, dataset, device):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for sequences, labels, lengths in tqdm(dataloader, desc="Gerando matriz de confusão"):
            sequences = sequences.to(device)
            labels = labels.to(device)
            
            outputs = model(sequences, lengths)
            _, preds = torch.max(outputs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Obter classes e nomes
    classes = sorted(dataset.label2idx.keys())
    class_names = [dataset.idx2label[i] for i in range(len(classes))]
    
    # Calcular matriz de confusão
    cm = confusion_matrix(all_labels, all_preds)
    
    # Normalizar por linha (por classe real)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
    # Plotar
    plt.figure(figsize=(15, 12))
    sns.heatmap(cm_normalized, annot=True, fmt=".2f", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names)
    
    plt.title('Matriz de Confusão Normalizada')
    plt.xlabel('Predito')
    plt.ylabel('Real')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    
    # Salvar versão não normalizada também
    plt.figure(figsize=(15, 12))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names)
    
    plt.title('Matriz de Confusão (Contagens Absolutas)')
    plt.xlabel('Predito')
    plt.ylabel('Real')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    
    # Mostrar relatório de classificação
    print("\nRelatório de Classificação:")
    print(classification_report(
        all_labels, all_preds,
        target_names=class_names,
        zero_division=0
    ))
    
    return cm

In [None]:
# Supondo que você já tenha:
# - model_trained: seu modelo treinado
# - test_loader: DataLoader de teste
# - dataset: instância original do dataset (para mapeamento de classes)

# Gerar e plotar a matriz
conf_matrix = plot_confusion_matrix(
    model=model_trained,
    dataloader=rnn_loader_real_teste,
    dataset=rnn_dataset_teste,  # Passar o dataset original para mapear os labels
    device=device
)

In [20]:
import cv2
import torch
import numpy as np
from collections import deque
import threading
import torch.nn.functional as F
from torchvision import transforms

# Configurações
SEQUENCE_LENGTH = 50
IMG_SIZE = 112
CONFIDENCE_THRESHOLD = 0.4

class GesturePredictor:
    def __init__(self, cnn_path, gru_path, label_map):
        # Carregar modelos otimizados
        self.cnn_model = torch.jit.load(cnn_path, map_location=device).half().eval()
        self.gru_model = torch.jit.load(gru_path, map_location=device).half().eval()
        
        self.label_map = label_map
        self.frame_queue = deque(maxlen=SEQUENCE_LENGTH)
        self.prediction_queue = deque(maxlen=5)
        self.lock = threading.Lock()
        
        # Pré-processamento
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((IMG_SIZE, IMG_SIZE)),
            transforms.Grayscale(num_output_channels=3),  # Manter 3 canais
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        
        # Thread de processamento
        self.processing_thread = None
        self.current_frame = None
        self.running = True

    def preprocess_frame(self, frame):
        # Converter para RGB e redimensionar
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        
      
        # Converter para tensor
        tensor = self.transform(frame).half().unsqueeze(0)
        return tensor.to(device)

    def process_frames(self):
        while self.running:
            if self.current_frame is not None:
                with self.lock:
                    frame = self.current_frame.copy()
                    self.current_frame = None
                
                processed_tensor = self.preprocess_frame(frame)
                with torch.no_grad():
                    features = self.cnn_model(processed_tensor)
                
                self.frame_queue.append(features)

    def predict_gesture(self):
        if len(self.frame_queue) < SEQUENCE_LENGTH:
            return "Coletando frames...", 0.0, (0, 0, 0)
        
        features = list(self.frame_queue)
        features_tensor = torch.cat(features).unsqueeze(0)
        
        with torch.no_grad():
            gru_out = self.gru_model(features_tensor, torch.tensor([SEQUENCE_LENGTH]))
            probabilities = F.softmax(gru_out, dim=1)
            confidence, pred_idx = torch.max(probabilities, 1)
        
        confidence = confidence.item()
        pred_idx = pred_idx.item()
        self.prediction_queue.append(pred_idx)
        
        # Filtro temporal
        final_pred = max(set(self.prediction_queue), key=self.prediction_queue.count)
        
        # Mapear cor
        color = (0, 255, 0) if confidence > CONFIDENCE_THRESHOLD else (0, 0, 255)
        
        return self.label_map[final_pred], confidence, color

    def run(self):
        self.processing_thread = threading.Thread(target=self.process_frames)
        self.processing_thread.start()
        
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            
            with self.lock:
                self.current_frame = frame.copy()
            
            # Obter predição
            label, confidence, color = self.predict_gesture()
            
            # Interface
            cv2.putText(frame, f"{label} ({confidence:.2f})", 
                        (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
            
            cv2.imshow('Reconhecimento de Gestos - Frame Completo', frame)
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        
        self.running = False
        self.processing_thread.join()
        cap.release()
        cv2.destroyAllWindows()

#%% Funções para salvar modelos e mapeamento
def save_models(cnn_model, gru_model, label_map, save_dir="saved_models"):
    os.makedirs(save_dir, exist_ok=True)
    
    # Salvar modelos com TorchScript
    cnn_scripted = torch.jit.script(cnn_model.cpu().eval())
    gru_scripted = torch.jit.script(gru_model.cpu().eval())
    
    # Salvar arquivos
    torch.save({
        'cnn_state_dict': cnn_model.state_dict(),
        'gru_state_dict': gru_model.state_dict(),
        'label_map': label_map
    }, os.path.join(save_dir, "models_checkpoint.pth"))
    
    cnn_scripted.save(os.path.join(save_dir, "cnn_model.pt"))
    gru_scripted.save(os.path.join(save_dir, "gru_model.pt"))
    
    print(f"Modelos salvos em: {save_dir}")

def load_models(save_dir="saved_models", device=device):
    # Carregar mapeamento de labels
    checkpoint = torch.load(os.path.join(save_dir, "models_checkpoint.pth"), map_location=device)
    label_map = checkpoint['label_map']
    
    # Carregar arquivos TorchScript
    cnn_model = torch.jit.load(os.path.join(save_dir, "cnn_model.pt"), map_location=device)
    gru_model = torch.jit.load(os.path.join(save_dir, "gru_model.pt"), map_location=device)
    
    # Carregar estados para treino continuado (opcional)
    cnn_model.load_state_dict(checkpoint['cnn_state_dict'])
    gru_model.load_state_dict(checkpoint['gru_state_dict'])
    
    return cnn_model, gru_model, label_map



In [26]:
extrator = CNNMobileNetV2().to(device=device)
save_models(extrator,model_trained,datasetCNN.idx2label)

Modelos salvos em: saved_models


In [24]:
extrator = CNNMobileNetV2().to(device=device)
save_models(extrator,model_trained,datasetCNN.idx2label)
# 1. Carregar dataset para pegar o mapeamento correto
dataset = CNNDataset(
    annotations_file=csv_path,
    videosDir=videos_path,
    transform=transforms.Compose([transforms.ToPILImage(), transforms.Resize((112,112)), transforms.ToTensor()])
)

# 2. Criar/Carregar modelos
if os.path.exists("saved_models"):
    print("Carregando modelos pré-treinados...")
    cnn_model, gru_model, label_map = load_models()
else:
    print("Inicializando novos modelos...")
    # Exemplo de criação dos modelos
    cnn_model = CNNMobileNetV2().to(device)
    gru_model = GRUModel(
        input_dim=1280,
        hidden_dim=512,
        num_layers=2,
        num_classes=len(dataset.label2idx)
    ).to(device)
    
    # Salvar com mapeamento original do dataset
    save_models(cnn_model, gru_model, dataset.idx2label)

# 3. Verificar mapeamento


# 4. Inicializar predictor
predictor = GesturePredictor(
    cnn_path=os.path.join("saved_models", "cnn_model.pt"),
    gru_path=os.path.join("saved_models", "gru_model.pt"),
    label_map=label_map
)

# 5. Executar
print("\nIniciando reconhecimento... (Pressione 'q' para sair)")
predictor.run()

NameError: name 'datasetCNN' is not defined