In [3]:
import torch
import torchvision
# Let cuDNN benchmark convolution algorithms and keep the fastest one
torch.backends.cudnn.benchmark = True
# Select the compute device: CUDA when a GPU is visible, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")

if torch.cuda.is_available():
    print(f"✅ {torch.cuda.device_count()} GPU(s) detected:")
    for i in range(torch.cuda.device_count()):
        print(f"  - {torch.cuda.get_device_name(i)}")
    print(f"Current device: {torch.cuda.current_device()}")

    # Make newly created tensors default to float32 on the GPU.
    # torch.set_default_tensor_type() is deprecated (it emitted a warning in
    # this notebook's output); set_default_dtype + set_default_device are the
    # documented replacements.
    torch.set_default_dtype(torch.float32)
    torch.set_default_device('cuda')
    print("Default tensor type set to CUDA")

    # Release cached, unused GPU memory held by the caching allocator
    torch.cuda.empty_cache()
    print("CUDA cache emptied")
else:
    print("❌ No GPU detected. Using CPU instead.")

PyTorch version: 2.7.0+cu126
Torchvision version: 0.22.0+cu126
✅ 1 GPU(s) detected:
  - NVIDIA GeForce GTX 1650
Current device: 0
Default tensor type set to CUDA
CUDA cache emptied


  _C._set_default_tensor_type(t)


In [4]:
from loaders.dataset import *

In [5]:
# Build the train / validation / test dataloaders for the dataset root.
# NOTE(review): `create_dataloaders` and `ROOT_DIR` are not defined in this
# notebook; presumably they come from the `loaders.dataset` star import.
train_loader, val_loader, test_loader = create_dataloaders(ROOT_DIR)

# Smoke test for a dataloader: can it produce one (frames, labels) batch?
def validate_dataloader(loader, name):
    """Check that ``loader`` can yield a first batch and report the outcome.

    Args:
        loader: Iterable with a ``dataset`` attribute supporting ``len()``;
            each batch is expected to unpack into ``(frames, labels)``.
        name: Human-readable label used in the printed messages.

    Returns:
        bool: True when a first batch was fetched and described, False when
        the dataset is empty, no batch is produced, or fetching fails.
    """
    print(f"Validating dataloader '{name}'...")

    # An empty underlying dataset can never produce a batch
    if not len(loader.dataset):
        print(f"  ⚠️ Dataloader '{name}' is empty (contains no data)")
        return False

    batch_ok = False
    try:
        # Pull exactly one batch and describe it
        frames, labels = next(iter(loader))
        print(f"  ✅ Dataloader '{name}' is valid - batch shape: {frames.shape}")
        print(f"     Labels: {labels}")
        batch_ok = True
    except StopIteration:
        print(f"  ⚠️ Dataloader '{name}' is empty (no batches)")
    except Exception as exc:
        # Diagnostic helper: report any loading failure instead of raising
        print(f"  ❌ Error getting first batch: {exc}")
        print(f"  Error type: {type(exc).__name__}")
    return batch_ok

# Run the first-batch check on every split, in order, and keep each verdict
print("VALIDATING DATALOADERS")
print("-----------------")
valid_train, valid_val, valid_test = [
    validate_dataloader(split_loader, split_name)
    for split_loader, split_name in (
        (train_loader, "Training"),
        (val_loader, "Validation"),
        (test_loader, "Test"),
    )
]
print("-----------------")

# Check that the dataset directory tree exists and report its contents
def check_dataset_path(root_dir, classes=None, splits=None):
    """Verify the ``root_dir/<class>/<split>`` layout and print a summary.

    For every class and split directory found, the number of ``.mp4`` /
    ``.avi`` files is printed together with the size of one example file.

    Args:
        root_dir: Path of the dataset root directory.
        classes: Iterable of class identifiers (each is converted with
            ``str`` to build the directory name). Defaults to the
            module-level ``CLASSES``.
        splits: Iterable of split directory names. Defaults to the
            module-level ``SPLITS``.

    Returns:
        bool: True only when the root and every expected class and split
        directory exist; False as soon as any of them is missing (the scan
        still continues so that every problem is reported).
    """
    print(f"\nChecking data directory: {root_dir}")

    if not os.path.isdir(root_dir):
        print(f"❌ ERROR: Directory does not exist: {root_dir}")
        return False

    print(f"✅ Directory exists: {root_dir}")

    # Fall back to the notebook-wide configuration when not given explicitly
    classes = CLASSES if classes is None else classes
    splits = SPLITS if splits is None else splits

    structure_ok = True

    # Check for class directories
    for cls in classes:
        cls_dir = os.path.join(root_dir, str(cls))
        if not os.path.isdir(cls_dir):
            print(f"❌ ERROR: Class directory {cls} does not exist: {cls_dir}")
            structure_ok = False
            continue

        print(f"✅ Class directory {cls} exists")

        # Check split directories
        for split in splits:
            split_dir = os.path.join(cls_dir, split)
            # isdir (not exists) so a stray file cannot crash os.listdir below
            if not os.path.isdir(split_dir):
                print(f"❌ ERROR: Split directory {split} does not exist: {split_dir}")
                structure_ok = False
                continue

            # Count video files; sorted so the reported example is deterministic
            video_files = sorted(
                f for f in os.listdir(split_dir)
                if f.lower().endswith(('.mp4', '.avi'))
            )
            print(f"   - {split}: {len(video_files)} video files")

            # Report the size of one example file
            if video_files:
                sample_file = os.path.join(split_dir, video_files[0])
                size_mb = os.path.getsize(sample_file) / (1024 * 1024)
                print(f"     Example: {video_files[0]} ({size_mb:.2f} MB)")

    return structure_ok

# Inspect the on-disk dataset layout. As the last expression of the cell,
# the boolean returned by check_dataset_path is shown as the cell output.
check_dataset_path(ROOT_DIR)

Processing folder: ../../DAiSEE/DataSet/Aug/0/train
  → 989 videos found for class 0
Processing folder: ../../DAiSEE/DataSet/Aug/1/train
  → 1908 videos found for class 1
Processing folder: ../../DAiSEE/DataSet/Aug/2/train
  → 1050 videos found for class 2
Processing folder: ../../DAiSEE/DataSet/Aug/3/train
  → 1050 videos found for class 3
Total videos for train: 4997
Processing folder: ../../DAiSEE/DataSet/Aug/0/validation
  → 9 videos found for class 0
Processing folder: ../../DAiSEE/DataSet/Aug/1/validation
  → 68 videos found for class 1
Processing folder: ../../DAiSEE/DataSet/Aug/2/validation
  → 225 videos found for class 2
Processing folder: ../../DAiSEE/DataSet/Aug/3/validation
  → 225 videos found for class 3
Total videos for validation: 527
Processing folder: ../../DAiSEE/DataSet/Aug/0/test
  → 9 videos found for class 0
Processing folder: ../../DAiSEE/DataSet/Aug/1/test
  → 69 videos found for class 1
Processing folder: ../../DAiSEE/DataSet/Aug/2/test
  → 225 videos found f

True

In [6]:
import torch.nn as nn
from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights


class VideoRNNModel(nn.Module):
    """Per-frame CNN features followed by a recurrent video classifier.

    Each frame is encoded with the convolutional part of an
    ImageNet-pretrained MobileNetV3-Small, projected to ``feature_dim``,
    and the resulting sequence is fed to a GRU or LSTM. The final hidden
    state of the top recurrent layer is classified.

    Args:
        num_classes: Number of output logits (defaults to ``len(CLASSES)``,
            evaluated once when the class is defined).
        feature_dim: Size of the per-frame embedding and of the RNN hidden
            state.
        num_layers: Number of stacked recurrent layers.
        dropout: Dropout probability used on the RNN input, between
            recurrent layers, and before the final linear layer.
        rnn_type: ``'lstm'`` (case-insensitive) selects an LSTM; any other
            value selects a GRU.
    """

    def __init__(
        self,
        num_classes=len(CLASSES),
        feature_dim=128,
        num_layers=2,
        dropout=0.1,
        rnn_type='gru'        # 'gru' or 'lstm'
    ):
        super().__init__()
        # Backbone: MobileNetV3-Small with ImageNet weights
        weights = MobileNet_V3_Small_Weights.IMAGENET1K_V1
        base    = mobilenet_v3_small(weights=weights)
        # Channel count produced by base.features (576 for MobileNetV3-Small),
        # read from the first layer of the original classifier head
        backbone_dim = base.classifier[0].in_features

        self.feature_extractor = nn.Sequential(
            *list(base.features),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten()
        )
        # Projection to feature_dim
        self.feature_reduce = nn.Linear(backbone_dim, feature_dim)
        self.layer_norm     = nn.LayerNorm(feature_dim)
        self.dropout_in     = nn.Dropout(dropout)
        # RNN. Inter-layer dropout is only meaningful with more than one
        # layer; PyTorch warns when it is non-zero for a single layer.
        rnn_dropout = dropout if num_layers > 1 else 0.0
        rnn_cls = nn.LSTM if rnn_type.lower() == 'lstm' else nn.GRU
        self.rnn = rnn_cls(
            feature_dim, feature_dim, num_layers=num_layers,
            batch_first=True, dropout=rnn_dropout
        )
        # Classification head
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(feature_dim, num_classes)
        )

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: Tensor of shape ``[B, T, C, H, W]``.

        Returns:
            Logits of shape ``[B, num_classes]``.
        """
        B, T, C, H, W = x.shape
        # 1) CNN per frame: fold time into the batch dimension.
        #    reshape (not view) so non-contiguous inputs are accepted too.
        x = x.reshape(B * T, C, H, W)
        x = self.feature_extractor(x)
        x = self.feature_reduce(x)
        # 2) Restore the sequence layout
        x = x.reshape(B, T, -1)          # [B, T, feature_dim]
        x = self.layer_norm(x)
        x = self.dropout_in(x)
        # 3) RNN
        # out: [B, T, feature_dim], h_n: [num_layers, B, feature_dim]
        out, h_n = self.rnn(x)
        # Use the final hidden state of the top layer
        if isinstance(h_n, tuple):      # LSTM returns (h_n, c_n)
            h_n = h_n[0]
        last = h_n[-1]                  # [B, feature_dim]
        # 4) Classification
        logits = self.classifier(last)  # [B, num_classes]
        return logits

# Instantiate the GRU variant, move it to the selected device, and report
# how many parameters it has in total and how many are trainable
model = VideoRNNModel(rnn_type='gru')
model = model.to(device)
trainable_params = sum(
    weight.numel() for weight in model.parameters() if weight.requires_grad
)
total_params = sum(weight.numel() for weight in model.parameters())
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Total parameters: 1,199,780
Trainable parameters: 1,199,780


In [7]:
import time
from tqdm import tqdm
from torch import nn
import torch

# Optimisation hyperparameters: Adam step size and weight-decay coefficient
learning_rate = 1e-4
weight_decay  = 1e-5
# Define loss function and optimizer
# Cross-entropy over the logits returned by the model
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter (the backbone is optimised as well)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Añade después de importar torch
from torch.cuda.amp import GradScaler

# 1) Crea el scaler una vez
scaler = GradScaler()

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one full-precision optimisation pass over ``dataloader``.

    Args:
        model: Module to train; switched to training mode.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable applied to ``(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(average loss per sample, accuracy in percent)``.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(dataloader, desc="Training", leave=False)
    for inputs, targets in progress:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        # Drop the gradients left over from the previous step
        optimizer.zero_grad(set_to_none=True)

        # Forward, backward, and parameter update in standard precision
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch loss is per sample
        n_batch = targets.size(0)
        loss_sum += batch_loss.item() * n_batch
        n_seen += n_batch
        predicted = logits.max(1)[1]
        n_correct += (predicted == targets).sum().item()

    return loss_sum / n_seen, 100.0 * n_correct / n_seen

def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    The loss is averaged per sample (each batch loss is weighted by its
    batch size), matching ``train_epoch``. A plain mean over batches would
    over-weight a smaller final batch.

    Args:
        model: Module to evaluate; switched to eval mode.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable applied to ``(outputs, targets)``.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(average loss per sample, accuracy in percent)``.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            batch_size = targets.size(0)
            # Weight by batch size so unequal batches contribute fairly
            running_loss += loss.item() * batch_size
            total += batch_size
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()

    acc = 100.0 * correct / total
    avg_loss = running_loss / total
    return avg_loss, acc

import traceback

print("\nTraining model...")

# Training parameters
num_epochs = 30

# Per-epoch metrics for plotting. Defined outside the try block so later
# cells can always read them, even if training stops early.
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': []
}

try:
    # Start training
    start_time = time.time()

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")

        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)

        # Validate
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

    total_time = time.time() - start_time
    print(f"\nTraining completed in {total_time:.2f} seconds")
    print("✅ Training completed")

except Exception as e:
    print(f"❌ Error during training: {e}")
    print(f"Error type: {type(e).__name__}")
    # Keep the full traceback: the message alone does not say where it failed
    traceback.print_exc()

AttributeError: partially initialized module 'torch._dynamo' has no attribute 'config' (most likely due to a circular import)

In [None]:
import matplotlib.pyplot as plt

# Side-by-side learning curves: loss on the left, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Training & validation loss per epoch
loss_ax.plot(history['train_loss'], label='Train')
loss_ax.plot(history['val_loss'], label='Validation')
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Training & validation accuracy per epoch
acc_ax.plot(history['train_acc'], label='Train')
acc_ax.plot(history['val_acc'], label='Validation')
acc_ax.set_title('Model Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.legend()

fig.tight_layout()
plt.show()

# Final evaluation on the held-out test split
test_loss, test_acc = validate(model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%")

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# 3.2 Collect predictions and true labels over the test split
all_preds = []
all_labels = []
model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        _, preds = outputs.max(1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(targets.cpu().tolist())

# 3.3 Build and display the confusion matrix
# NOTE(review): assumes the entries of CLASSES are the same integer ids the
# dataloader emits as targets - confirm against loaders.dataset.
cm = confusion_matrix(all_labels, all_preds, labels=CLASSES)
disp = ConfusionMatrixDisplay(cm, display_labels=CLASSES)

# Dedicated figure for the matrix. The previous plt.subplot(1, 3, 3) was a
# leftover from a three-panel layout and squeezed it into one third of the width.
fig, ax = plt.subplots(figsize=(6, 5))
disp.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix')

fig.tight_layout()
plt.show()