In [18]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
import os
import cv2
import numpy as np
from PIL import Image

In [None]:
# Dataset class untuk memuat sequential frames
class DrowsinessDataset(Dataset):
    def __init__(self, video_folders, labels, transform=None, num_frames=30):
        self.video_folders = video_folders
        self.labels = labels
        self.transform = transform
        self.num_frames = num_frames
    
    def __len__(self):
        return len(self.video_folders)
    
    def __getitem__(self, idx):
        folder_path = self.video_folders[idx]
        label = self.labels[idx]
        
        # Load semua frames dari folder
        frames = []
        for i in range(1, self.num_frames + 1):
            frame_path = os.path.join(folder_path, f"{i:02d}.jpg")
            
            if os.path.exists(frame_path):
                img = Image.open(frame_path).convert('RGB')
                if self.transform:
                    img = self.transform(img)
                frames.append(img)
            else:
                # Jika frame tidak ada, gunakan frame kosong
                if self.transform:
                    img = self.transform(Image.new('RGB', (224, 224)))
                else:
                    img = torch.zeros(3, 224, 224)
                frames.append(img)
        
        # Stack frames menjadi tensor (num_frames, channels, height, width)
        frames = torch.stack(frames)
        
        return frames, label

# Transform untuk preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Path ke folder output
output_dir = r"c:\Users\andre\Documents\Binus\Lomba\DataSlayer 3\output"
drowsiness_dir = os.path.join(output_dir, "drowsiness_faces")
non_drowsiness_dir = os.path.join(output_dir, "non-drowsiness_faces")

# Kumpulkan semua folder video dan labelnya
video_folders = []
labels = []

# Label 1 untuk drowsiness
for folder_name in os.listdir(drowsiness_dir):
    folder_path = os.path.join(drowsiness_dir, folder_name)
    if os.path.isdir(folder_path) and folder_name != "manifest.csv":
        video_folders.append(folder_path)
        labels.append(1)  # drowsiness = 1

# Label 0 untuk non-drowsiness
for folder_name in os.listdir(non_drowsiness_dir):
    folder_path = os.path.join(non_drowsiness_dir, folder_name)
    if os.path.isdir(folder_path) and folder_name != "manifest.csv":
        video_folders.append(folder_path)
        labels.append(0)  # non-drowsiness = 0

# Split data menjadi train dan test (80:20)
train_folders, test_folders, train_labels, test_labels = train_test_split(
    video_folders, labels, test_size=0.2, random_state=42, stratify=labels
)

# Buat dataset
train_dataset = DrowsinessDataset(train_folders, train_labels, transform=transform)
test_dataset = DrowsinessDataset(test_folders, test_labels, transform=transform)

# Buat dataloader
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# Print informasi dataset
print(f"Total data: {len(video_folders)}")
print(f"Train data: {len(train_dataset)}")
print(f"Test data: {len(test_dataset)}")
print(f"Drowsiness samples: {sum(labels)}")
print(f"Non-drowsiness samples: {len(labels) - sum(labels)}")
print(f"\nBatch size: {batch_size}")
print(f"Train batches: {len(train_loader)}")
print(f"Test batches: {len(test_loader)}")

Total data: 120
Train data: 96
Test data: 24
Drowsiness samples: 60
Non-drowsiness samples: 60

Batch size: 8
Train batches: 12
Test batches: 3


In [20]:
import timm
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# CUDA optimization
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")

Using device: cuda
GPU: NVIDIA GeForce GTX 1650 Ti
CUDA Version: 11.8


In [22]:
class EfficientNetFeatureExtractor(nn.Module):
    def __init__(self, feature_dim=256, freeze=True):
        super().__init__()
        # Load pretrained EfficientNetB0
        self.effnet = timm.create_model('efficientnet_b0', pretrained=True, num_classes=0)
        
        # Freeze backbone
        if freeze:
            for param in self.effnet.parameters():
                param.requires_grad = False
        
        # Bottleneck layer: 1280 -> 256
        self.bottleneck = nn.Sequential(
            nn.Linear(1280, feature_dim),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
    
    def forward(self, x):
        # x: (B*T, C, H, W)
        features = self.effnet(x)  # (B*T, 1280)
        features = self.bottleneck(features)  # (B*T, 256)
        return features

In [23]:
class LSTMEncoder(nn.Module):
    def __init__(self, input_size=256, hidden_size=128, num_layers=1, bidirectional=True):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            dropout=0.0  # No dropout karena 1 layer
        )
    
    def forward(self, x):
        # x: (B, T, F)
        out, (h_n, c_n) = self.lstm(x)
        # out: (B, T, hidden_size * 2) jika bidirectional
        # Ambil output di time step terakhir
        last_out = out[:, -1, :]  # (B, hidden_size * 2)
        return last_out

In [24]:
class MLPClassifier(nn.Module):
    def __init__(self, input_size=256, hidden_size=64, num_classes=1):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, num_classes)
        )
    
    def forward(self, x):
        return self.classifier(x)

In [25]:
class DrowsinessDetector(nn.Module):
    def __init__(self, feature_dim=256, lstm_hidden=128, mlp_hidden=64, freeze_cnn=True):
        super().__init__()
        self.feature_extractor = EfficientNetFeatureExtractor(feature_dim=feature_dim, freeze=freeze_cnn)
        
        lstm_output_size = lstm_hidden * 2  # bidirectional
        self.temporal_encoder = LSTMEncoder(input_size=feature_dim, hidden_size=lstm_hidden, bidirectional=True)
        self.classifier = MLPClassifier(input_size=lstm_output_size, hidden_size=mlp_hidden, num_classes=1)
    
    def forward(self, x):
        # x: (B, T, C, H, W)
        B, T, C, H, W = x.shape
        
        # Reshape untuk feature extraction
        x = x.view(B * T, C, H, W)  # (B*T, C, H, W)
        
        # Extract features
        features = self.feature_extractor(x)  # (B*T, feature_dim)
        
        # Reshape kembali untuk LSTM
        features = features.view(B, T, -1)  # (B, T, feature_dim)
        
        # LSTM encoding
        lstm_out = self.temporal_encoder(features)  # (B, lstm_hidden*2)
        
        # Classification
        logits = self.classifier(lstm_out)  # (B, 1)
        
        return logits.squeeze(-1)  # (B,)

In [26]:
model = DrowsinessDetector(
    feature_dim=256,
    lstm_hidden=128,
    mlp_hidden=64,
    freeze_cnn=True
).to(device)

# Hitung parameter
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {total_params - trainable_params:,}")

Total parameters: 4,747,261
Trainable parameters: 739,713
Frozen parameters: 4,007,548


In [27]:
criterion = nn.BCEWithLogitsLoss()

# Optimizer - hanya untuk trainable parameters
optimizer = Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-3,
    weight_decay=1e-4
)

# Learning rate scheduler
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,

)

In [28]:
def train_one_epoch(model, train_loader, criterion, optimizer, device, use_amp=True):
    model.train()
    running_loss = 0.0
    predictions = []
    targets = []
    
    scaler = torch.cuda.amp.GradScaler() if use_amp else None
    
    for batch_idx, (frames, labels) in enumerate(train_loader):
        frames = frames.to(device, non_blocking=True)
        labels = labels.float().to(device, non_blocking=True)
        
        optimizer.zero_grad()
        
        if use_amp and torch.cuda.is_available():
            with torch.cuda.amp.autocast():
                logits = model(frames)
                loss = criterion(logits, labels)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            logits = model(frames)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
        
        running_loss += loss.item()
        
        # Predictions
        preds = (torch.sigmoid(logits) > 0.5).float()
        predictions.extend(preds.cpu().numpy())
        targets.extend(labels.cpu().numpy())
        
        if (batch_idx + 1) % 5 == 0:
            print(f"  Batch {batch_idx + 1}/{len(train_loader)}, Loss: {loss.item():.4f}")
    
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = accuracy_score(targets, predictions)
    
    return epoch_loss, epoch_acc

# Cell 12: Validation Function
def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    predictions = []
    targets = []
    
    with torch.no_grad():
        for frames, labels in val_loader:
            frames = frames.to(device, non_blocking=True)
            labels = labels.float().to(device, non_blocking=True)
            
            logits = model(frames)
            loss = criterion(logits, labels)
            
            running_loss += loss.item()
            
            preds = (torch.sigmoid(logits) > 0.5).float()
            predictions.extend(preds.cpu().numpy())
            targets.extend(labels.cpu().numpy())
    
    epoch_loss = running_loss / len(val_loader)
    epoch_acc = accuracy_score(targets, predictions)
    epoch_precision = precision_score(targets, predictions, zero_division=0)
    epoch_recall = recall_score(targets, predictions, zero_division=0)
    epoch_f1 = f1_score(targets, predictions, zero_division=0)
    
    return epoch_loss, epoch_acc, epoch_precision, epoch_recall, epoch_f1, predictions, targets


In [29]:
num_epochs = 50
patience = 10
best_val_loss = float('inf')
patience_counter = 0

train_losses = []
val_losses = []
train_accs = []
val_accs = []

print("Starting Training (Stage 1: Frozen EfficientNet)...")
print("=" * 60)

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch + 1}/{num_epochs}")
    print("-" * 60)
    
    # Training
    train_loss, train_acc = train_one_epoch(
        model, train_loader, criterion, optimizer, device, use_amp=True
    )
    
    # Validation
    val_loss, val_acc, val_prec, val_rec, val_f1, val_preds, val_targets = validate(
        model, test_loader, criterion, device
    )
    
    # Scheduler step
    scheduler.step(val_loss)
    
    # Save metrics
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)
    
    print(f"\nTrain Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    print(f"Val Precision: {val_prec:.4f}, Val Recall: {val_rec:.4f}, Val F1: {val_f1:.4f}")
    
    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Save best model
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }, 'best_model.pth')
        print("âœ“ Saved best model")
    else:
        patience_counter += 1
        print(f"Patience: {patience_counter}/{patience}")
    
    if patience_counter >= patience:
        print(f"\nEarly stopping triggered after {epoch + 1} epochs")
        break

print("\n" + "=" * 60)
print("Training Complete!")

Starting Training (Stage 1: Frozen EfficientNet)...

Epoch 1/50
------------------------------------------------------------


  scaler = torch.cuda.amp.GradScaler() if use_amp else None
  with torch.cuda.amp.autocast():


  Batch 5/12, Loss: 0.6587
  Batch 10/12, Loss: 0.7177


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.08 GiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 1.79 GiB is allocated by PyTorch, and 367.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Loss plot
ax1.plot(train_losses, label='Train Loss', marker='o')
ax1.plot(val_losses, label='Val Loss', marker='s')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.set_title('Training and Validation Loss')
ax1.legend()
ax1.grid(True)

# Accuracy plot
ax2.plot(train_accs, label='Train Accuracy', marker='o')
ax2.plot(val_accs, label='Val Accuracy', marker='s')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.set_title('Training and Validation Accuracy')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()


In [None]:
cm = confusion_matrix(val_targets, val_preds)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Non-Drowsy', 'Drowsy'],
            yticklabels=['Non-Drowsy', 'Drowsy'])
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

print("\nFinal Metrics:")
print(f"Accuracy: {val_acc:.4f}")
print(f"Precision: {val_prec:.4f}")
print(f"Recall: {val_rec:.4f}")
print(f"F1-Score: {val_f1:.4f}")