# Voice AI Intent Classification - Model Training

This notebook trains the XLM-RoBERTa intent classifier in the following stages:
1. Data loading and preprocessing
2. Model initialization with Focal Loss
3. Training loop with early stopping
4. Model evaluation and saving

In [None]:
import sys
sys.path.append('..')

import torch
import pandas as pd
import numpy as np
from pathlib import Path
import yaml
import warnings
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when PyTorch reports one as available, CPU otherwise.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda") if has_cuda else torch.device("cpu")
print(f"Using device: {device}")
if has_cuda:
    # Report the name of GPU index 0 so the hardware is visible in the cell output.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 1. Load Configuration

In [None]:
# Load the experiment configuration from the YAML file one directory above
# this notebook. The encoding is pinned to UTF-8 so that parsing does not
# depend on the platform's default locale encoding.
with open('../configs/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Echo the main hyperparameters so the run is self-describing in the output.
print("Configuration loaded:")
print(f"  Model: {config['model']['name']}")
print(f"  Batch size: {config['training']['batch_size']}")
print(f"  Learning rate: {config['training']['learning_rate']}")
print(f"  Epochs: {config['training']['num_epochs']}")

## 2. Load and Prepare Data

In [None]:
from src.data.dataset import load_data, create_dataloaders, IntentLabelEncoder
from src.data.preprocessor import TextPreprocessor
from transformers import AutoTokenizer

# Build the tokenizer (for the model named in the config), the text
# preprocessor, and the intent label encoder.
# NOTE(review): `tokenizer` and `preprocessor` are not passed to any later
# call in this notebook — confirm whether they are still needed here.
tokenizer = AutoTokenizer.from_pretrained(config['model']['name'])
preprocessor = TextPreprocessor()
label_encoder = IntentLabelEncoder()

# Summarise the label space exposed by the encoder.
print(
    f"Number of intents: {label_encoder.num_labels}",
    f"Intents: {label_encoder.intents}",
    sep="\n",
)

In [None]:
# Build the train/val/test dataloaders.
# Each configured data path is prefixed with '../', matching how this notebook
# addresses the config file and output directories.
split_names = ('train', 'val', 'test')
path_kwargs = {
    f'{split}_path': '../' + config['data'][f'{split}_path']
    for split in split_names
}

# create_dataloaders returns a pair; only the first element (the mapping of
# split name -> dataloader) is used in this notebook.
dataloaders, _ = create_dataloaders(
    **path_kwargs,
    batch_size=config['training']['batch_size'],
    max_length=config['model']['max_length'],
)

# Report how many batches each split yields.
for title, split in zip(('Train', 'Val', 'Test'), split_names):
    print(f"{title} batches: {len(dataloaders[split])}")

## 3. Initialize Model

In [None]:
from src.models.intent_classifier import create_model

# Instantiate the classifier from the 'model' section of the config, with the
# focal-loss option switched on and its gamma read from the config.
# NOTE(review): num_labels comes from the config, while the metrics cell uses
# label_encoder.intents — confirm the two agree.
model_cfg = config['model']
model = create_model(
    model_name=model_cfg['name'],
    num_labels=model_cfg['num_labels'],
    dropout=model_cfg['dropout'],
    use_focal_loss=True,
    focal_gamma=config['focal_loss']['gamma'],
)

# Walk the parameters once, recording (element count, requires_grad) pairs,
# then derive the total and trainable counts from that list.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _needs_grad in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)

print(f"Model: {config['model']['name']}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## 4. Train Model

In [None]:
from src.models.trainer import IntentTrainer, TrainingConfig

# Ensure the directory handed to the trainer as its output location exists.
output_dir = Path('../outputs/models')
output_dir.mkdir(parents=True, exist_ok=True)

# Copy the listed hyperparameters straight from the 'training' section of the
# config into TrainingConfig; output_dir is passed as a plain string.
training_keys = (
    'learning_rate',
    'weight_decay',
    'num_epochs',
    'warmup_ratio',
    'early_stopping_patience',
)
training_config = TrainingConfig(
    **{key: config['training'][key] for key in training_keys},
    output_dir=str(output_dir),
)

# Bind the model and the training configuration to a trainer; the device is
# passed as its string form (e.g. "cuda" or "cpu").
trainer = IntentTrainer(model, training_config, device=str(device))

print("Trainer initialized!", f"Output directory: {output_dir}", sep="\n")

In [None]:
# Run training, passing the train split as train_loader and the val split as
# val_loader; the returned history is used by the plotting cell.
# NOTE: This will take several minutes depending on your hardware
# (author's estimate: ~5-10 min per epoch on CPU, ~1-2 min per epoch on GPU).
train_loader = dataloaders['train']
val_loader = dataloaders['val']
history = trainer.train(train_loader=train_loader, val_loader=val_loader)

print("\nTraining complete!")

## 5. Plot Training History

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded training history as three side-by-side panels
# (loss, validation F1, learning rate) and save the figure to disk.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Loss
axes[0].plot(history['train_loss'], label='Train')
axes[0].plot(history['val_loss'], label='Validation')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training & Validation Loss')
axes[0].legend()

# F1 Score
axes[1].plot(history['val_f1'], label='Val F1', color='green')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Macro F1')
axes[1].set_title('Validation F1 Score')
axes[1].legend()

# Learning Rate
# NOTE(review): the x-axis is labelled 'Epoch' — confirm that
# history['learning_rate'] is recorded per epoch rather than per step.
axes[2].plot(history['learning_rate'], color='orange')
axes[2].set_xlabel('Epoch')
axes[2].set_ylabel('Learning Rate')
axes[2].set_title('Learning Rate Schedule')

plt.tight_layout()

# Only '../outputs/models' is created earlier in this notebook, so create the
# plots directory here; otherwise savefig raises FileNotFoundError on a fresh
# checkout, after training has already completed.
plots_dir = Path('../outputs/plots')
plots_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(plots_dir / 'training_history.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nBest Val F1: {max(history['val_f1']):.4f}")

## 6. Evaluate on Test Set

In [None]:
from src.evaluation.metrics import compute_metrics, EvaluationReport

# Restore the checkpoint stored as 'best_model.pt' in the output directory,
# then run the trainer's validation routine on the test split.
best_checkpoint = output_dir / 'best_model.pt'
trainer.load_checkpoint(best_checkpoint)

test_metrics = trainer.validate(dataloaders['test'])

# validate() reports its metrics under 'val_*' keys even for the test split.
print("Test Set Results:")
for label, key in (
    ("Loss", 'val_loss'),
    ("Accuracy", 'val_accuracy'),
    ("Macro F1", 'val_f1'),
):
    print(f"  {label}: {test_metrics[key]:.4f}")

In [None]:
# Detailed evaluation: collect the predicted class, gold label, and top-class
# probability for every test example, then build a full metrics report.
model.eval()
all_preds, all_labels, all_confidences = [], [], []

# Gradients are not needed for inference, so tracking is switched off.
with torch.no_grad():
    for batch in dataloaders['test']:
        # Inputs are moved to the selected device; labels stay where the
        # dataloader put them since they are only converted to a Python list.
        outputs = model(
            batch['input_ids'].to(device),
            batch['attention_mask'].to(device),
        )
        probs = outputs['probabilities']

        # Predicted class = index of the largest probability along the last
        # dimension; confidence = that largest probability.
        all_preds += probs.argmax(dim=-1).cpu().tolist()
        all_labels += batch['labels'].tolist()
        all_confidences += probs.max(dim=-1).values.cpu().tolist()

# Build the report from the accumulated lists, naming classes via the
# label encoder's intent list.
report = compute_metrics(
    all_labels,
    all_preds,
    intent_names=label_encoder.intents,
    confidences=all_confidences,
)

report.print_summary()

## 7. Save Final Model

In [None]:
# List the artifact paths under the output directory. This cell only prints
# the expected locations; it does not write or verify any files itself.
artifacts = (
    ("Best model", 'best_model.pt'),
    ("Final model", 'final_model.pt'),
    ("Training history", 'training_history.json'),
)

print("Model saved at:")
for label, filename in artifacts:
    print(f"  {label}: {output_dir / filename}")

print("\n Training notebook complete!")