# Machine Translation - WMT14

## Complete Training and Evaluation Pipeline

This notebook scaffolds an end-to-end WMT14 machine translation pipeline. The data loading, preprocessing, evaluation, and inference cells are still marked TODO.

**Dataset:** WMT14 En-De

**Model:** Transformer

**Framework:** PyTorch + Transformers

### Table of Contents

1. [Setup and Imports](#setup)
2. [Device Configuration (CUDA/CPU)](#device)
3. [Data Loading and Exploration](#data)
4. [Data Preprocessing](#preprocessing)
5. [Model Architecture](#model)
6. [Training Loop](#training)
7. [Evaluation and Metrics](#evaluation)
8. [Inference Demo](#inference)
9. [Save Results](#save)

## 1. Setup and Imports <a id='setup'></a>

In [None]:
# Install required packages (run once).
# `%pip` (rather than `!pip`) installs into the environment of the running
# kernel, so the packages are importable from this notebook afterwards.
%pip install -q torch torchvision transformers datasets matplotlib seaborn scikit-learn tqdm numpy pandas

In [None]:
# Standard library
import json
import os
from datetime import datetime

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
# `tqdm.auto` picks the widget bar inside Jupyter and falls back to the plain
# text bar when ipywidgets is unavailable, instead of failing at first use.
from tqdm.auto import tqdm

# Set plotting style.
# The seaborn style sheets were renamed in Matplotlib 3.6, so pick whichever
# name this installation actually provides rather than hard-coding one.
_preferred_styles = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
plt.style.use(next((s for s in _preferred_styles if s in plt.style.available), 'default'))
sns.set_palette('husl')

%matplotlib inline

print("✅ All imports successful!")

## 2. Device Configuration (CUDA/CPU) <a id='device'></a>

In [None]:
# Automatic device selection - works for both CUDA and CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

if device.type == 'cuda':
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    print(f"CUDA Version: {torch.version.cuda}")
else:
    print("No GPU available, using CPU")

# Set random seeds for reproducibility.
# A single named constant keeps every library seeded with the same value.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(SEED)
    # Deterministic cuDNN kernels; benchmark mode is pinned off because its
    # autotuner may select different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

## 3. Data Loading and Exploration <a id='data'></a>

### Dataset: WMT14 En-De

This section handles data downloading and initial exploration.

In [None]:
# Directory that holds the dataset files; created on first run.
data_dir = '../datasets/06_machine_translation'
os.makedirs(data_dir, exist_ok=True)

for summary_line in ("Dataset: WMT14 En-De", f"Data directory: {data_dir}"):
    print(summary_line)

# TODO: Add dataset-specific loading code
# This will be customized for each model type

## 4. Data Preprocessing <a id='preprocessing'></a>

In [None]:
# TODO: Add preprocessing and augmentation.
# Planned contents of this section:
#   - Data transformations
#   - Augmentation strategies
#   - Train/val/test splits
#   - DataLoader creation
# NOTE: nothing is processed yet; the message below is a placeholder.
print("✅ Data preprocessing complete")

## 5. Model Architecture <a id='model'></a>

In [None]:
# Model: Transformer
class Model(nn.Module):
    """Encoder-decoder Transformer for sequence-to-sequence translation.

    The defaults follow the "base" configuration of the original Transformer
    paper (6 layers, d_model=512, 8 heads, feed-forward 2048, dropout 0.1).
    Source and target share one token embedding table; positions use a
    learned embedding.

    Args:
        vocab_size: Size of the shared source/target vocabulary. The default
            is a placeholder - TODO set it from the tokenizer once the data
            cells are implemented.
        d_model: Embedding / hidden size.
        nhead: Number of attention heads.
        num_layers: Number of encoder layers and of decoder layers.
        dim_feedforward: Hidden size of the position-wise feed-forward block.
        dropout: Dropout probability.
        max_len: Longest sequence the positional embedding can represent.
        pad_idx: Token id used for padding (assumed 0 - TODO confirm against
            the tokenizer).
    """

    def __init__(self, vocab_size=37000, d_model=512, nhead=8, num_layers=6,
                 dim_feedforward=2048, dropout=0.1, max_len=512, pad_idx=0):
        super().__init__()
        self.pad_idx = pad_idx
        # Embeddings are scaled by sqrt(d_model) before positions are added.
        self.embed_scale = d_model ** 0.5
        self.token_embedding = nn.Embedding(vocab_size, d_model, padding_idx=pad_idx)
        self.position_embedding = nn.Embedding(max_len, d_model)
        self.dropout = nn.Dropout(dropout)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )
        self.generator = nn.Linear(d_model, vocab_size)

    def _embed(self, tokens):
        """Return scaled token embeddings plus positional embeddings."""
        seq_len = tokens.size(1)
        max_len = self.position_embedding.num_embeddings
        if seq_len > max_len:
            raise ValueError(
                f"Sequence length {seq_len} exceeds max_len={max_len}"
            )
        positions = torch.arange(seq_len, device=tokens.device).unsqueeze(0)
        hidden = self.token_embedding(tokens) * self.embed_scale
        return self.dropout(hidden + self.position_embedding(positions))

    def forward(self, x, tgt=None):
        """Compute next-token logits with teacher forcing.

        Args:
            x: Source token ids, shape (batch, src_len).
            tgt: Decoder input token ids (the target shifted right),
                shape (batch, tgt_len).

        Returns:
            Logits of shape (batch, vocab_size, tgt_len). The class dimension
            is placed at dim 1 so the result can be passed straight to
            ``nn.CrossEntropyLoss`` with labels of shape (batch, tgt_len).

        Raises:
            ValueError: If ``tgt`` is not provided.
        """
        if tgt is None:
            raise ValueError(
                "Model.forward needs the decoder input: call model(src, tgt)"
            )

        src_padding = x.eq(self.pad_idx)
        tgt_padding = tgt.eq(self.pad_idx)
        tgt_len = tgt.size(1)
        # True above the diagonal: position i may not attend to positions > i.
        causal_mask = torch.triu(
            torch.ones(tgt_len, tgt_len, dtype=torch.bool, device=tgt.device),
            diagonal=1,
        )

        decoded = self.transformer(
            self._embed(x),
            self._embed(tgt),
            tgt_mask=causal_mask,
            src_key_padding_mask=src_padding,
            tgt_key_padding_mask=tgt_padding,
            memory_key_padding_mask=src_padding,
        )
        return self.generator(decoded).transpose(1, 2)


# Initialize model and move to device
model = Model().to(device)

# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"✅ Model created and moved to {device}")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

## 6. Training Loop <a id='training'></a>

In [None]:
# Hyperparameters for the run
num_epochs = 50
learning_rate = 0.001

# Objective, optimizer and learning-rate schedule.
# The cosine schedule spans the whole run (T_max equals the epoch count).
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=0.01,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

# Echo the configuration so it is recorded in the notebook output
print("Training configuration:")
for setting_name, setting_value in (
    ("Epochs", num_epochs),
    ("Learning rate", learning_rate),
    ("Optimizer", "AdamW"),
    ("Scheduler", "CosineAnnealingLR"),
):
    print(f"  {setting_name}: {setting_value}")

In [None]:
def _move_to_device(batch, device):
    """Move a tensor, or a (possibly nested) list/tuple of tensors, to `device`."""
    if isinstance(batch, (list, tuple)):
        return tuple(_move_to_device(item, device) for item in batch)
    return batch.to(device)


def _percent(correct, total):
    """Return `correct / total` as a percentage, or 0.0 when nothing was scored."""
    return 100.0 * correct / total if total else 0.0


def _run_epoch(model, loader, criterion, device, optimizer=None,
               max_grad_norm=1.0, desc='Training'):
    """Run one pass over `loader`; train when `optimizer` is given, else evaluate.

    Shared implementation behind `train_epoch` and `validate`.

    Returns:
        (mean loss per batch, accuracy in percent).

    Raises:
        ValueError: If `loader` yields no batches.
    """
    training = optimizer is not None
    model.train(training)

    # Labels equal to the criterion's ignore_index (e.g. padding) contribute
    # to neither the loss nor the accuracy.
    ignore_index = getattr(criterion, 'ignore_index', None)

    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    pbar = tqdm(loader, desc=desc)
    with torch.set_grad_enabled(training):
        # Count batches ourselves: tqdm only refreshes `pbar.n` periodically,
        # so it is not a reliable divisor for the running mean.
        for num_batches, (inputs, labels) in enumerate(pbar, start=1):
            inputs = _move_to_device(inputs, device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()

            # A list/tuple of inputs is unpacked into positional arguments,
            # which lets multi-input models share this loop.
            outputs = model(*inputs) if isinstance(inputs, tuple) else model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                # Gradient clipping
                if max_grad_norm is not None:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_grad_norm)
                optimizer.step()

            running_loss += loss.item()

            # The class dimension is dim 1, so this handles both (batch, C)
            # and (batch, C, seq_len) outputs.
            predicted = outputs.argmax(dim=1)
            if ignore_index is None:
                valid = torch.ones_like(labels, dtype=torch.bool)
            else:
                valid = labels.ne(ignore_index)
            correct += (predicted.eq(labels) & valid).sum().item()
            # Count scored label elements rather than batch rows so that
            # per-token labels cannot push accuracy above 100%.
            total += valid.sum().item()

            pbar.set_postfix({
                'loss': f'{running_loss/num_batches:.4f}',
                'acc': f'{_percent(correct, total):.2f}%'
            })

    if num_batches == 0:
        raise ValueError(f"{desc} loader yielded no batches")

    return running_loss / num_batches, _percent(correct, total)


# Training function
def train_epoch(model, loader, criterion, optimizer, device, max_grad_norm=1.0):
    """Train `model` for one epoch.

    Args:
        model: Module to optimize; switched to train mode.
        loader: Iterable of `(inputs, labels)` batches. `inputs` may be a
            tensor or a list/tuple of tensors passed as positional arguments.
        criterion: Loss called as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        max_grad_norm: Gradient-norm clipping threshold; `None` disables it.

    Returns:
        (mean loss per batch, accuracy in percent).

    Raises:
        ValueError: If `loader` yields no batches.
    """
    return _run_epoch(model, loader, criterion, device, optimizer=optimizer,
                      max_grad_norm=max_grad_norm, desc='Training')


# Validation function
def validate(model, loader, criterion, device):
    """Evaluate `model` on `loader` without computing gradients.

    Args:
        model: Module to evaluate; switched to eval mode.
        loader: Iterable of `(inputs, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`.
        device: Device the batches are moved to.

    Returns:
        (mean loss per batch, accuracy in percent).

    Raises:
        ValueError: If `loader` yields no batches.
    """
    return _run_epoch(model, loader, criterion, device, desc='Validation')


print("✅ Training and validation functions defined")

In [None]:
# Training loop
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'lr': []
}
best_acc = 0.0

results_dir = '../06_machine_translation/results'
models_dir = '../06_machine_translation/models'
os.makedirs(results_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)
best_model_path = f'{models_dir}/best_model.pt'

# The data cells above are still TODO, so the loaders may not exist yet.
# Look them up defensively so the notebook still runs top to bottom.
train_loader = globals().get('trainloader')
val_loader = globals().get('testloader')

print("Starting training...\n")
start_time = datetime.now()

if train_loader is None or val_loader is None:
    # Without data there is nothing to optimize; stepping the scheduler anyway
    # would only decay the learning rate of a model that was never trained.
    print("⚠️ `trainloader` / `testloader` are not defined - skipping training.")
else:
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)

        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        # Record the learning rate that was used for this epoch, then advance
        # the schedule (after the optimizer steps taken inside train_epoch).
        current_lr = optimizer.param_groups[0]['lr']
        scheduler.step()

        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['lr'].append(current_lr)

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), best_model_path)

end_time = datetime.now()
training_time = (end_time - start_time).total_seconds()
print(f"\nTraining time: {training_time/60:.2f} minutes")

## 7. Evaluation and Metrics <a id='evaluation'></a>

In [None]:
# Metric helpers for the evaluation code that is still to be written.
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_recall_fscore_support,
)

# Restoring the best checkpoint is left disabled:
# model.load_state_dict(torch.load(f'{models_dir}/best_model.pt'))

# TODO: Implement evaluation (get predictions and calculate metrics).
# NOTE: nothing is evaluated yet; the message below is a placeholder.
print("✅ Evaluation complete")

## 8. Inference Demo <a id='inference'></a>

In [None]:
# Switch the model to evaluation mode before running it on sample data.
model.eval()

# TODO: Implement inference demonstration
# NOTE: no sample is translated yet; the message below is a placeholder.
print("✅ Inference demo complete")

## 9. Save Results <a id='save'></a>

In [None]:
# Save metrics to JSON
metrics = {
    'model_name': 'Transformer',
    'dataset': 'WMT14 En-De',
    'framework': 'PyTorch + Transformers',
    'training_time_minutes': training_time / 60,
    'num_epochs': num_epochs,
    'total_parameters': total_params,
    'trainable_parameters': trainable_params,
    # Training outcome: stays at its initial (empty / 0.0) value when no
    # epoch was actually run.
    'best_val_accuracy': best_acc,
    'history': history,
}

# Make sure the target directory exists even if this cell is run on its own.
os.makedirs(results_dir, exist_ok=True)
metrics_path = os.path.join(results_dir, 'metrics.json')
with open(metrics_path, 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)

print("✅ Metrics saved to metrics.json")
print("\n" + "="*50)
print("NOTEBOOK EXECUTION COMPLETE")
print("="*50)

# Only report the checkpoint as saved when the file is really on disk.
best_model_path = os.path.join(models_dir, 'best_model.pt')
print("\nSaved files:")
if os.path.isfile(best_model_path):
    print(f"  - Model: {best_model_path}")
else:
    print(f"  - Model: not saved (no checkpoint at {best_model_path})")
print(f"  - Metrics: {metrics_path}")