# 🎙️ Sauti Ya Kenya - TTS Model Training

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Msingi-AI/Sauti-Ya-Kenya/blob/main/notebooks/train_tts_model.ipynb)

This notebook provides an optimized training environment for the Kenyan Swahili Text-to-Speech model. Features:
- 🚀 GPU-accelerated training
- 💾 Efficient memory management
- 📊 Progress tracking
- 🔄 Automatic checkpoint saving

## Setup Instructions
1. Upload `data.zip` to the `Sauti-Ya-Kenya` folder in your Google Drive (`MyDrive/Sauti-Ya-Kenya/data.zip`)
2. Connect to a GPU runtime (Runtime → Change runtime type → GPU)
3. Run all cells in order

In [None]:
# Verify GPU availability
!nvidia-smi

import torch
print(f"\nPyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name()}")
    print(f"Memory allocated: {torch.cuda.memory_allocated() / 1e9:.1f}GB")
    print(f"Memory cached: {torch.cuda.memory_reserved() / 1e9:.1f}GB")

In [None]:
# Mount Google Drive
# Later cells read data.zip from, and write checkpoints to,
# /content/drive/MyDrive/Sauti-Ya-Kenya, so Drive has to be mounted at
# /content/drive before they run.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Clone repository and install dependencies
!git clone https://github.com/Msingi-AI/Sauti-Ya-Kenya.git
%cd Sauti-Ya-Kenya
!pip install -r requirements.txt

# Create symlink to checkpoints directory in Drive
import os
from pathlib import Path

drive_path = Path('/content/drive/MyDrive/Sauti-Ya-Kenya')
checkpoints_dir = drive_path / 'checkpoints'
checkpoints_dir.mkdir(parents=True, exist_ok=True)

if not os.path.exists('checkpoints'):
    !ln -s {checkpoints_dir} checkpoints

# Extract data
data_zip = drive_path / 'data.zip'
if data_zip.exists():
    !unzip -q {data_zip}
    print("✅ Data extracted successfully")
else:
    raise FileNotFoundError("Please upload data.zip to /content/drive/MyDrive/Sauti-Ya-Kenya/")

In [None]:
# Verify data structure
import pandas as pd
import numpy as np

def verify_data(data_dir='processed_data'):
    """Print dataset sizes and the feature shapes of the first training sample.

    Reads ``train_metadata.csv`` and ``val_metadata.csv`` from ``data_dir``
    and prints their row counts. Then loads the ``text_tokens``, ``mel`` and
    ``duration`` arrays of the first training row from
    ``<data_dir>/train/<feature>/<id>.npy`` and prints each array's shape.

    Args:
        data_dir: Directory holding the metadata CSVs and the ``train``
            feature folders. Defaults to ``'processed_data'``.

    Raises:
        ValueError: If the training metadata has no rows.

    Errors raised by ``pd.read_csv`` or ``np.load`` (for example a missing
    file) are not caught and propagate to the caller.
    """
    # Check metadata files
    train_meta = pd.read_csv(f'{data_dir}/train_metadata.csv')
    val_meta = pd.read_csv(f'{data_dir}/val_metadata.csv')
    print(f"Train samples: {len(train_meta)}")
    print(f"Val samples: {len(val_meta)}")

    if train_meta.empty:
        raise ValueError(f"{data_dir}/train_metadata.csv contains no samples")

    # Verify feature files for first training sample.
    # Select the column before the row: going through the row first
    # (``iloc[0]['id']``) builds a mixed-column Series, which upcasts an
    # integer id to float (7 -> 7.0) when the other columns are floats and
    # so produces the wrong file name.
    sample_id = train_meta['id'].iloc[0]
    features = ('text_tokens', 'mel', 'duration')

    for feature in features:
        path = f'{data_dir}/train/{feature}/{sample_id}.npy'
        data = np.load(path)
        print(f"\n{feature} shape: {data.shape}")

# Run the check now; a missing metadata or feature file raises here.
verify_data()

In [None]:
# Optimize memory settings
import gc
import torch

def optimize_memory(memory_fraction=0.9):
    """Free unused memory and apply CUDA allocator/cuDNN settings.

    Always runs the Python garbage collector. When CUDA is available it
    also releases PyTorch's unused cached GPU memory, caps this process's
    share of device memory, and enables cuDNN autotuning.

    Args:
        memory_fraction: Fraction of total device memory passed to
            ``torch.cuda.set_per_process_memory_fraction``. Defaults to 0.9.

    Returns:
        None.
    """
    # Run garbage collector first: tensors that are only kept alive by
    # reference cycles are released here, so the cache flush below can
    # actually hand their blocks back to the driver.
    gc.collect()

    if not torch.cuda.is_available():
        return

    # Empty CUDA cache
    torch.cuda.empty_cache()

    # Set memory allocator settings
    torch.cuda.set_per_process_memory_fraction(memory_fraction)
    torch.backends.cudnn.benchmark = True

optimize_memory()

# Report GPU memory usage after the cleanup above
if torch.cuda.is_available():
    allocated_gb = torch.cuda.memory_allocated() / 1e9
    reserved_gb = torch.cuda.memory_reserved() / 1e9
    print(f"Memory allocated: {allocated_gb:.1f}GB")
    print(f"Memory cached: {reserved_gb:.1f}GB")

In [None]:
# Start training
# Launches the repository's `src.train` module in a subprocess; because it is
# run with `-m`, the working directory must be the repository root.
# `checkpoints` is the path symlinked to the Drive folder by the setup cell.
# NOTE(review): flag meanings are inferred from their names — confirm against
# src/train.py: batch size 8, 4 gradient-accumulation steps, 100 epochs,
# a checkpoint saved every 10 epochs.
!python -m src.train \
    --batch_size 8 \
    --grad_accum 4 \
    --epochs 100 \
    --save_every 10 \
    --checkpoint_dir checkpoints