<a href="https://colab.research.google.com/github/Msingi-AI/Sauti-Ya-Kenya/blob/main/colab/train_tts_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sauti Ya Kenya - TTS Training

This notebook sets up the training environment for our Swahili TTS model using TPU/GPU acceleration.

## Dataset Upload Instructions
You'll need to upload your Common Voice dataset files in this order:
1. `validated_sentences.tsv` ✅
2. `clip_durations.tsv` ✅
3. Upload individual audio clips in batches:
   - Create a folder named `common_voice_clips` at the top level of your Google Drive (My Drive)
   - Upload your audio files there
   - We'll mount the Drive and copy them over

In [None]:
# Check available hardware
# Picks the accelerator in priority order: TPU (via torch_xla), then CUDA GPU, then CPU.
# Defines two notebook-level names used by later cells:
#   DEVICE  - the selected torch / XLA device
#   USE_TPU - True only when an XLA device was actually acquired
import torch
try:
    import torch_xla
    import torch_xla.core.xla_model as xm
    # Acquire the device *before* announcing it: torch_xla can be importable on a
    # runtime that has no TPU attached, in which case xla_device() fails.
    DEVICE = xm.xla_device()
    USE_TPU = True
    print("TPU available!")
except (ImportError, RuntimeError):
    # ImportError: torch_xla is not installed.
    # RuntimeError: torch_xla is installed but no XLA device could be initialised.
    print("TPU not found, checking for GPU...")
    USE_TPU = False
    if torch.cuda.is_available():
        print(f"GPU available: {torch.cuda.get_device_name(0)}")
        DEVICE = torch.device('cuda')
    else:
        print("No GPU found, using CPU")
        DEVICE = torch.device('cpu')

In [None]:
# Mount Google Drive
# The mount point /content/drive is what later cells read clips from and save
# the trained model to.
from google.colab import drive
drive.mount('/content/drive')

# Create project directories
# NOTE(review): this path is relative, so it is created in whatever directory the
# notebook is in when this cell runs. In top-to-bottom order that is before the
# `%cd Sauti-Ya-Kenya` in the clone cell; the upload and copy cells create the
# same relative path again with os.makedirs after the directory change.
!mkdir -p cv-corpus-21.0-delta-2025-03-14/sw/clips

In [None]:
# Clone repository and install dependencies
!git clone https://github.com/Msingi-AI/Sauti-Ya-Kenya.git
%cd Sauti-Ya-Kenya

# Install dependencies
!pip install -r requirements.txt

# Install TPU support if available
if USE_TPU:
    !pip install cloud-tpu-client==0.10 torch_xla[tpu]>=2.0

In [None]:
# Upload TSV files
# Prompts for a browser upload, then relocates every uploaded file from the
# working directory into the Common Voice dataset folder.
from google.colab import files
import os

# Dataset root used by this and later cells (relative to the working directory)
dataset_path = "cv-corpus-21.0-delta-2025-03-14/sw"

print("Please upload validated_sentences.tsv and clip_durations.tsv")
uploaded = files.upload()

# Move files to correct location
os.makedirs(dataset_path, exist_ok=True)
for uploaded_name in uploaded:
    destination = os.path.join(dataset_path, uploaded_name)
    os.rename(uploaded_name, destination)
    print(f"Moved {uploaded_name} to {dataset_path}")

In [None]:
# Copy audio clips from Google Drive
# Copies every *.mp3 from the Drive folder into <dataset_path>/clips.
# Relies on `os` and `dataset_path` from the TSV upload cell.
import glob
import shutil

drive_clips_path = "/content/drive/MyDrive/common_voice_clips"
local_clips_path = os.path.join(dataset_path, "clips")

print("Copying audio clips from Google Drive...")
print(f"Source: {drive_clips_path}")
print(f"Destination: {local_clips_path}")

# Fail fast if the source folder is missing (Drive not mounted, or the folder has
# a different name) instead of reporting a "successful" copy of zero files.
if not os.path.isdir(drive_clips_path):
    raise FileNotFoundError(
        f"Clip folder not found: {drive_clips_path}. "
        "Mount Google Drive and upload the audio clips to that folder first."
    )

# Create destination directory
os.makedirs(local_clips_path, exist_ok=True)

# Files are copied one at a time; the batch size only controls how often
# progress is reported.
batch_size = 100
# Sorted so the copy order (and the progress output) is the same on every run.
audio_files = sorted(glob.glob(os.path.join(drive_clips_path, "*.mp3")))
if not audio_files:
    raise FileNotFoundError(f"No .mp3 files found in {drive_clips_path}")

for i in range(0, len(audio_files), batch_size):
    batch = audio_files[i:i + batch_size]
    for audio_file in batch:
        filename = os.path.basename(audio_file)
        # copy2 also preserves file metadata such as timestamps
        shutil.copy2(audio_file, os.path.join(local_clips_path, filename))
    print(f"Copied {min(i + batch_size, len(audio_files))}/{len(audio_files)} files")

print("\nAudio files copied successfully!")
print(f"Total files: {len(os.listdir(local_clips_path))}")

In [None]:
# Process dataset
# Runs the repository script src/prepare_local_dataset.py on the dataset folder
# populated by the upload and copy cells, naming the clips folder and the two
# TSV files explicitly, and sends its output to "processed_data".
# NOTE(review): --clips_path and the TSV names are presumably resolved relative
# to --dataset_path — confirm against the script's argument handling.
!python src/prepare_local_dataset.py \
    --dataset_path "cv-corpus-21.0-delta-2025-03-14/sw" \
    --output_dir "processed_data" \
    --clips_path "clips" \
    --sentences_file "validated_sentences.tsv" \
    --durations_file "clip_durations.tsv"

In [None]:
# Run data augmentation
# Runs src/augment_data.py with "processed_data" (the output directory of the
# dataset-processing cell) as input and "augmented_data" as output; the training
# config later points train_data_dir at "augmented_data".
# NOTE(review): --num_augmentations 3 presumably means three augmented variants
# per sample — confirm in the script.
!python src/augment_data.py \
    --data_dir "processed_data" \
    --output_dir "augmented_data" \
    --num_augmentations 3

In [None]:
# Set up training configuration
# Builds the hyper-parameter dictionary, writes it to config.json for the
# training script, and echoes the device and batch size that were chosen.
# Relies on USE_TPU, DEVICE and torch from the hardware-check cell.
import json

# Batch size per accelerator: 128 on TPU, 32 on a CUDA GPU, 8 on CPU
batch_size = 128 if USE_TPU else (32 if torch.cuda.is_available() else 8)

# A single step on TPU; two accumulation steps otherwise to ease GPU memory use
accumulation_steps = 1 if USE_TPU else 2

config = dict(
    train_data_dir="augmented_data",
    batch_size=batch_size,
    learning_rate=0.001,
    num_epochs=100,
    save_every=10,
    device=str(DEVICE),
    max_len=10000,  # Based on our Swahili optimization
    checkpoint_dir="checkpoints",
    use_tpu=USE_TPU,
    gradient_accumulation_steps=accumulation_steps,
)

with open('config.json', 'w') as config_file:
    json.dump(config, config_file, indent=2)

print(f"Using device: {config['device']}")
print(f"Batch size: {config['batch_size']}")

In [None]:
# Start training
# Launches src/train.py with the config.json written by the configuration cell
# (batch size, device, epoch count, checkpoint directory, ...).
# NOTE(review): a `!` shell command does not raise in the notebook when the
# process exits with an error, so check this cell's output before running the
# evaluation and save cells.
!python src/train.py --config config.json

In [None]:
# Run evaluation
# Runs src/evaluation.py on the checkpoint at checkpoints/best.pt using the data
# under augmented_data/test, writing its output to "evaluation_results" (the
# directory the final cell copies to Google Drive).
# NOTE(review): assumes training wrote best.pt into the configured "checkpoints"
# directory and that augmentation produced a test/ subfolder — confirm both.
!python src/evaluation.py \
    --model_path "checkpoints/best.pt" \
    --test_data "augmented_data/test" \
    --output_dir "evaluation_results"

In [None]:
# Save model to Google Drive
# Copies the best checkpoint and the evaluation output into a models folder on
# the mounted Drive. Relies on `os` and `shutil` imported by earlier cells.
drive_path = "/content/drive/MyDrive/Sauti-Ya-Kenya/models"
os.makedirs(drive_path, exist_ok=True)

# Best checkpoint goes to <drive_path>/best.pt
best_checkpoint_target = f"{drive_path}/best.pt"
shutil.copy("checkpoints/best.pt", best_checkpoint_target)

# Evaluation results are merged into any copy already present on Drive
evaluation_target = f"{drive_path}/evaluation_results"
shutil.copytree("evaluation_results", evaluation_target, dirs_exist_ok=True)

print("Model and evaluation results saved to Google Drive")