# Sauti ya Kenya - TTS Training

This notebook trains the Kenyan Swahili TTS model using the FastSpeech 2 architecture, with memory optimizations.

In [None]:
# Mount Google Drive and create the project directory structure on it.
import os

from google.colab import drive

drive.mount('/content/drive')

# Directories are created with os.makedirs rather than
# `!mkdir -p .../{processed,tokenizer}`: inside an IPython `!` command, `{...}`
# is also the syntax for interpolating Python expressions, so shell brace
# expansion only works as long as those names happen to be undefined.
drive_project_dir = '/content/drive/MyDrive/Sauti-Ya-Kenya'
for subdir in ('data/processed', 'data/tokenizer', 'checkpoints'):
    # exist_ok=True keeps the cell safe to re-run.
    os.makedirs(os.path.join(drive_project_dir, subdir), exist_ok=True)

In [None]:
# Clone repository and install dependencies
!git clone https://github.com/Msingi-AI/Sauti-Ya-Kenya.git
%cd Sauti-Ya-Kenya
!pip install -r requirements.txt
!pip install torch==2.2.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118

In [None]:
# Upload data to Drive first!
print("Before running this cell:")
print("1. Upload your processed data to: /content/drive/MyDrive/Sauti-Ya-Kenya/data/processed/")
print("2. Upload your tokenizer files to: /content/drive/MyDrive/Sauti-Ya-Kenya/data/tokenizer/")

# Check if data exists
!ls -la /content/drive/MyDrive/Sauti-Ya-Kenya/data/processed/
!ls -la /content/drive/MyDrive/Sauti-Ya-Kenya/data/tokenizer/

# Create local directories
!mkdir -p data/{processed,tokenizer}

# Copy data only if source exists
if [ -d "/content/drive/MyDrive/Sauti-Ya-Kenya/data/processed" ]; then
    !cp -r "/content/drive/MyDrive/Sauti-Ya-Kenya/data/processed"/* data/processed/
fi

if [ -d "/content/drive/MyDrive/Sauti-Ya-Kenya/data/tokenizer" ]; then
    !cp -r "/content/drive/MyDrive/Sauti-Ya-Kenya/data/tokenizer"/* data/tokenizer/
fi

In [None]:
# Zip the local data directory (/content/Sauti-Ya-Kenya/data) into the Drive
# project folder as data.zip.
import os
import shutil

local_data_dir = '/content/Sauti-Ya-Kenya/data'
if not os.path.isdir(local_data_dir):
    # Fail loudly instead of printing a success message for an archive that
    # was never written.
    raise FileNotFoundError(f"Local data directory not found: {local_data_dir}")

# root_dir/base_dir keep the archive entries prefixed with
# "Sauti-Ya-Kenya/data/", matching `cd /content && zip -r ... Sauti-Ya-Kenya/data/`.
# make_archive rewrites the archive from scratch, so files deleted locally do
# not linger in an older data.zip; it raises on failure, so the messages
# below are only printed when the archive was actually created.
shutil.make_archive(
    '/content/drive/MyDrive/Sauti-Ya-Kenya/data',  # ".zip" is appended automatically
    'zip',
    root_dir='/content',
    base_dir='Sauti-Ya-Kenya/data',
)
print("Data has been zipped to drive/MyDrive/Sauti-Ya-Kenya/data.zip")
print("You can now download this file and upload it to Drive manually if needed")

In [None]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler

# Add src to path so the project modules imported below can be found.
# Guarded so that re-running this cell does not append duplicate entries.
SRC_DIR = os.path.join(os.getcwd(), 'src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

from preprocessor import SwahiliTokenizer
from model import FastSpeech2
from dataset import TTSDataset
from config import ModelConfig

# Settings used in this cell, named so they are easy to find and tune.
VOCAB_SIZE = 8000
BATCH_SIZE = 8
NUM_WORKERS = 2

# CUDA-specific features below are enabled only when a GPU is present, so the
# cell also runs (without warnings) on a CPU-only runtime.
USE_CUDA = torch.cuda.is_available()

# Performance / mixed-precision setup.
# NOTE(review): cudnn.benchmark is a cuDNN algorithm auto-tuner (a speed
# setting, not a memory optimization); confirm it helps if batch shapes vary.
torch.backends.cudnn.benchmark = True
# Gradient scaler for mixed-precision training; disabled when CUDA is absent.
scaler = GradScaler(enabled=USE_CUDA)

# Load config
config = ModelConfig()

# Initialize tokenizer from the tokenizer file copied into data/tokenizer/
tokenizer = SwahiliTokenizer(vocab_size=VOCAB_SIZE)
tokenizer.load('data/tokenizer/tokenizer.model')

# Create dataset and dataloader
dataset = TTSDataset(
    data_dir='data/processed',
    metadata_file='data/processed/metadata.csv',
    tokenizer=tokenizer
)

# NOTE(review): no collate_fn is passed; this assumes TTSDataset items can be
# batched by the default collate (e.g. already padded) — confirm.
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=USE_CUDA  # pinned host memory is only useful for GPU transfers
)