In [82]:
# Attach Google Drive to the Colab filesystem; later cells read data from it
# and write checkpoints to it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [112]:
# Fix NumPy version issue first
!pip uninstall -y numpy
!pip install 'numpy<2.0.0'

# Clone repository and install dependencies
!git clone https://github.com/Msingi-AI/Sauti-Ya-Kenya.git
%cd Sauti-Ya-Kenya
!pip install -r requirements.txt

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy<2.0.0
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
Successfully installed numpy-1.26.4
Cloning into 'Sauti-Ya-Kenya'...
remote: Enumerating objects: 1529, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 1529 (delta 0), reused 2 (delta 0), pack-reused 1523 (from 1)[K
Receiving objects: 100% (1529/1529), 133.08 MiB | 31.82 MiB/s, done.
Resolving deltas: 100% (664/664), done.
/content/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya/Sauti-Ya-Kenya


In [113]:
import os
import shutil
from pathlib import Path

# Create data structure
!mkdir -p data/processed data/tokenizer

# Copy data from Drive
DRIVE_PATH = "/content/drive/MyDrive/Sauti-Ya-Kenya-1"

!cp -r "/content/drive/MyDrive/Sauti-Ya-Kenya-1/data/processed" data/
!cp -r "/content/drive/MyDrive/Sauti-Ya-Kenya-1/tokenizer" data/


# Verify files
print("\nChecking data structure:")
!ls -R data/

cp: cannot stat '/content/drive/MyDrive/Sauti-Ya-Kenya-1/tokenizer': No such file or directory

Checking data structure:
data/:
metadata.json  processed  text	tokenizer

data/processed:
metadata.csv  Speaker_003  Speaker_007	Speaker_011  Speaker_015  Speaker_019
Speaker_000   Speaker_004  Speaker_008	Speaker_012  Speaker_016  Speaker_020
Speaker_001   Speaker_005  Speaker_009	Speaker_013  Speaker_017
Speaker_002   Speaker_006  Speaker_010	Speaker_014  Speaker_018

data/processed/Speaker_000:
clip_0000_mel.pt  clip_0000_text.txt  clip_0000.wav

data/processed/Speaker_001:
clip_0001_mel.pt    clip_0001.wav	clip_0003_text.txt  clip_0005_text.txt
clip_0001_text.txt  clip_0002_text.txt	clip_0004_text.txt

data/processed/Speaker_002:
clip_0002_mel.pt  clip_0002_text.txt  clip_0002.wav

data/processed/Speaker_003:
clip_0003_mel.pt  clip_0003_text.txt  clip_0003.wav

data/processed/Speaker_004:
clip_0004_mel.pt  clip_0004_text.txt  clip_0004.wav

data/processed/Speaker_005:
clip_0005_mel.pt  c

In [114]:
# Create __init__.py to make src a package
!touch src/__init__.py

# Add src to Python path
import sys
sys.path.insert(0, os.path.abspath('src'))

# Fix imports in train.py
with open('src/train.py', 'r') as f:
    content = f.read()

# Fix relative imports
content = content.replace('from .model', 'from model')
content = content.replace('from .preprocessor', 'from preprocessor')

with open('src/train.py', 'w') as f:
    f.write(content)

print("Fixed imports in train.py")

Fixed imports in train.py


In [115]:
# Configure GPU
import torch
torch.cuda.empty_cache()

# Guard clause: stop immediately when the runtime has no CUDA device.
if not torch.cuda.is_available():
    raise RuntimeError("No GPU available!")

device = torch.device('cuda')

# Enable cuDNN benchmark mode and allow TF32 in CUDA matmuls.
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True

print(f"Using GPU: {torch.cuda.get_device_name()}")
total_memory_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f"GPU Memory: {total_memory_gib:.1f}GB")

Using GPU: Tesla T4
GPU Memory: 14.7GB


In [117]:
# Create the checkpoint directory on the mounted Drive; -p creates missing
# parents and makes this a no-op when the directory already exists.
# This is the same path passed as --checkpoint_dir to src/train.py below.
!mkdir -p /content/drive/MyDrive/Sauti-Ya-Kenya-1/checkpoints

In [118]:
# Launch the repository's training script as a subprocess from the repo root.
# Data, metadata and tokenizer paths are relative to the current directory;
# checkpoints go to the Drive folder given by --checkpoint_dir.
# NOTE(review): --batch_size 4 with --grad_accum 4 presumably gives an effective
# batch of 16, and --save_every 5 presumably means every 5 epochs — confirm in
# src/train.py.
# NOTE(review): the recorded run could not copy the tokenizer from Drive, so
# data/tokenizer/tokenizer.model presumably comes from the cloned repo — confirm.
!python src/train.py \
    --batch_size 4 \
    --grad_accum 4 \
    --checkpoint_dir /content/drive/MyDrive/Sauti-Ya-Kenya-1/checkpoints \
    --data_dir data/processed \
    --metadata_path data/processed/metadata.csv \
    --tokenizer_path data/tokenizer/tokenizer.model \
    --epochs 100 \
    --save_every 5

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Mel shapes in batch:
Mel 0: torch.Size([636, 80])
Mel 1: torch.Size([512, 80])
Mel 2: torch.Size([788, 80])
Mel 3: torch.Size([438, 80])
Max text length: 8
Max mel length: 788
Loaded and processed mel shape for clip_0012: torch.Size([645, 80])

Loaded and tokenized text for clip_0013: torch.Size([8])
Raw text: '42691262'
Token IDs: [142, 37, 148, 74, 21, 37, 148, 37]
Loaded and processed mel shape for clip_0013: torch.Size([639, 80])

Loaded and tokenized text for clip_0014: torch.Size([8])
Raw text: '41928559'
Token IDs: [142, 21, 74, 37, 164, 123, 123, 74]
Loaded and processed mel shape for clip_0014: torch.Size([357, 80])

Loaded and tokenized text for clip_0015: torch.Size([8])
Raw text: '42146596'
Token IDs: [142, 37, 21, 142, 148, 123, 74, 148]
Loaded and processed mel shape for clip_0015: torch.Size([912, 80])

Text shapes in batch:
Text 0: torch.Size([8])
Text 1: torch.Size([8])
Text 2: torch.Size([8])
Text 3: to