# 🎤 Fine-Tuning StyleTTS2 pour le Darija

## Version GitHub + HuggingFace (Setup rapide ~3 min)

**Sources:**
- 📦 Code: `github.com/Racim679/arable-tts`
- 🎵 Audio: `huggingface.co/datasets/RacimPoly6/darija-tts-dataset`

### Prérequis:
- GPU: T4, V100, ou A100 (minimum 16GB VRAM)


## 1. Vérification GPU


In [None]:
!nvidia-smi
import torch
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")


## 2. Installation des dépendances


In [None]:
# Install the Python packages used by this notebook; -q keeps pip's output short.
# phonemizer is the only package pinned to an exact version (3.2.1).
!pip install -q phonemizer==3.2.1 munch accelerate pydub nltk g2p_en num2words inflect unidecode pyyaml librosa scipy matplotlib soundfile
!pip install -q torch torchaudio transformers einops einops-exts tqdm omegaconf huggingface_hub
# monotonic_align is installed straight from its GitHub repository rather than from PyPI.
!pip install -q git+https://github.com/resemble-ai/monotonic_align.git
# System package installed through apt, not pip
# (presumably the backend phonemizer relies on -- confirm).
!apt-get install -qq espeak-ng
print("‚úÖ D√©pendances install√©es!")


## 3. Téléchargement du code (GitHub) et audio (HuggingFace)


In [None]:
import os
from huggingface_hub import snapshot_download

# Cloner StyleTTS2
if not os.path.exists("/content/StyleTTS2"):
    !git clone https://github.com/yl4579/StyleTTS2.git /content/StyleTTS2
print("‚úÖ StyleTTS2 clon√©!")

# Cloner ton repo avec les configs
if not os.path.exists("/content/arable-tts"):
    !git clone https://github.com/Racim679/arable-tts.git /content/arable-tts
print("‚úÖ Configs clon√©es!")

# T√©l√©charger les audio depuis HuggingFace
print("üì• T√©l√©chargement audio depuis HuggingFace...")
snapshot_download(
    repo_id="RacimPoly6/darija-tts-dataset",
    repo_type="dataset",
    local_dir="/content/dataset_darija",
    local_dir_use_symlinks=False
)
print("‚úÖ Audio t√©l√©charg√©!")


## 4. Téléchargement des modèles pré-entraînés


In [None]:
!mkdir -p /content/StyleTTS2/Models/LibriTTS /content/StyleTTS2/Models/Darija
!mkdir -p /content/StyleTTS2/Utils/JDC /content/StyleTTS2/Utils/ASR /content/StyleTTS2/Utils/PLBERT

print("üì• T√©l√©chargement des mod√®les...")
!wget -q --show-progress -O /content/StyleTTS2/Models/LibriTTS/epochs_2nd_00020.pth \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Models/LibriTTS/epochs_2nd_00020.pth
!wget -q -O /content/StyleTTS2/Models/LibriTTS/config.yml \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Models/LibriTTS/config.yml

!wget -q --show-progress -O /content/StyleTTS2/Utils/JDC/bst.t7 \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Utils/JDC/bst.t7

!wget -q --show-progress -O /content/StyleTTS2/Utils/ASR/epoch_00080.pth \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Utils/ASR/epoch_00080.pth
!wget -q -O /content/StyleTTS2/Utils/ASR/config.yml \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Utils/ASR/config.yml

!wget -q --show-progress -O /content/StyleTTS2/Utils/PLBERT/step_1000000.t7 \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Utils/PLBERT/step_1000000.t7
!wget -q -O /content/StyleTTS2/Utils/PLBERT/config.yml \
    https://huggingface.co/yl4579/StyleTTS2-LibriTTS/resolve/main/Utils/PLBERT/config.yml

print("‚úÖ Mod√®les t√©l√©charg√©s!")


## 5. Patches PyTorch 2.6+ (CRITIQUE!)


In [None]:
import re, os

files_to_patch = [
    "/content/StyleTTS2/models.py",
    "/content/StyleTTS2/Utils/ASR/models.py",
    "/content/StyleTTS2/Utils/JDC/model.py",
    "/content/StyleTTS2/Utils/PLBERT/util.py",
    "/content/StyleTTS2/meldataset.py",
]


def find_closing_paren(text, start):
    """Return the index of the bracket that closes a call whose arguments begin at ``start``.

    Brackets opened inside the arguments (nested calls, subscripts, literals)
    are balanced, and brackets inside quoted strings are ignored.
    Returns -1 when no matching closing bracket is found.
    """
    depth = 1
    quote = None
    i = start
    while i < len(text):
        ch = text[i]
        if quote is not None:
            if ch == "\\":
                i += 1  # skip the escaped character inside a string literal
            elif ch == quote:
                quote = None
        elif ch in "\"'":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def patch_torch_load_calls(source):
    """Return ``source`` with ``weights_only=False`` appended to each ``torch.load(...)`` call.

    The keyword is inserted just before the bracket that really closes the
    call, so arguments containing nested calls such as ``str(i)`` stay intact
    (a ``[^)]+`` regex would stop at the first ``)`` and patch the inner call).
    Calls that already pass ``weights_only`` and calls without arguments are
    left unchanged, which makes the function safe to apply repeatedly.
    """
    pieces = []
    cursor = 0
    for match in re.finditer(r'torch\.load\(', source):
        if match.start() < cursor:
            continue  # nested inside the arguments of a call already handled
        args_start = match.end()
        close = find_closing_paren(source, args_start)
        if close == -1:
            break  # unbalanced call: leave the remainder untouched
        args = source[args_start:close]
        stripped = args.rstrip()
        pieces.append(source[cursor:args_start])
        if not stripped or 'weights_only' in args:
            pieces.append(args)
        else:
            # Respect an existing trailing comma and keep trailing whitespace/newlines.
            separator = ' ' if stripped.endswith(',') else ', '
            pieces.append(stripped + separator + 'weights_only=False' + args[len(stripped):])
        cursor = close
    pieces.append(source[cursor:])
    return ''.join(pieces)


for filepath in files_to_patch:
    name = os.path.basename(filepath)
    if not os.path.exists(filepath):
        # Report instead of skipping silently: an unpatched file fails later at load time.
        print(f"Introuvable: {filepath}")
        continue
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    new_content = patch_torch_load_calls(content)
    if new_content != content:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)
        print(f"‚úÖ Patched: {name}")
    else:
        print(f"‚ÑπÔ∏è OK: {name}")


## 6. Préparation des données


In [None]:
import json, random, os, shutil

def manifest_line(item):
    """Format one metadata entry as an ``audio_file|text|speaker`` manifest line.

    Line breaks and ``|`` inside the text are replaced by a space: the manifest
    is one record per line with ``|`` as the field separator, so either
    character would split the record. The speaker field is always ``0``.
    """
    text = item['text']
    for unsafe in ('\r\n', '\n', '\r', '|'):
        text = text.replace(unsafe, ' ')
    return f"{item['audio_file']}|{text}|0\n"


def write_manifest(path, items):
    """Write ``items`` to ``path``, one manifest line per entry, as UTF-8."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(manifest_line(item) for item in items)


os.makedirs("/content/StyleTTS2/Data", exist_ok=True)
os.makedirs("/content/StyleTTS2/wavs", exist_ok=True)

# Copy the audio files next to the training code.
print("üéµ Copie des fichiers audio...")
for wav_name in os.listdir("/content/dataset_darija/wavs"):
    if wav_name.endswith('.wav'):
        shutil.copy(f"/content/dataset_darija/wavs/{wav_name}", f"/content/StyleTTS2/wavs/{wav_name}")
print(f"‚úÖ {len(os.listdir('/content/StyleTTS2/wavs'))} fichiers copi√©s")

# Build the train/validation lists.
# Explicit UTF-8 so the transcripts do not depend on the platform's default encoding.
with open("/content/dataset_darija/metadata.json", 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Fixed seed so the 95/5 split is reproducible between runs.
random.seed(42)
random.shuffle(metadata)
split = int(len(metadata) * 0.95)
train_data, eval_data = metadata[:split], metadata[split:]

# NOTE(review): the text is written as found in metadata.json; confirm whether the
# training script expects phonemized text in these lists.
write_manifest('/content/StyleTTS2/Data/train_list.txt', train_data)
write_manifest('/content/StyleTTS2/Data/val_list.txt', eval_data)

print(f"‚úÖ Train: {len(train_data)}, Val: {len(eval_data)}")


## 7. Configuration


In [None]:
import yaml, os

# Fine-tuning configuration, dumped to YAML for train_finetune.py.
# All relative paths are resolved from the StyleTTS2 checkout.
config = {
    'log_dir': 'Models/Darija', 'save_freq': 10, 'device': 'cuda',
    # Upstream train_finetune.py takes its epoch budget from `epochs` and falls back to
    # a built-in default when the key is absent (see the StyleTTS2 repository; verify
    # against the cloned revision). The epochs_1st/epochs_2nd keys are kept unchanged.
    'epochs': 80,
    'epochs_1st': 0, 'epochs_2nd': 80, 'batch_size': 4, 'max_len': 400,
    'pretrained_model': 'Models/LibriTTS/epochs_2nd_00020.pth',
    'data_params': {
        'train_data': 'Data/train_list.txt', 'val_data': 'Data/val_list.txt',
        'root_path': '', 'OOD_data': 'Data/OOD_texts.txt', 'min_length': 50, 'sample_rate': 24000,
    },
    'preprocess_params': {'sr': 24000, 'spect_params': {'n_fft': 2048, 'win_length': 1200, 'hop_length': 300, 'n_mels': 80}},
    # NOTE(review): the pretrained checkpoint ships with its own config
    # (Models/LibriTTS/config.yml); confirm that `multispeaker` and the decoder
    # settings below match it, otherwise the weights may not load.
    'model_params': {
        'multispeaker': False, 'dim_in': 64, 'hidden_dim': 512, 'max_conv_dim': 512,
        'n_layer': 3, 'n_mels': 80, 'n_token': 178, 'max_dur': 50, 'style_dim': 128, 'dropout': 0.2,
        'decoder': {'type': 'istftnet', 'resblock_kernel_sizes': [3,7,11], 'upsample_rates': [10,6],
                    'upsample_initial_channel': 512, 'resblock_dilation_sizes': [[1,3,5],[1,3,5],[1,3,5]],
                    'upsample_kernel_sizes': [20,12], 'gen_istft_n_fft': 20, 'gen_istft_hop_size': 5},
        'slm': {'model': 'microsoft/wavlm-base-plus', 'sr': 16000, 'hidden': 768, 'nlayers': 13, 'initial_channel': 64},
        'diffusion': {'embedding_mask_proba': 0.1,
                      'transformer': {'num_layers': 3, 'num_heads': 8, 'head_features': 64, 'multiplier': 2},
                      'dist': {'sigma_data': 0.2, 'estimate_sigma_data': True, 'mean': -3.0, 'std': 1.0}}
    },
    'loss_params': {'lambda_mel': 5., 'lambda_gen': 1., 'lambda_slm': 1., 'lambda_mono': 1., 'lambda_s2s': 1.,
                    'lambda_F0': 1., 'lambda_norm': 1., 'lambda_dur': 1., 'lambda_ce': 20., 'lambda_sty': 1.,
                    'lambda_diff': 1., 'diff_epoch': 20, 'joint_epoch': 40},
    # bert_lr / ft_lr are read by upstream train_finetune.py in addition to lr; the
    # values mirror the upstream Configs/config_ft.yml.
    'optimizer_params': {'lr': 0.0001, 'bert_lr': 0.00001, 'ft_lr': 0.0001},
    'slmadv_params': {'min_len': 400, 'max_len': 500, 'batch_percentage': 0.5, 'iter': 10, 'thresh': 5., 'scale': 0.01, 'sig': 1.5},
    'F0_path': 'Utils/JDC/bst.t7', 'ASR_config': 'Utils/ASR/config.yml',
    'ASR_path': 'Utils/ASR/epoch_00080.pth', 'PLBERT_dir': 'Utils/PLBERT/',
}

os.makedirs('/content/StyleTTS2/Configs', exist_ok=True)
with open('/content/StyleTTS2/Configs/config_darija_ft.yml', 'w', encoding='utf-8') as f:
    yaml.dump(config, f)
print("‚úÖ Config cr√©√©e!")


## 8. Vérification


In [None]:
import os
def is_present(path):
    """Return True when ``path`` is a non-empty file or a directory with at least one entry.

    A bare existence test is not enough here: ``wget -O`` creates its output
    file even when the download fails, and the audio folder is created before
    any file is copied into it.
    """
    if os.path.isdir(path):
        return bool(os.listdir(path))
    return os.path.isfile(path) and os.path.getsize(path) > 0


# Label shown to the user -> artefact that must exist before training starts.
checks = {
    "Config": "/content/StyleTTS2/Configs/config_darija_ft.yml",
    "Audio": "/content/StyleTTS2/wavs",
    "Train list": "/content/StyleTTS2/Data/train_list.txt",
    "Mod√®le": "/content/StyleTTS2/Models/LibriTTS/epochs_2nd_00020.pth",
    "JDC": "/content/StyleTTS2/Utils/JDC/bst.t7",
    "ASR": "/content/StyleTTS2/Utils/ASR/epoch_00080.pth",
    "PLBERT": "/content/StyleTTS2/Utils/PLBERT/step_1000000.t7",
}
# Evaluate each path once so the per-item report and the summary cannot disagree.
status = {name: is_present(path) for name, path in checks.items()}
all_ok = all(status.values())
for name, ok in status.items():
    print(f"{'‚úÖ' if ok else '‚ùå'} {name}")
print(f"\n{'üéâ PR√äT!' if all_ok else '‚ö†Ô∏è Fichiers manquants!'}")


## 9. 🚀 Lancement du Fine-Tuning

⚠️ **Durée: 8-12h** | Checkpoints sauvegardés tous les 10 epochs


In [None]:
# Work from the StyleTTS2 checkout: the script and the config path below are relative to it.
%cd /content/StyleTTS2
# Launch fine-tuning through accelerate as a single process with mixed precision disabled.
!accelerate launch --mixed_precision=no --num_processes=1 train_finetune.py --config_path ./Configs/config_darija_ft.yml


## 10. Sauvegarde sur Drive (optionnel)


In [None]:
from google.colab import drive
drive.mount('/content/drive')
!mkdir -p /content/drive/MyDrive/darija_checkpoints
!cp -r /content/StyleTTS2/Models/Darija/* /content/drive/MyDrive/darija_checkpoints/
print("‚úÖ Checkpoints sauvegard√©s sur Drive!")
