# Resemble.ai TTS Integration
This notebook handles instant voice cloning (Chatterbox) and TTS generation for all speakers using Resemble.ai.

## 1. Setup and Authentication

In [40]:
import os
import json
import requests
import re
import time
import subprocess
import base64
from resemble import Resemble
from pathlib import Path

# Set your Resemble.ai API Key here
RESEMBLE_API_KEY = "TbfsdrDTGOPJK9TEvGndhAtt"

Resemble.api_key(RESEMBLE_API_KEY)

PROJECT_UUID = None

def setup_resemble():
    global PROJECT_UUID
    if RESEMBLE_API_KEY == "YOUR_RESEMBLE_API_KEY":
        print("‚ùå ERROR: Please set your RESEMBLE_API_KEY first!")
        return False
    try:
        # 1. Verify Authentication
        Resemble.v2.voices.all(1, 1)
        print("‚úÖ Authentication successful!")
        
        # 2. Setup/Find Project
        projects_resp = Resemble.v2.projects.all(1)
        projects = projects_resp.get('items', [])
        tts_project = next((p for p in projects if p['name'] == "TTS Integration Project"), None)
        
        if not tts_project:
            print("Creating 'TTS Integration Project'...")
            response = Resemble.v2.projects.create("TTS Integration Project", "Project for automated TTS generation")
            if response.get('success'):
                PROJECT_UUID = response['item']['uuid']
            else:
                print(f"‚ùå Failed to create project: {response}")
                return False
        else:
            PROJECT_UUID = tts_project['uuid']
            
        print(f"‚úÖ Using Project UUID: {PROJECT_UUID}")
        return True
    except Exception as e:
        print(f"‚ùå Setup failed: {e}")
        return False

setup_resemble()

‚úÖ Authentication successful!
‚úÖ Using Project UUID: b3352a9c


True

## 2. TTS Generation Utility

In [41]:
def convert_to_wav(input_path):
    output_path = input_path.replace('.webm', '.wav').replace('.mp3', '.wav').replace('.m4a', '.wav')
    if os.path.exists(output_path):
        return output_path
    
    print(f"  Converting {os.path.basename(input_path)} to WAV...")
    try:
        subprocess.run(['ffmpeg', '-i', input_path, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '22050', output_path], 
                       check=True, capture_output=True)
        return output_path
    except Exception as e:
        print(f"    ‚ùå Conversion failed: {e}")
        return None

def generate_resemble_audio(speaker_id, original_audio_path, original_text, new_text, output_dir):
    if not PROJECT_UUID:
        return None, None
        
    wav_path = convert_to_wav(original_audio_path)
    if not wav_path: return None, None

    voice_name = f"3min-test-chatterbox-{speaker_id}"
    consent_string = "I give my consent to Resemble AI to use my voice for cloning and synthesis."
    
    voice_uuid = None
    try:
        # 1. Create Voice
        response = Resemble.v2.voices.create(voice_name, consent_string, voice_type='rapid')
        if not response.get('success'):
            print(f"    ‚ùå Failed to create voice for {speaker_id}: {response}")
            return None, None
        
        voice_uuid = response['item']['uuid']
        
        # 2. Upload Recording
        with open(wav_path, 'rb') as f:
            rec_resp = Resemble.v2.recordings.create(
                voice_uuid,
                f,
                name="original_clip",
                text=original_text,
                is_active=True,
                emotion="neutral"
            )
        
        if not rec_resp.get('success'):
            print(f"    ‚ùå Failed to upload recording for {speaker_id}: {rec_resp}")
            Resemble.v2.voices.delete(voice_uuid)
            return None, None

        # 3. Build
        Resemble.v2.voices.build(voice_uuid)

        # 4. Wait
        ready = False
        for i in range(30):
            status_resp = Resemble.v2.voices.get(voice_uuid)
            item = status_resp.get('item', {})
            v_status = item.get('status')
            if v_status in ['processed', 'finished', 'ready'] or item.get('voice_status') == 'Ready':
                ready = True
                break
            time.sleep(5)
        
        if not ready:
            print(f"    ‚ùå Voice {speaker_id} timed out.")
            Resemble.v2.voices.delete(voice_uuid)
            return None, None

        # 5. Generate Clips
        def create_clip(text, filename):
            resp = Resemble.v2.clips.create_sync(PROJECT_UUID, voice_uuid, text)
            if resp.get('success'):
                item = resp.get('item', resp)
                data = None
                possible_keys = ['audio_content', 'link', 'url', 'audio_src', 'audio']
                for key in possible_keys:
                    if key in item:
                        content = item[key]
                        if not content: continue
                        if isinstance(content, str) and content.startswith('http'):
                            data = requests.get(content).content
                        elif isinstance(content, str):
                            try: data = base64.b64decode(content)
                            except: data = content.encode()
                        else: data = content
                        break
                
                if data and len(data) > 500:
                    output_path = os.path.join(output_dir, filename)
                    with open(output_path, 'wb') as f: f.write(data)
                    return output_path
            return None

        orig_path = create_clip(original_text, f"{speaker_id}-resemble-chatterbox-original.wav")
        new_path = create_clip(new_text, f"{speaker_id}-resemble-chatterbox-new.wav")
        
        # 6. Cleanup
        Resemble.v2.voices.delete(voice_uuid)
        
        if orig_path and new_path:
            return os.path.basename(orig_path), os.path.basename(new_path)
        return None, None
        
    except Exception as e:
        print(f"    ‚ùå Error processing {speaker_id}: {e}")
        if voice_uuid: 
            try: Resemble.v2.voices.delete(voice_uuid)
            except: pass
        return None, None

## 3. Mass Generation and Metadata Update
This section processes all 120 speakers and updates `speakers.ts`.

In [42]:
import re
import json

SPEAKERS_TS_PATH = "/Users/usamait/Desktop/TTS/output/speakers.ts"
AUDIO_BASE_DIR = "/Users/usamait/Desktop/TTS/output/audio/resemble"
os.makedirs(AUDIO_BASE_DIR, exist_ok=True)

NEW_TEXT = "The quick brown fox jumps over the lazy dog. But what about the man who watched over them? He barely noticed."

def load_speakers():
    with open(SPEAKERS_TS_PATH, 'r') as f:
        content = f.read()
        # Robust extraction for the speakers array
        match = re.search(r'speakers:\s*Speaker\[\]\s*=\s*(\[.*?\]);', content, re.DOTALL)
        if not match:
             match = re.search(r'=\s*(\[.*\])', content, re.DOTALL)
        if match:
            json_str = match.group(1)
            return json.loads(json_str), content
    return None, None

def save_speakers(speakers, original_content):
    json_str = json.dumps(speakers, indent=2)
    # Use a lambda to avoid re.sub evaluating backslashes in the replacement string
    # This prevents 'bad escape \u' errors if the JSON contains unicode escapes.
    pattern = r'(speakers:\s*Speaker\[\]\s*=\s*)\[.*?\];'
    new_content = re.sub(pattern, lambda m: m.group(1) + json_str + ';', original_content, flags=re.DOTALL)
    
    if new_content == original_content:
        # Fallback for simpler match
        pattern_fallback = r'(=\s*)\[.*\]'
        new_content = re.sub(pattern_fallback, lambda m: m.group(1) + json_str, original_content, flags=re.DOTALL)
        
    with open(SPEAKERS_TS_PATH, 'w') as f:
        f.write(new_content)
    print(f"    ‚úÖ Checkpoint saved: {SPEAKERS_TS_PATH}")

speakers, original_ts_content = load_speakers()

if speakers:
    print(f"Found {len(speakers)} speakers. Starting batch processing...")
    
    processed_count = 0
    for i, spk in enumerate(speakers):
        spk_id = spk['id']
        
        # Check if already processed to allow resuming
        if any(m['modelId'] == 'resemble-chatterbox' for m in spk.get('models', [])):
            continue
            
        print(f"  [{i+1}/{len(speakers)}] Processing {spk_id}...")
        
        orig_audio_full = f"/Users/usamait/Desktop/TTS/output{spk['originalAudio']}"
        orig_text = spk['originalText']
        
        res_orig, res_new = generate_resemble_audio(spk_id, orig_audio_full, orig_text, NEW_TEXT, AUDIO_BASE_DIR)
        
        if res_orig and res_new:
            # Add model metadata
            new_model = {
                "modelId": "resemble-chatterbox",
                "modelName": "Resemble Chatterbox",
                "clonedOriginalAudio": f"/audio/resemble/{res_orig}",
                "clonedOriginalText": orig_text,
                "clonedNewAudio": f"/audio/resemble/{res_new}",
                "clonedNewText": NEW_TEXT
            }
            
            if 'models' not in spk:
                spk['models'] = []
            spk['models'].append(new_model)
            
            processed_count += 1
            # Save every 5 speakers as a checkpoint
            if processed_count % 5 == 0:
                save_speakers(speakers, original_ts_content)
        else:
            print(f"    ‚ùå Failed to process {spk_id}")
            
        # Small delay to keep things stable
        time.sleep(1)

    # Final save
    save_speakers(speakers, original_ts_content)
    print(f"\nüéâ Batch processing complete! {processed_count} new speakers added.")
else:
    print("‚ùå Failed to load speakers.")

Found 150 speakers. Starting batch processing...
  [121/150] Processing spk-153293...
  Converting user_153293_65t_d4ca3af8.webm to WAV...
    ‚ùå Conversion failed: Command '['ffmpeg', '-i', '/Users/usamait/Desktop/TTS/output/audio/original/user_153293_65t_d4ca3af8.webm', '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '22050', '/Users/usamait/Desktop/TTS/output/audio/original/user_153293_65t_d4ca3af8.wav']' returned non-zero exit status 254.
    ‚ùå Failed to process spk-153293
  [122/150] Processing spk-185885...
  Converting user_185885_72t_333e845a.webm to WAV...
    ‚ùå Conversion failed: Command '['ffmpeg', '-i', '/Users/usamait/Desktop/TTS/output/audio/original/user_185885_72t_333e845a.webm', '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '22050', '/Users/usamait/Desktop/TTS/output/audio/original/user_185885_72t_333e845a.wav']' returned non-zero exit status 254.
    ‚ùå Failed to process spk-185885
  [123/150] Processing spk-173685...
  Converting user_173685_59t_49f8e87d.webm to WAV...

KeyboardInterrupt: 