# 🎙️ Voice AI Training System - Setup
## Environment Configuration & Validation

This notebook will:
1. ✅ Detect and verify GPU availability
2. ✅ Install all required dependencies (Python 3.12+ compatible)
3. ✅ Mount Google Drive for persistent storage
4. ✅ Create project directory structure
5. ✅ Validate all imports and CUDA support
6. ✅ Display configuration summary

**⚠️ Important:** Run all cells in order. This should complete in 3-5 minutes.

**📝 Note:** This system now uses Python 3.12-compatible TTS libraries (`coqui-tts`, the fork of Coqui TTS maintained by the Idiap Research Institute).

## Step 1: GPU Detection & Verification

In [None]:
import subprocess
import sys
import os

# Check GPU availability
def query_gpu(field):
    """Ask nvidia-smi for one GPU property and return its first output line.

    Args:
        field: Property name passed to ``--query-gpu`` (e.g. ``"name"`` or
            ``"memory.total"``).

    Returns:
        The first non-empty, stripped line printed by nvidia-smi, or ``None``
        when the command cannot be executed, exits with a non-zero status, or
        prints nothing.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", f"--query-gpu={field}", "--format=csv,noheader"],
            capture_output=True,
            text=True,
        )
    except OSError:
        # nvidia-smi could not be started at all (e.g. it is not installed).
        return None
    if result.returncode != 0:
        # A failed call must not be mistaken for a GPU name / memory value.
        return None
    for line in result.stdout.splitlines():
        if line.strip():
            return line.strip()
    return None


def parse_memory_gb(memory_text):
    """Convert an nvidia-smi memory string such as ``"15360 MiB"`` to GB.

    The leading integer token is divided by 1024.

    Raises:
        ValueError: If the leading token is not an integer (e.g. ``"[N/A]"``).
        IndexError: If ``memory_text`` is empty or whitespace only.
    """
    return int(memory_text.strip().split()[0]) / 1024


print("🔍 GPU Detection:")
print("="*60)

try:
    # Get GPU info
    gpu_name = query_gpu("name")
    gpu_memory = query_gpu("memory.total")

    if gpu_name and gpu_memory:
        print(f"✅ GPU Detected: {gpu_name}")
        print(f"✅ GPU Memory: {gpu_memory}")

        # Extract memory in GB
        gpu_memory_gb = parse_memory_gb(gpu_memory)
        print(f"✅ GPU Memory (GB): {gpu_memory_gb:.1f} GB")

        # Store for later use
        os.environ['GPU_MEMORY_GB'] = str(gpu_memory_gb)
    else:
        print("⚠️ No GPU detected. This system requires GPU for training.")
        print("Please enable GPU in Runtime > Change runtime type > GPU")

except Exception as e:
    # Best-effort: report the problem but let the rest of the notebook run.
    print(f"⚠️ Error detecting GPU: {e}")
    print("Please ensure GPU is enabled in Colab settings.")

print("="*60)
print(f"Python Version: {sys.version}")
print("="*60)

## Step 2: Install Dependencies

In [None]:
print("📦 Installing dependencies... This may take 2-3 minutes.")
print("="*60)

# Check Python version for compatibility
import sys
python_version = sys.version_info
print(f"🐍 Python Version: {python_version.major}.{python_version.minor}.{python_version.micro}")

if python_version.major == 3 and python_version.minor >= 12:
    print("✅ Python 3.12+ detected - using Python 3.12 compatible packages")
else:
    print(f"ℹ️ Python {python_version.major}.{python_version.minor} detected")

# Install core dependencies
!pip install -q torch>=2.1.0 torchaudio>=2.1.0 torchvision
!pip install -q transformers>=4.35.0
!pip install -q accelerate>=0.24.0
!pip install -q datasets>=2.14.0
!pip install -q librosa>=0.10.0
!pip install -q soundfile>=0.12.0
!pip install -q gradio>=4.0.0
!pip install -q tqdm
!pip install -q pandas
!pip install -q numpy
!pip install -q matplotlib
!pip install -q scipy
!pip install -q pydub
!pip install -q pytorch-lightning>=2.0.0

# Install TTS library (Python 3.12 Compatible)
# Using Idiap's fork of Coqui TTS which supports Python 3.12+
print("�� Installing TTS library (Python 3.12 compatible)...")
!pip install -q coqui-tts

# Install bark (for lightweight option - may have limited Python 3.12 support)
print("📦 Installing Bark (alternative TTS)...")
!pip install -q git+https://github.com/suno-ai/bark.git

print("="*60)
print("✅ All dependencies installed successfully!")
print("✅ Primary TTS: coqui-tts (Python 3.12+ compatible - Idiap fork)")

## Step 3: Mount Google Drive

In [None]:
from google.colab import drive
import os

# Attach Google Drive, which this notebook uses as its persistent storage.
mount_point = '/content/drive'
separator = "="*60

print("📁 Mounting Google Drive...")
print(separator)

try:
    drive.mount(mount_point, force_remount=False)
except Exception as mount_error:
    # Best-effort: report the failure and let the user retry after authorizing.
    print(f"⚠️ Error mounting drive: {mount_error}")
    print("Please authorize Google Drive access when prompted.")
else:
    print("✅ Google Drive mounted successfully!")
    print(f"✅ Drive path: {mount_point}/MyDrive")

print(separator)

## Step 4: Create Directory Structure

In [None]:
import os

print("🏗️ Creating directory structure...")
print("="*60)

# Create main directories
base_dir = "/content/voiceai"
drive_mount_point = "/content/drive"
drive_dir = f"{drive_mount_point}/MyDrive/voiceai"

# Working directories on the Colab VM disk.
local_directories = [
    f"{base_dir}/dataset",
    f"{base_dir}/processed",
    f"{base_dir}/checkpoints",
    f"{base_dir}/outputs",
    f"{base_dir}/logs",
]

# Directories that are meant to live on Google Drive.
drive_directories = [
    f"{drive_dir}/checkpoints",
    f"{drive_dir}/outputs",
    f"{drive_dir}/logs",
]

# Only touch the Drive path when Drive is really mounted. Otherwise makedirs
# would create ordinary folders on the VM disk under the mount point: nothing
# would persist, and a later drive.mount() can fail because the mount point
# already contains files.
drive_mounted = os.path.ismount(drive_mount_point)

directories = list(local_directories)
if drive_mounted:
    directories += drive_directories
else:
    print(f"⚠️ Google Drive is not mounted at {drive_mount_point} - skipping persistent directories.")
    print("   Run Step 3 (Mount Google Drive), then re-run this cell.")

for directory in directories:
    os.makedirs(directory, exist_ok=True)
    print(f"✅ Created: {directory}")

# Save paths for later use
os.environ['BASE_DIR'] = base_dir
os.environ['DRIVE_DIR'] = drive_dir

print("="*60)
if drive_mounted:
    print("✅ Directory structure created successfully!")
else:
    print("⚠️ Local directories created; persistent (Drive) directories were skipped.")
print(f"\n📂 Local workspace: {base_dir}")
print(f"📂 Persistent storage: {drive_dir}")

## Step 5: Validate Imports & CUDA

In [None]:
import sys
import importlib

print("🔍 Validating imports and CUDA support...")
print("="*60)


def check_import(module_name):
    """Try to import ``module_name`` and describe the outcome as a status string.

    Args:
        module_name: Importable module name (e.g. ``"librosa"``).

    Returns:
        ``"✅ v<version>"`` when the module imports and exposes a non-empty
        ``__version__``, ``"✅ Installed"`` when it imports without one, or
        ``"❌ Error: <message>"`` when the import raises.
    """
    try:
        module = importlib.import_module(module_name)
    except Exception as e:
        # Broken installs can raise more than ImportError; report them all.
        return f"❌ Error: {e}"
    version = getattr(module, "__version__", None)
    return f"✅ v{version}" if version else "✅ Installed"


import_status = {}

# torch is checked by hand because the CUDA / GPU rows are derived from it.
try:
    import torch
    import_status['torch'] = f"✅ v{torch.__version__}"
    cuda_available = torch.cuda.is_available()
    cuda_version = torch.version.cuda if cuda_available else "N/A"
    import_status['CUDA'] = f"✅ Available (v{cuda_version})" if cuda_available else "❌ Not Available"

    if cuda_available:
        device_name = torch.cuda.get_device_name(0)
        import_status['GPU Device'] = f"✅ {device_name}"
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
        import_status['GPU Memory'] = f"✅ {gpu_memory:.1f} GB"
except Exception as e:
    import_status['torch'] = f"❌ Error: {e}"

# (label shown in the report, module name to import). Each label appears exactly
# once, so success and failure are always reported under the same key.
packages_to_check = [
    ('torchaudio', 'torchaudio'),
    ('transformers', 'transformers'),
    ('coqui-tts (Python 3.12+)', 'TTS'),
    ('librosa', 'librosa'),
    ('gradio', 'gradio'),
    ('pytorch_lightning', 'pytorch_lightning'),
    ('numpy', 'numpy'),
    ('pandas', 'pandas'),
    ('matplotlib', 'matplotlib'),
]

for label, module_name in packages_to_check:
    import_status[label] = check_import(module_name)

# Display results; size the label column to the longest label so rows line up.
label_width = max(20, max(len(label) for label in import_status))
for package, status in import_status.items():
    print(f"{package:{label_width}} : {status}")

print("="*60)

# Check if all critical packages are available. A missing key (e.g. 'CUDA' when
# torch itself failed to import) counts as a failure.
critical_packages = ['torch', 'CUDA', 'transformers', 'coqui-tts (Python 3.12+)', 'librosa', 'gradio']
all_good = all('✅' in import_status.get(pkg, '') for pkg in critical_packages)

if all_good:
    print("✅ All critical imports validated successfully!")
else:
    print("⚠️ Some packages failed to import. Please check errors above.")

## Step 6: Configuration Summary

In [None]:
import torch
import os
import sys

def recommend_model(gpu_memory_gb):
    """Pick the TTS model to recommend for a GPU with ``gpu_memory_gb`` GB.

    Returns:
        ``"XTTS"`` when at least 16 GB is available, otherwise ``"Bark"``.
    """
    return "XTTS" if gpu_memory_gb >= 16 else "Bark"


print("="*60)
print("🎯 VOICE AI SYSTEM CONFIGURATION SUMMARY")
print("="*60)

# System info
print("\n📊 System Information:")
print(f"  • Python Version: {sys.version.split()[0]}")
print(f"  • PyTorch Version: {torch.__version__}")
print(f"  • CUDA Available: {'Yes ✅' if torch.cuda.is_available() else 'No ❌'}")

if torch.cuda.is_available():
    print(f"  • CUDA Version: {torch.version.cuda}")
    print(f"  • GPU Device: {torch.cuda.get_device_name(0)}")
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"  • GPU Memory: {gpu_memory:.1f} GB")

    # Recommend model based on GPU memory
    print("\n🤖 Recommended Model:")
    recommended_model = recommend_model(gpu_memory)
    if recommended_model == "XTTS":
        print("  • XTTS v2 (Coqui TTS) - High quality, multi-speaker")
        print("  • GPU Memory sufficient for full training")
    else:
        print("  • Bark (Suno) - Lightweight, expressive")
        print("  • GPU Memory optimized for efficient training")

    # Only set when a GPU is present; without CUDA no recommendation is made.
    os.environ['RECOMMENDED_MODEL'] = recommended_model

# Paths: reuse the values saved by the directory-creation step when present,
# falling back to the same defaults that step uses.
base_dir = os.environ.get('BASE_DIR', '/content/voiceai')
drive_dir = os.environ.get('DRIVE_DIR', '/content/drive/MyDrive/voiceai')

print("\n📂 Directory Structure:")
print(f"  • Local workspace: {base_dir}")
print(f"  • Persistent storage: {drive_dir}")
print(f"  • Dataset: {base_dir}/dataset")
print(f"  • Checkpoints: {drive_dir}/checkpoints")
print(f"  • Outputs: {drive_dir}/outputs")

# Model options
print("\n🎙️ Available TTS Models:")
# This line was bare text outside print(), which made the whole cell a syntax error.
print("  • Using coqui-tts (Idiap fork) - Python 3.12+ compatible")
print("  1. XTTS v2 (Coqui) - Best for: High-quality, emotional speech")
print("  2. Bark (Suno) - Best for: Lightweight, fast inference")

# Language support
print("\n🌍 Language Support:")
print("  • Default: English (LJSpeech dataset)")
print("  • Custom: Upload multilingual datasets (Hindi, Spanish, etc.)")

# Training config
print("\n⚙️ Default Training Configuration:")
print("  • Learning Rate: 1e-4")
print("  • Batch Size: Auto-adjusted based on GPU")
print("  • Epochs: 5 (configurable)")
print("  • Optimizer: AdamW")
print("  • Mixed Precision: Enabled (FP16)")
print("  • Gradient Clipping: 1.0")

print("\n" + "="*60)
print("✅ Setup complete — environment ready")
print("="*60)
print("\n🚀 Next Steps:")
print("  1. Open train_or_finetune.ipynb to prepare data and train model")
print("  2. Open demo.ipynb to run inference and demo")
print("="*60)