In [1]:
# Fix version compatibility issues
# diffusers and transformers are pinned to exact versions; accelerate, scipy,
# torch and torchaudio carry no pin and are upgraded to whatever pip resolves.
# NOTE(review): the unpinned packages mean a re-run can resolve a different set.
%pip install --upgrade diffusers==0.24.0 transformers==4.35.2 accelerate scipy torch torchaudio
# xformers is requested from the PyTorch CPU wheel index rather than the default index.
%pip install --upgrade xformers --index-url https://download.pytorch.org/whl/cpu

Collecting diffusers==0.24.0
  Using cached diffusers-0.24.0-py3-none-any.whl.metadata (18 kB)
Collecting transformers==4.35.2
  Using cached transformers-4.35.2-py3-none-any.whl.metadata (123 kB)
Collecting transformers==4.35.2
  Using cached transformers-4.35.2-py3-none-any.whl.metadata (123 kB)
Collecting accelerate
  Using cached accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Using cached accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting tokenizers<0.19,>=0.14 (from transformers==4.35.2)
  Using cached tokenizers-0.15.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.7 kB)
Using cached diffusers-0.24.0-py3-none-any.whl (1.8 MB)
Collecting tokenizers<0.19,>=0.14 (from transformers==4.35.2)
  Using cached tokenizers-0.15.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.7 kB)
Using cached diffusers-0.24.0-py3-none-any.whl (1.8 MB)
Using cached transformers-4.35.2-py3-none-any.whl (7.9 MB)
Using cached tokenizers-0.15.2-cp311-cp311-macosx_11_0_arm

In [2]:
# Check versions and ensure compatibility
import sys
import subprocess

def install_package(package):
    """Install or upgrade ``package`` with pip for the running interpreter.

    The command is ``<sys.executable> -m pip install <package> --upgrade --quiet``.

    Args:
        package: Requirement string handed to pip unchanged (for example
            ``"scipy"`` or ``"diffusers==0.24.0"``).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install"]
    pip_command += [package, "--upgrade", "--quiet"]
    subprocess.check_call(pip_command)

print("Checking and installing compatible versions...")

# Preferred requirement set, installed one by one in this order.
preferred_requirements = (
    "transformers==4.35.2",
    "diffusers==0.24.0",
    "accelerate",
    "scipy",
)
# Older pins, attempted only if any preferred install raises.
alternative_requirements = (
    "transformers==4.30.2",
    "diffusers==0.21.4",
)

try:
    for requirement in preferred_requirements:
        install_package(requirement)
    print("✓ All packages installed successfully")
except Exception as install_error:
    print(f"Installation error: {install_error}")
    print("Trying alternative versions...")
    # Errors raised here are not caught and will surface in the cell.
    for requirement in alternative_requirements:
        install_package(requirement)

Checking and installing compatible versions...
✓ All packages installed successfully
✓ All packages installed successfully


In [3]:
# MAIN AUDIO GENERATION - AudioLDM2 with comprehensive error handling
import warnings
warnings.filterwarnings("ignore")

def check_versions():
    """Report the installed transformers/diffusers versions and flag a known bad pairing.

    Both version strings are printed, then reduced to ``(major, minor)`` integer
    tuples. The pairing is rejected when transformers is at least 4.36 while
    diffusers is below 0.25.

    Returns:
        bool: ``True`` when the pairing is not the flagged one; ``False`` when it
        is, or when importing either package or parsing a version fails.
    """
    def major_minor(version_text):
        # Only the first two dot-separated fields take part in the comparison.
        leading_fields = version_text.split('.')[:2]
        return tuple(int(field) for field in leading_fields)

    try:
        import transformers
        import diffusers

        print(f"Transformers version: {transformers.__version__}")
        print(f"Diffusers version: {diffusers.__version__}")

        transformers_release = major_minor(transformers.__version__)
        diffusers_release = major_minor(diffusers.__version__)

        # Anything other than "new transformers with old diffusers" is accepted.
        if transformers_release < (4, 36) or diffusers_release >= (0, 25):
            return True

        print("⚠️  WARNING: Potentially incompatible versions detected!")
        print("Consider downgrading transformers or upgrading diffusers")
        return False
    except Exception as failure:
        print(f"Version check failed: {failure}")
        return False

def generate_audioldm2():
    """Generate a short techno clip with the AudioLDM2 pipeline and play it inline.

    The function imports its dependencies lazily, prints the version
    diagnostics from ``check_versions`` (advisory only), loads
    ``cvssp/audioldm2-music`` on CUDA when available (float16) or on CPU
    (float32), runs one 5-second generation, writes ``audioldm2_techno.wav``
    at 16000 Hz and displays an IPython audio widget.

    Returns:
        bool: ``True`` after a successful generation; ``False`` when any
        exception was raised (the reason is printed, never re-raised).
    """
    try:
        import torch
        import scipy.io.wavfile
        from diffusers import AudioLDM2Pipeline

        print("🎵 Attempting AudioLDM2 generation...")

        # The version check only warns; generation is attempted regardless.
        versions_look_fine = check_versions()
        if not versions_look_fine:
            print("Proceeding anyway, but expect potential issues...")

        on_gpu = torch.cuda.is_available()
        device = "cuda" if on_gpu else "cpu"
        # Half precision is requested only when running on CUDA.
        torch_dtype = torch.float16 if on_gpu else torch.float32

        print(f"Using device: {device}")
        print("Loading AudioLDM2 model...")

        pipeline = AudioLDM2Pipeline.from_pretrained(
            "cvssp/audioldm2-music",
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=True,
        ).to(device)

        print("✅ Model loaded successfully!")

        prompt = "Electronic techno music with strong beat"
        # Fewer denoising steps are used on CPU than on GPU.
        step_count = 50 if on_gpu else 20

        print("Generating audio...")
        generation = pipeline(
            prompt=prompt,
            num_inference_steps=step_count,
            audio_length_in_s=5.0,
            num_waveforms_per_prompt=1,
        )
        waveform = generation.audios[0]

        # The same rate is used for the file on disk and for the inline player.
        sample_rate = 16000
        scipy.io.wavfile.write("audioldm2_techno.wav", rate=sample_rate, data=waveform)
        print("✅ AudioLDM2 generation successful!")

        from IPython.display import Audio, display
        display(Audio(waveform, rate=sample_rate))

        return True

    except ImportError as import_failure:
        print(f"❌ Import error: {import_failure}")
        print("Make sure all required packages are installed")
        return False

    except AttributeError as attribute_failure:
        # Only one specific missing attribute gets the detailed guidance below.
        if "_get_initial_cache_position" not in str(attribute_failure):
            print(f"❌ Attribute error: {attribute_failure}")
            return False

        print("❌ Known compatibility issue with GPT2Model!")
        print("This is caused by version mismatch between transformers and diffusers")
        print("\n🔧 Solutions:")
        print("1. Run cell 5 to install compatible versions")
        print("2. Restart kernel after installation")
        print("3. Try the fallback method in cell 6")
        return False

    except Exception as unexpected_failure:
        print(f"❌ Unexpected error: {unexpected_failure}")
        print("Trying fallback methods...")
        return False

# Attempt the AudioLDM2 generation and keep the outcome for later cells
success = generate_audioldm2()

if not success:
    # Framed recovery checklist pointing at the repair and fallback cells.
    divider = "=" * 50
    recovery_lines = (
        "\n" + divider,
        "❌ AudioLDM2 failed. Please:",
        "1. Run cell 5 to fix versions",
        "2. Restart the kernel",
        "3. Try cell 6 for alternative methods",
        divider,
    )
    for recovery_line in recovery_lines:
        print(recovery_line)

❌ Import error: cannot import name 'cached_download' from 'huggingface_hub' (/opt/homebrew/Caskroom/miniforge/base/envs/tf_numpy2/lib/python3.11/site-packages/huggingface_hub/__init__.py)
Make sure all required packages are installed

❌ AudioLDM2 failed. Please:
1. Run cell 5 to fix versions
2. Restart the kernel
3. Try cell 6 for alternative methods


In [7]:
# System Information and PyTorch CUDA Check
import torch
import sys
import platform

# Print a framed report of the platform, Python and PyTorch/CUDA status.
banner = "=" * 60

print(banner)
print("SYSTEM INFORMATION")
print(banner)
print(f"Platform: {platform.system()} {platform.release()}")
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

if not cuda_ready:
    # No CUDA device: print the installation hints instead of GPU details.
    cpu_only_hints = (
        "\nCUDA is not available. Running on CPU.",
        "\nIf you have a CUDA-compatible GPU and want to use it:",
        "1. Visit: https://pytorch.org/get-started/locally/",
        "2. Select your OS, Package Manager, Python version, and CUDA version",
        "3. Install the appropriate PyTorch version",
        "\nFor example, to install PyTorch with CUDA 11.8:",
        "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118",
    )
    for hint in cpu_only_hints:
        print(hint)
else:
    print(f"CUDA version: {torch.version.cuda}")
    gpu_total = torch.cuda.device_count()
    print(f"GPU device count: {gpu_total}")
    for gpu_index in range(gpu_total):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

print(banner)

SYSTEM INFORMATION
Platform: Darwin 24.6.0
Python version: 3.11.13 | packaged by conda-forge | (main, Jun  4 2025, 14:52:34) [Clang 18.1.8 ]
PyTorch version: 2.8.0
CUDA available: False

CUDA is not available. Running on CPU.

If you have a CUDA-compatible GPU and want to use it:
1. Visit: https://pytorch.org/get-started/locally/
2. Select your OS, Package Manager, Python version, and CUDA version
3. Install the appropriate PyTorch version

For example, to install PyTorch with CUDA 11.8:
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


In [5]:
# Alternative approach - Fix compatibility issues with specific version combinations
import subprocess
import sys
import os

def restart_kernel_message():
    """Print the two-line reminder to restart the kernel and re-run the cells."""
    reminder_lines = (
        "🔄 IMPORTANT: Please restart the kernel after running this cell!",
        "Go to Kernel → Restart Kernel, then run the cells again.",
    )
    print("\n".join(reminder_lines))

# Reinstall a pinned transformers/diffusers set with pip, one package at a time.
# Per-package failures are reported and collected so the final summary is
# honest; the success banner is printed only when every install succeeded.
try:
    # First, let's try a known working combination
    print("Installing known working versions...")

    # Remove the currently installed copies first. This is best effort: the
    # exit status of the uninstall is intentionally not checked.
    subprocess.run([sys.executable, "-m", "pip", "uninstall", "transformers", "diffusers", "-y"],
                   capture_output=True)

    # Install specific working versions
    packages = [
        "transformers==4.30.2",
        "diffusers==0.21.4",
        "torch",
        "torchaudio",
        "accelerate==0.20.3",
        "scipy",
        "datasets"
    ]

    failed_packages = []
    for package in packages:
        print(f"Installing {package}...")
        result = subprocess.run([sys.executable, "-m", "pip", "install", package],
                                capture_output=True, text=True)
        # subprocess.run without check=True never raises on a non-zero exit,
        # so the return code has to be inspected explicitly.
        if result.returncode != 0:
            print(f"Warning: Failed to install {package}")
            print(f"Error: {result.stderr}")
            failed_packages.append(package)

    if failed_packages:
        print(f"⚠️ Installation finished with failures: {', '.join(failed_packages)}")
    else:
        print("✅ Installation completed!")
    # Some packages may have changed even on partial failure, so the restart
    # reminder is shown either way.
    restart_kernel_message()

except Exception as e:
    print(f"Installation failed: {e}")
    print("Trying alternative approach...")

    # Alternative: force-reinstall an older pinned pair with pip
    try:
        subprocess.run([sys.executable, "-m", "pip", "install", "--force-reinstall",
                       "transformers==4.28.1", "diffusers==0.19.3"], check=True)
        print("✅ Alternative installation successful!")
        restart_kernel_message()
    except Exception as fallback_error:
        # Catch Exception rather than everything so Ctrl-C / kernel interrupt
        # still stops the cell.
        print("❌ Both installation methods failed.")
        print(f"Error: {fallback_error}")
        print("Please try manually:")
        print("pip uninstall transformers diffusers -y")
        print("pip install transformers==4.30.2 diffusers==0.21.4")

Installing known working versions...
Installing transformers==4.30.2...
Installing transformers==4.30.2...
Installing diffusers==0.21.4...
Installing diffusers==0.21.4...
Installing torch...
Installing torch...
Installing torchaudio...
Installing torchaudio...
Installing accelerate==0.20.3...
Installing accelerate==0.20.3...
Installing scipy...
Installing scipy...
Installing datasets...
Installing datasets...
✅ Installation completed!
🔄 IMPORTANT: Please restart the kernel after running this cell!
Go to Kernel → Restart Kernel, then run the cells again.
✅ Installation completed!
🔄 IMPORTANT: Please restart the kernel after running this cell!
Go to Kernel → Restart Kernel, then run the cells again.


In [6]:
# FALLBACK SOLUTION: Use a different, more stable audio generation model
# This avoids the GPT2Model compatibility issues entirely

import torch
import scipy.io.wavfile
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def generate_audio_fallback():
    """Generate a techno clip with the original AudioLDM pipeline instead of AudioLDM2.

    Loads ``cvssp/audioldm-s-full-v2`` on CUDA (float16) when available,
    otherwise on CPU (float32), generates 10 seconds of audio in 20 steps,
    writes ``fallback_techno.wav`` at 16000 Hz and shows an inline player.

    Returns:
        bool: ``True`` on success; ``False`` if anything raised (the error is
        printed, not re-raised).
    """
    try:
        print("🔄 Trying fallback method with AudioLDM (not AudioLDM2)...")

        from diffusers import AudioLDMPipeline

        use_cuda = torch.cuda.is_available()
        device = "cuda" if use_cuda else "cpu"
        torch_dtype = torch.float16 if use_cuda else torch.float32

        print(f"Loading AudioLDM pipeline on {device}...")

        pipeline = AudioLDMPipeline.from_pretrained(
            "cvssp/audioldm-s-full-v2",
            torch_dtype=torch_dtype,
        )
        pipeline = pipeline.to(device)

        prompt = "Upbeat electronic techno music with strong bass"

        print("Generating audio...")
        generation = pipeline(
            prompt,
            num_inference_steps=20,
            audio_length_in_s=10.0,
        )
        waveform = generation.audios[0]

        # One rate is shared by the file on disk and the inline player.
        sample_rate = 16000
        scipy.io.wavfile.write("fallback_techno.wav", rate=sample_rate, data=waveform)
        print("✅ Fallback audio generated successfully!")

        from IPython.display import Audio, display
        display(Audio(waveform, rate=sample_rate))

        return True

    except Exception as failure:
        print(f"❌ Fallback method also failed: {failure}")
        return False

def generate_simple_audio():
    """Synthesise a 5-second three-sine mix as a model-free last resort.

    Three sine waves (2 Hz, 80 Hz and 440 Hz) are summed, amplitude-modulated
    by a 0.1 Hz sine, peak-normalised to 0.8 of full scale and converted to
    int16. The result is written to ``simple_techno.wav`` at 16000 Hz and
    shown as an inline audio player.
    """
    print("🎵 Generating simple synthetic audio as last resort...")

    sample_rate = 16000
    duration = 5  # seconds
    t = np.linspace(0, duration, sample_rate * duration)

    def tone(frequency, gain):
        # One scaled sine wave evaluated on the shared time axis.
        return np.sin(2 * np.pi * frequency * t) * gain

    beat = tone(2.0, 0.3)     # slow 2 Hz component
    bass = tone(80, 0.4)      # bass component
    melody = tone(440, 0.2)   # melody component

    mix = beat + bass + melody
    # Slow volume modulation between 0 and 1.
    volume = 0.5 + 0.5 * np.sin(2 * np.pi * 0.1 * t)
    mix = mix * volume

    # Peak-normalise, then scale into the int16 range.
    peak = np.max(np.abs(mix))
    mix = mix / peak * 0.8
    pcm = (mix * 32767).astype(np.int16)

    scipy.io.wavfile.write("simple_techno.wav", sample_rate, pcm)
    print("✅ Simple synthetic audio generated!")

    from IPython.display import Audio, display
    display(Audio(pcm, rate=sample_rate))

# Run the generators from most to least capable
print("🎯 Attempting audio generation with multiple fallback methods...")

# AudioLDM goes first; the synthetic tone is produced only if it reports failure
ai_generation_succeeded = generate_audio_fallback()
if not ai_generation_succeeded:
    print("\n🎵 All AI models failed, generating simple synthetic audio...")
    generate_simple_audio()

🎯 Attempting audio generation with multiple fallback methods...
🔄 Trying fallback method with AudioLDM (not AudioLDM2)...
❌ Fallback method also failed: cannot import name 'cached_download' from 'huggingface_hub' (/opt/homebrew/Caskroom/miniforge/base/envs/tf_numpy2/lib/python3.11/site-packages/huggingface_hub/__init__.py)

🎵 All AI models failed, generating simple synthetic audio...
🎵 Generating simple synthetic audio as last resort...
✅ Simple synthetic audio generated!


In [None]:
# FIX: Hugging Face Hub compatibility issue
import subprocess
import sys

def fix_huggingface_hub():
    """Reinstall a huggingface_hub/diffusers/transformers set that still has ``cached_download``.

    All three pins are passed to a single ``pip install`` invocation so that
    pip resolves them together. Installing them one at a time with
    ``--force-reinstall`` lets a later install re-resolve ``huggingface_hub``
    as a dependency and replace the version that was just pinned.

    Returns:
        bool: ``True`` when pip exited with status 0; ``False`` when pip
        reported a failure or the command could not be started.
    """
    pinned_packages = [
        "huggingface_hub==0.16.4",  # Version that still has cached_download
        "diffusers==0.21.4",
        "transformers==4.30.2",
    ]

    try:
        print("🔧 Fixing huggingface_hub compatibility issue...")
        print(f"Installing {', '.join(pinned_packages)}...")

        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--force-reinstall", *pinned_packages],
            capture_output=True,
            text=True,
        )

        # subprocess.run does not raise on a non-zero exit here, so report the
        # failure explicitly instead of claiming success.
        if result.returncode != 0:
            print(f"❌ Installation failed: {result.stderr}")
            return False

        for package in pinned_packages:
            print(f"✅ {package} installed successfully")

        print("\n🔄 IMPORTANT: Restart the kernel after running this cell!")
        print("Then try running the audio generation cells again.")

        return True

    except Exception as e:
        print(f"❌ Fix failed: {e}")
        return False

# Run the fix when this cell executes; as the cell's last expression, the
# returned boolean is what the notebook displays as the cell result.
fix_huggingface_hub()

In [None]:
# MODERN SOLUTION: Use current library versions with proper imports
import warnings
warnings.filterwarnings("ignore")

def generate_with_modern_libs():
    """Generate a techno clip with MusicGen through the Transformers API.

    ``facebook/musicgen-small`` is a Transformers checkpoint, so it is loaded
    with ``AutoProcessor`` and ``MusicgenForConditionalGeneration`` rather than
    through a diffusers pipeline. The clip is written to ``modern_techno.wav``
    using the sampling rate reported by the model's audio encoder config, and
    shown as an inline audio player.

    Returns:
        bool: ``True`` on success; ``False`` if anything raised, including an
        installed transformers release that does not provide the MusicGen
        classes. The error is printed, not re-raised.
    """
    try:
        import torch
        import scipy.io.wavfile

        print("🎵 Trying modern library approach...")

        from transformers import AutoProcessor, MusicgenForConditionalGeneration

        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        model_id = "facebook/musicgen-small"
        try:
            print("Attempting to use MusicGen...")
            processor = AutoProcessor.from_pretrained(model_id)
            model = MusicgenForConditionalGeneration.from_pretrained(model_id).to(device)
            print("✅ MusicGen loaded successfully!")

        except Exception as e:
            print(f"MusicGen not available: {e}")
            print("Falling back to synthetic generation...")
            # Bare raise keeps the original traceback for the outer handler.
            raise

        # Generate music
        prompt = "upbeat electronic techno music"
        inputs = processor(text=[prompt], padding=True, return_tensors="pt").to(device)
        audio_values = model.generate(**inputs, max_new_tokens=256)

        # generate() returns (batch, channels, samples); keep the first clip's
        # first channel and move it to host memory for SciPy.
        audio_data = audio_values[0, 0].cpu().numpy()
        # Read the rate from the model instead of hard-coding it.
        sampling_rate = model.config.audio_encoder.sampling_rate

        scipy.io.wavfile.write("modern_techno.wav", rate=sampling_rate, data=audio_data)
        print("✅ Modern approach successful!")

        from IPython.display import Audio, display
        display(Audio(audio_data, rate=sampling_rate))

        return True

    except Exception as e:
        print(f"❌ Modern approach failed: {e}")
        return False

def _render_techno_mix(sample_rate, duration, seed=None):
    """Synthesise the layered techno mix and return it as int16 PCM samples."""
    import numpy as np

    sample_count = int(round(sample_rate * duration))
    if sample_count <= 0:
        raise ValueError("sample_rate * duration must give at least one sample")

    # arange / sample_rate gives an exact 1/sample_rate step between samples;
    # linspace with the endpoint included would stretch the step slightly.
    t = np.arange(sample_count) / sample_rate
    rng = np.random.default_rng(seed)

    def swept_sine(frequency_hz):
        # For a time-varying frequency the phase is the running integral of
        # the frequency. sin(2*pi*f(t)*t) would instead add a t*f'(t) term and
        # make the pitch swing wider and wider as t grows.
        phase = 2 * np.pi * np.cumsum(frequency_hz) / sample_rate
        return np.sin(phase)

    # Kick drum: 60 Hz sine with a decay that restarts every 0.5 s
    kick = np.sin(2 * np.pi * 60 * t) * np.exp(-10 * (t % 0.5)) * 0.8

    # Hi-hat: Gaussian noise with a faster decay that restarts every 0.25 s
    hihat = rng.normal(0, 0.1, sample_count) * np.exp(-20 * (t % 0.25)) * 0.3

    # Bass line: 80 Hz centre, swept +/-20 Hz at 0.25 Hz
    bass = swept_sine(80 + 20 * np.sin(2 * np.pi * 0.25 * t)) * 0.6

    # Lead synth: 440 Hz centre, swept +/-200 Hz at 0.125 Hz
    lead = swept_sine(440 + 200 * np.sin(2 * np.pi * 0.125 * t)) * 0.4

    mix = kick + hihat + bass + lead

    # Reverb-like tail: exponentially decaying impulse response. The kernel is
    # normalised to unit sum so the 0.7 / 0.3 dry/wet weights mean what they
    # say, and "full" convolution truncated to the input length keeps the tail
    # causal (it only follows the sound, never precedes it).
    impulse = np.exp(-np.linspace(0, 2, 1000))
    impulse /= impulse.sum()
    wet = np.convolve(mix, impulse, mode="full")[:sample_count]
    mix = 0.7 * mix + 0.3 * wet

    # Peak-normalise to 0.8 of full scale; skip the division for pure silence.
    peak = np.max(np.abs(mix))
    if peak > 0:
        mix = mix / peak * 0.8
    return (mix * 32767).astype(np.int16)


def generate_enhanced_synthetic(duration=10, sample_rate=44100, seed=None,
                                output_path="enhanced_techno.wav"):
    """Generate a layered synthetic techno clip, save it as WAV and play it inline.

    The mix contains a kick, a noise hi-hat, a swept bass and a swept lead,
    followed by a short decaying tail and peak normalisation.

    Args:
        duration: Clip length in seconds.
        sample_rate: Output sampling rate in Hz.
        seed: Optional seed for the hi-hat noise; ``None`` gives a different
            noise pattern on every call.
        output_path: Destination of the WAV file.

    Raises:
        ValueError: If ``sample_rate * duration`` yields no samples.
    """
    print("🎶 Generating enhanced synthetic techno...")

    import scipy.io.wavfile

    audio = _render_techno_mix(sample_rate, duration, seed)

    # Save
    scipy.io.wavfile.write(output_path, sample_rate, audio)
    print("✅ Enhanced synthetic techno generated!")

    from IPython.display import Audio, display
    display(Audio(audio, rate=sample_rate))

# MusicGen is attempted first; the layered synthetic track is the fallback
print("🎯 Trying modern audio generation approaches...")

modern_generation_succeeded = generate_with_modern_libs()
if not modern_generation_succeeded:
    print("\n🎶 Using enhanced synthetic generation...")
    generate_enhanced_synthetic()