In [None]:
!pip install flask-ngrok torchsde diffusers soundfile flask-cors



In [None]:
!pip install flask pyngrok torch numpy diffusers[torch] huggingface_hub



In [None]:
!pip install demucs torchaudio

!pip install -U librosa



In [None]:
import os
from getpass import getpass

from pyngrok import conf

# Never store the ngrok authtoken in the notebook itself: read it from the
# NGROK_AUTHTOKEN environment variable and fall back to an interactive prompt
# (getpass does not echo the value into the cell output).
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
if not ngrok_auth_token:
    raise RuntimeError("An ngrok authtoken is required to open a tunnel.")

conf.get_default().auth_token = ngrok_auth_token
print("ngrok authtoken set successfully.")

ngrok authtoken set successfully.


In [None]:
from flask import Flask, request, jsonify, send_file
import torch
import soundfile as sf
import io
import numpy as np
from diffusers import StableAudioPipeline
from huggingface_hub import login
from flask_cors import CORS
from pyngrok import ngrok
import threading
from demucs.pretrained import get_model
import tempfile
import os
import base64

# Flask application served through the ngrok tunnel.
app = Flask(__name__)

# CORS policy applied to every route.
# NOTE(review): all origins are allowed while credentials are enabled; confirm
# this is intended before sharing the public tunnel URL.
cors_resources = {r"/*": {"origins": "*"}}
CORS(app, resources=cors_resources, supports_credentials=True)

class PipelineStatus:
    """Mutable record of whether the audio generation pipeline loaded."""

    def __init__(self) -> None:
        # Starts as "not ready, no error"; both fields are updated by the
        # model-loading code once it succeeds or fails.
        self.audio_gen_ready, self.audio_gen_error = False, None


# Single shared status instance read by the HTTP handlers.
status = PipelineStatus()

# Load the Demucs separator and the Stable Audio pipeline once at start-up.
# Any failure is recorded on `status` so the HTTP handlers can report it
# instead of crashing.
try:
    print(f"CUDA available: {torch.cuda.is_available()}")
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA device count: {torch.cuda.device_count()}")
    if torch.cuda.is_available():
        print(f"Current device: {torch.cuda.current_device()}")
        print(f"Device name: {torch.cuda.get_device_name(0)}")

    # Decide the target device once so both models end up on the same one.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    demucs_model = get_model(name="htdemucs")
    demucs_model.to(device)

    # Never hard-code the Hugging Face token in the notebook; read it from the
    # environment. Without a token the login step is skipped and the download
    # below is attempted anonymously.
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        login(token=hf_token)
    else:
        print("HF_TOKEN is not set; skipping Hugging Face login.")

    gen_pipe = StableAudioPipeline.from_pretrained(
        "stabilityai/stable-audio-open-1.0",
        # Half precision only on GPU; CPU inference stays in float32.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32
    ).to(device)

    status.audio_gen_ready = True
    print("Audio Generation Pipeline Ready")
except Exception as e:
    status.audio_gen_error = str(e)
    print(f"Audio Generation Failed: {str(e)}")

@app.route('/status')
def pipeline_status():
    """Return the readiness flag and last load error of the audio pipeline as JSON."""
    audio_generation = {
        "ready": status.audio_gen_ready,
        "error": status.audio_gen_error,
    }
    return jsonify({"audio_generation": audio_generation})

def _encode_wav(samples, sample_rate):
    """Encode `samples` (frames x channels array) as WAV and return the raw bytes."""
    buffer = io.BytesIO()
    sf.write(buffer, samples, sample_rate, format='WAV')
    return buffer.getvalue()


def _separate_stems(waveform):
    """Split `waveform` (channels x samples float CPU tensor) into Demucs stems.

    Returns a dict mapping each source name of the loaded Demucs model to a
    (channels x samples) CPU tensor.
    """
    # demucs is already a dependency of this notebook; imported locally so this
    # block does not rely on edits to the import cell.
    from demucs.apply import apply_model

    device = next(demucs_model.parameters()).device

    # Normalise the mix the same way the Demucs CLI does before inference and
    # undo it afterwards; the epsilon avoids dividing by zero on silent audio.
    reference = waveform.mean(0)
    mean = reference.mean()
    std = reference.std() + 1e-8
    normalized = (waveform - mean) / std

    separated = apply_model(
        demucs_model,
        normalized[None],  # add the batch dimension expected by apply_model
        device=device,
        shifts=1,  # single pass: fastest setting
        split=True,
        progress=False,
    )[0]
    separated = separated.cpu() * std + mean
    return dict(zip(demucs_model.sources, separated))


@app.route('/generate', methods=['POST'])
def generate():
    """Generate audio from a text prompt and return it with its separated stems.

    Expects a JSON object with optional keys:
        prompt   (str)   text prompt, default "electronic music"
        steps    (int)   inference steps, default 50, capped at 200
        duration (float) clip length in seconds, default 10.0, max 47.55

    Returns JSON ``{"stems": {"full", "drums", "bass", "other", "vocals"}}``
    where each value is a base64-encoded WAV file (empty string if a stem is
    not produced). Responds 400 on invalid input, 503 when the pipeline failed
    to load and 500 on any other failure.
    """
    if not status.audio_gen_ready:
        return jsonify({"error": "Audio generation unavailable", "details": status.audio_gen_error}), 503

    sample_rate = 44100
    max_duration = 47.55

    try:
        # silent=True: a missing/malformed body yields None instead of raising,
        # so it can be reported as a client error rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Invalid input", "details": "Request body must be a JSON object"}), 400

        try:
            prompt = data.get("prompt", "electronic music")
            steps = min(int(data.get("steps", 50)), 200)
            duration = float(data.get("duration", 10.0))
        except (TypeError, ValueError) as bad_input:
            return jsonify({"error": "Invalid input", "details": str(bad_input)}), 400

        if steps < 1:
            return jsonify({"error": "Steps must be at least 1"}), 400
        # `not duration > 0` also rejects NaN, which passes ordinary comparisons.
        if not duration > 0:
            return jsonify({"error": "Duration must be a positive number of seconds"}), 400
        if duration > max_duration:
            return jsonify({"error": f"Duration cannot exceed {max_duration} seconds"}), 400

        result = gen_pipe(
            prompt=prompt,
            num_inference_steps=steps,
            audio_end_in_s=duration
        )

        # (channels x samples) float32 tensor on the CPU; the pipeline may have
        # produced float16 on GPU.
        waveform = result.audios[0].float().cpu()
        full_wav = _encode_wav(waveform.T.numpy(), sample_rate)

        # Separate in memory: no temporary files to create or clean up.
        stems = {
            name: _encode_wav(track.T.numpy(), sample_rate)
            for name, track in _separate_stems(waveform).items()
        }

        # Return full audio and stems as base64-encoded data in JSON
        payload = {"full": base64.b64encode(full_wav).decode("utf-8")}
        for name in ("drums", "bass", "other", "vocals"):
            payload[name] = base64.b64encode(stems.get(name, b"")).decode("utf-8")
        return jsonify({"stems": payload})

    except ValueError as ve:
        return jsonify({"error": "Invalid input", "details": str(ve)}), 400
    except Exception as e:
        return jsonify({"error": "Generation failed", "details": str(e)}), 500

if __name__ == "__main__":
    # Port shared by the ngrok tunnel and the Flask development server.
    port = 5000
    try:
        # Close tunnels left over from a previous run of this cell before
        # opening a new one.
        tunnels = ngrok.get_tunnels()
        for stale_tunnel in tunnels:
            ngrok.disconnect(stale_tunnel.public_url)
            print(f"Disconnected existing tunnel: {stale_tunnel.public_url}")

        tunnel = ngrok.connect(port, bind_tls=True)
        public_url = tunnel.public_url
        print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:{port}\"")
    except Exception as e:
        # The server is still started locally even when the tunnel fails.
        print(f"Error starting ngrok tunnel: {e}")
        print("Check ngrok dashboard to stop existing tunnels or upgrade plan.")
        public_url = "Tunnel creation failed. Check ngrok dashboard."

    # Run Flask on a background thread so the notebook cell returns.
    server_thread = threading.Thread(
        target=app.run,
        kwargs=dict(host="0.0.0.0", port=port, use_reloader=False),
    )
    server_thread.start()

CUDA available: True
PyTorch version: 2.6.0+cu124
CUDA device count: 1
Current device: 0
Device name: NVIDIA A100-SXM4-40GB


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

  WeightNorm.apply(module, name, dim)


Audio Generation Pipeline Ready


ERROR:pyngrok.process.ngrok:t=2025-05-18T16:56:39+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/agents\r\n\r\nERR_NGROK_108\r\n"
ERROR:pyngrok.process.ngrok:t=2025-05-18T16:56:39+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok st

Error starting ngrok tunnel: The ngrok process errored on start: authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/agents\r\n\r\nERR_NGROK_108\r\n.
Check ngrok dashboard to stop existing tunnels or upgrade plan.
 * Serving Flask app '__main__'
 * Debug mode: off


In [None]:
# Debug helper: list the process currently listening on port 5000.
!lsof -i :5000
# To free the port, run `!kill <PID>` with the PID printed by lsof above;
# a previously noted PID (1004) no longer existed in a later session.

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


COMMAND   PID USER   FD   TYPE DEVICE SIZE/OFF NODE NAME
python3 12215 root  101u  IPv4 309955      0t0  TCP *:5000 (LISTEN)
/bin/bash: line 1: kill: (1004) - No such process


In [None]:
import torch
import soundfile as sf
import numpy as np
from diffusers import StableAudioPipeline
from huggingface_hub import login
import os
import subprocess
import zipfile

# Report the PyTorch build and whether a CUDA device is visible.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA device count: {torch.cuda.device_count()}")
if cuda_available:
    # Device details are only queried when CUDA is actually usable.
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(0)}")

# Login to Hugging Face with a token taken from the HF_TOKEN environment
# variable; tokens must never be written into the notebook itself.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login.")

# Load Stable Audio Open: float16 on a CUDA device, float32 on CPU.
try:
    pipe = StableAudioPipeline.from_pretrained(
        "stabilityai/stable-audio-open-1.0",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = pipe.to(target_device)
    print("Model loaded successfully!")
except Exception as load_error:
    # Report the failure, then re-raise so the cell stops here.
    print(f"Error loading model: {load_error}")
    raise

# Render one clip from the text prompt and write it to disk as a WAV file.
try:
    # The prompt is split over adjacent literals purely for readability; the
    # resulting string is unchanged.
    prompt = (
        "A psychedelic, spacey intro built on shimmering synth textures and heavy reverb. "
        "It opens with a slow, swirling ambient pad that creates a dreamy, almost otherworldly atmosphere. "
        "There’s a low, pulsing bass subtly layered underneath, with no drums yet — "
        "just an immersive, atmospheric buildup that feels weightless and euphoric"
    )
    duration = 45  # clip length in seconds
    steps = 200    # diffusion inference steps

    print(f"Generating audio with prompt: '{prompt}', duration: {duration}s, steps: {steps}")
    generation = pipe(prompt=prompt, num_inference_steps=steps, audio_end_in_s=duration)

    # First clip of the batch, transposed and converted to a float32 array.
    first_clip = generation.audios[0]
    audio_data = first_clip.T.cpu().numpy().astype(np.float32)

    # Persist the full mix; this file is the input of the stem separation step.
    output_file = "test_audio20.wav"
    sf.write(output_file, audio_data, 44100, format='WAV')
    print(f"Full audio saved as {output_file}")
except Exception as generation_error:
    print(f"Error generating audio: {generation_error}")
    raise

# Perform stem separation using the Demucs command-line tool, then bundle the
# full mix and every stem into a single ZIP archive.
import shutil
import sys

try:
    print("Starting stem separation with Demucs...")
    # Ensure demucs is installed. sys.executable guarantees that pip and demucs
    # run under the same interpreter as this kernel, whatever "pip"/"python"
    # resolve to on PATH.
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "demucs"],
            check=True, capture_output=True, text=True,
        )
        print("Demucs library installed.")
    except subprocess.CalledProcessError as e:
        print(f"Error installing Demucs: {e.stderr}")
        raise

    # Define output directory for stems
    output_dir = "separated_stems"
    os.makedirs(output_dir, exist_ok=True)

    # Run Demucs command-line tool for stem separation
    input_file = output_file  # Use the generated audio file
    model = "htdemucs"  # Use htdemucs model for 4 stems
    command = [
        sys.executable, "-m", "demucs.separate",
        "--out", output_dir,
        "--name", model,
        input_file
    ]
    print(f"Running Demucs command: {' '.join(command)}")
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        print("Error during stem separation:")
        print(result.stderr)
        raise RuntimeError("Demucs separation failed")

    print("Stem separation completed successfully!")
    print(result.stdout)

    # Demucs writes its stems to <out>/<model>/<track name>/<stem>.wav
    stem_names = ["drums", "bass", "other", "vocals"]
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    model_dir = os.path.join(output_dir, model, base_name)

    # Prepare list of files to include in ZIP
    files_to_zip = [output_file]  # Start with full audio
    for stem_name in stem_names:
        stem_file = os.path.join(model_dir, f"{stem_name}.wav")
        if os.path.exists(stem_file):
            # Copy to a top-level file named after the input track for easier
            # inclusion in the ZIP.
            output_stem_file = f"{base_name}_{stem_name}.wav"
            shutil.copyfile(stem_file, output_stem_file)
            print(f"Stem saved as {output_stem_file}")
            files_to_zip.append(output_stem_file)
        else:
            print(f"Stem file not found: {stem_file}")

    # Create a ZIP file containing all audio files
    zip_filename = f"{base_name}_all_files.zip"
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path in files_to_zip:
            zipf.write(file_path, os.path.basename(file_path))
            print(f"Added {file_path} to ZIP archive")

    print(f"ZIP archive created as {zip_filename}")

    # Download the ZIP file when running in Colab; elsewhere just report where
    # the archive was written.
    try:
        from google.colab import files
    except ImportError:
        print(f"Not running in Google Colab; ZIP archive left at {os.path.abspath(zip_filename)}")
    else:
        print("Downloading ZIP archive with all files...")
        files.download(zip_filename)
except Exception as e:
    # Report and re-raise, matching the model-loading and generation steps, so
    # a failed separation is not mistaken for a successful run.
    print(f"Error during stem separation: {e}")
    raise

CUDA available: True
PyTorch version: 2.6.0+cu124
CUDA device count: 1
Current device: 0
Device name: NVIDIA A100-SXM4-40GB


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

Model loaded successfully!
Generating audio with prompt: 'A psychedelic, spacey intro built on shimmering synth textures and heavy reverb. It opens with a slow, swirling ambient pad that creates a dreamy, almost otherworldly atmosphere. There’s a low, pulsing bass subtly layered underneath, with no drums yet — just an immersive, atmospheric buildup that feels weightless and euphoric', duration: 45s, steps: 200


  0%|          | 0/200 [00:00<?, ?it/s]



Full audio saved as test_audio20.wav
Starting stem separation with Demucs...
Demucs library installed.
Running Demucs command: python -m demucs.separate --out separated_stems --name htdemucs test_audio20.wav
Stem separation completed successfully!
Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in /content/separated_stems/htdemucs
Separating track test_audio20.wav

Stem saved as test_audio20_drums.wav
Stem saved as test_audio20_bass.wav
Stem saved as test_audio20_other.wav
Stem saved as test_audio20_vocals.wav
Added test_audio20.wav to ZIP archive
Added test_audio20_drums.wav to ZIP archive
Added test_audio20_bass.wav to ZIP archive
Added test_audio20_other.wav to ZIP archive
Added test_audio20_vocals.wav to ZIP archive
ZIP archive created as test_audio20_all_files.zip
Downloading ZIP archive with all files...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>