# Song Conversion Demo

This notebook demonstrates how to convert singing voices in songs using the AutoVoice system.

## Features
- Vocal separation (singing voice vs instrumental)
- Pitch extraction (F0 contour)
- Voice conversion to target speaker
- Audio mixing of converted vocals with instrumental

## Requirements
- torch
- librosa
- numpy
- soundfile
- IPython (for audio preview)
- ipywidgets (for file upload)

Install with: `pip install torch librosa numpy soundfile ipython ipywidgets`

In [None]:
# Make the project's source directory importable. The membership guard keeps
# sys.path free of duplicate entries when this cell is re-run in the same kernel.
import sys

if '../src' not in sys.path:
    sys.path.append('../src')

In [None]:
# Standard imports
import os
import librosa
import numpy as np
import soundfile as sf
from IPython.display import Audio, display
import ipywidgets as widgets
from pathlib import Path

# AutoVoice imports
from auto_voice.inference.singing_conversion_pipeline import SingingConversionPipeline
from auto_voice.inference.voice_cloner import VoiceCloner

# song_path holds the path of the song to convert. It starts as None and is
# assigned by the song-input cells below (e.g. the upload callback); the later
# load/profile/convert cells raise ValueError while it is still None.
song_path = None

## Song Input Setup

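Choose one of the following options to provide a song for conversion. Both options are interactive: wait until the upload or download has finished (a confirmation message is printed) before running the cells that follow.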

In [None]:
# Option A: Upload a song file
def upload_song(change):
    """Save the file chosen in the upload widget and remember its path.

    Intended as an observer for the ``value`` trait of ``widgets.FileUpload``.
    Both payload shapes used by the widget are accepted:

    * ipywidgets 7: ``{filename: {'content': bytes, ...}}``
    * ipywidgets 8: ``({'name': filename, 'content': memoryview, ...},)``

    Args:
        change: Trait-change mapping; only ``change['new']`` is read.

    Side effects:
        Creates ``./data/songs`` if needed, writes the upload into it, sets the
        global ``song_path`` to the saved file and prints a short summary.
        Nothing is written when the new value is empty or the file name is
        unusable.
    """
    global song_path

    # Create data directory if it doesn't exist
    data_dir = Path('./data/songs')
    data_dir.mkdir(parents=True, exist_ok=True)

    uploaded_file = change['new']
    if not uploaded_file:
        return

    if isinstance(uploaded_file, dict):
        # ipywidgets 7: mapping keyed by file name
        raw_name = next(iter(uploaded_file))
        file_content = uploaded_file[raw_name]['content']
    else:
        # ipywidgets 8: sequence of per-file dicts
        entry = uploaded_file[0]
        raw_name = entry['name']
        file_content = entry['content']

    # The name comes from the client; keep only its last path component so the
    # file cannot be written outside data_dir (e.g. "../x.wav").
    filename = Path(raw_name).name
    if filename in ('', '.', '..'):
        print(f"Ignoring upload with unusable file name: {raw_name!r}")
        return

    # Save uploaded file, and only publish the path once the write succeeded
    saved_path = str(data_dir / filename)
    with open(saved_path, 'wb') as f:
        f.write(file_content)
    song_path = saved_path

    print(f"Uploaded song saved to: {song_path}")

    # Display file info; a failure here is reported but does not undo the save
    try:
        audio, sr = librosa.load(song_path, sr=None)
        duration = len(audio) / sr
        print(f"File: {filename}")
        print(f"Duration: {duration:.2f} seconds")
        print(f"Sample rate: {sr} Hz")
    except Exception as e:
        print(f"Could not analyze audio: {e}")

# Build the upload control: a single file with one of the listed extensions
upload_widget = widgets.FileUpload(accept='.mp3,.wav,.flac,.ogg', multiple=False, description='Upload Song')

# Call upload_song whenever the widget's value changes, then show the control
upload_widget.observe(upload_song, names='value')
display(upload_widget)

In [None]:
# Option B: Download a sample song
def download_sample_song():
    """Fetch the sample clip into ``./data/songs`` and select it as the song.

    Sets the global ``song_path`` to the sample file's location. The download
    is skipped when the file is already present. On a download failure a
    message is printed, any partially written file is removed, and the
    function returns without analyzing the audio.
    """
    global song_path

    # Create data directory if it doesn't exist
    data_dir = Path('./data/songs')
    data_dir.mkdir(parents=True, exist_ok=True)

    # URL of a small sample clip (described as CC0 by the original author;
    # NOTE(review): confirm the license before redistributing)
    sample_url = "https://www.soundjay.com/misc/sounds/bell-ringing-05.wav"
    sample_filename = "bell-ringing-05.wav"
    song_path = str(data_dir / sample_filename)

    # Check if file already exists
    if os.path.exists(song_path):
        print(f"Sample song already exists: {song_path}")
    else:
        print(f"Downloading sample song to: {song_path}")
        # Download the sample song
        import urllib.request
        try:
            urllib.request.urlretrieve(sample_url, song_path)
            print("Sample song downloaded successfully!")
        except Exception as e:
            print(f"Failed to download sample song: {e}")
            print("Please check your internet connection and try again.")
            # Drop a partial file so the existence check above cannot treat a
            # truncated download as a finished one on the next attempt.
            if os.path.exists(song_path):
                os.remove(song_path)
            return

    # Display file info
    try:
        audio, sr = librosa.load(song_path, sr=None)
        duration = len(audio) / sr
        print(f"File: {sample_filename}")
        print(f"Duration: {duration:.2f} seconds")
        print(f"Sample rate: {sr} Hz")
    except Exception as e:
        print(f"Could not analyze audio: {e}")


# Create download button
download_button = widgets.Button(
    description='Download Sample Song',
    disabled=False,
    button_style='info',
    tooltip='Download a sample song for testing'
)


def on_download_click(b):
    """Button callback: the clicked button ``b`` is ignored."""
    download_sample_song()


download_button.on_click(on_download_click)
display(download_button)

## Load and Validate Song

Load the song and display basic information about it.

In [None]:
# Guard: a song must have been selected and must still be present on disk
if song_path is None:
    print("ERROR: song_path is not defined. Please use the 'Song Input Setup' cell above to upload or download a song.")
    raise ValueError("song_path is not defined. Use the setup cell to provide a song.")
elif not os.path.exists(song_path):
    print(
        f"ERROR: Song file not found at {song_path}",
        "Please use the 'Song Input Setup' cell above to upload or download a song.",
        sep="\n",
    )
    raise FileNotFoundError(f"Song file not found: {song_path}")

# Read the song, report its basic properties and offer an inline player.
# Any failure is announced and then re-raised so the notebook stops here.
print(f"Loading song from: {song_path}")
try:
    audio, sr = librosa.load(song_path, sr=None)
    duration = len(audio) / sr
    print(
        "Audio loaded successfully!",
        f"Duration: {duration:.2f} seconds",
        f"Sample rate: {sr} Hz",
        f"Shape: {audio.shape}",
        sep="\n",
    )

    # Preview audio
    print("\nPreviewing original audio...")
    display(Audio(audio, rate=sr))
except Exception as e:
    print(f"Failed to load audio: {e}")
    raise

## Initialize Voice Cloner

Set up the voice cloner to extract speaker embeddings.

In [None]:
# Pick the compute device: CUDA when torch reports it as available, CPU
# otherwise (including when torch cannot be imported at all).
try:
    import torch
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print(f"Using device: {device}")
except ImportError:
    device = 'cpu'
    print("torch not available, using CPU")

# Create the cloner on the chosen device
voice_cloner = VoiceCloner(device=device)
print("Voice cloner initialized successfully!")

## Create or Load Target Voice Profile

For this demo, we'll create a voice profile from the same song (as an example). In practice, you would use a different voice sample for the target profile.

In [None]:
# Guard: a song must have been selected and must still be present on disk
if song_path is None:
    print("ERROR: song_path is not defined. Please use the 'Song Input Setup' cell above to upload or download a song.")
    raise ValueError("song_path is not defined. Use the setup cell to provide a song.")
elif not os.path.exists(song_path):
    print(
        f"ERROR: Song file not found at {song_path}",
        "Please use the 'Song Input Setup' cell above to upload or download a song.",
        sep="\n",
    )
    raise FileNotFoundError(f"Song file not found: {song_path}")

# Demo only: the profile is built from the song itself. A real run would pass
# a separate recording of the target voice instead.
try:
    target_profile = voice_cloner.create_voice_profile(
        audio=song_path,
        user_id='demo_user',
        metadata=dict(source='demo', description='Demo voice profile'),
    )
    target_profile_id = target_profile['profile_id']
    print(
        f"Voice profile created successfully: {target_profile_id}",
        f"Profile details: {target_profile}",
        sep="\n",
    )
except Exception as e:
    # Report the failure, then re-raise so later cells do not run without a profile
    print(f"Failed to create voice profile: {e}")
    raise

## Initialize Singing Conversion Pipeline

Set up the pipeline for converting singing voices.

In [None]:
# Build the conversion pipeline on the selected device, handing it the
# voice cloner created earlier
pipeline = SingingConversionPipeline(device=device, voice_cloner=voice_cloner)
print("Singing conversion pipeline initialized successfully!")

## Convert Song

Perform the singing voice conversion process.

In [None]:
# Check that song_path is defined and file exists
if song_path is None:
    print("ERROR: song_path is not defined. Please use the 'Song Input Setup' cell above to upload or download a song.")
    raise ValueError("song_path is not defined. Use the setup cell to provide a song.")

if not os.path.exists(song_path):
    print(f"ERROR: Song file not found at {song_path}")
    print("Please use the 'Song Input Setup' cell above to upload or download a song.")
    raise FileNotFoundError(f"Song file not found: {song_path}")

# Perform the conversion
print(f"Converting song: {song_path}")
print(f"Target voice profile: {target_profile_id}")

try:
    # Volume and pitch settings are the knobs to experiment with
    result = pipeline.convert_song(
        song_path=song_path,
        target_profile_id=target_profile_id,
        vocal_volume=1.0,
        instrumental_volume=0.9,
        pitch_shift=0.0
    )

    print("Song conversion completed successfully!")
    print(f"Output duration: {result['duration']:.2f} seconds")
    print(f"Output sample rate: {result['sample_rate']} Hz")

    # Save converted audio. The output directory is created first because it
    # only exists already when the song came from the upload/download cells;
    # a manually assigned song_path may live elsewhere.
    output_path = './data/converted_song.wav'
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    sf.write(output_path, result['mixed_audio'], result['sample_rate'])
    print(f"Converted audio saved to: {output_path}")

except Exception as e:
    # Report the failure, then re-raise so the preview cell is not run on stale data
    print(f"Song conversion failed: {e}")
    raise

## Preview Converted Audio

Listen to the converted singing voice.

In [None]:
# Pull the mixed track and its sample rate out of the conversion result,
# then render an inline player for it
converted_audio, sample_rate = result['mixed_audio'], result['sample_rate']

print("Previewing converted audio...")
display(Audio(converted_audio, rate=sample_rate))

## Additional Information

The converted audio has been saved to `./data/converted_song.wav`.

To use your own voice profile:
1. Create a voice profile using `voice_cloner.create_voice_profile()`
2. Replace `target_profile_id` with your profile ID
3. Run the conversion pipeline

Try experimenting with different pitch shifts and volume settings!