In [7]:
import warnings
import logging
import os

# Suppress specific warnings.
# NOTE: `message` is treated as a regular expression matched against the start
# of the warning text, so each entry below acts as a prefix match.
warnings.filterwarnings("ignore", category=UserWarning, message="Coremltools is not installed.")
warnings.filterwarnings("ignore", category=UserWarning, message="tflite-runtime is not installed.")
warnings.filterwarnings("ignore", category=DeprecationWarning, message="The name tf.losses.sparse_softmax_cross_entropy is deprecated.")

# Only let ERROR and above through the root logger and the "tensorflow" logger.
# `logging.getLogger()` with no name is the portable way to obtain the root
# logger; `getLogger("root")` only resolves to it on Python 3.9+, and on older
# interpreters silently creates an unrelated logger that happens to be named
# "root".
logging.getLogger().setLevel(logging.ERROR)
logging.getLogger("tensorflow").setLevel(logging.ERROR)

# Set TensorFlow logging level to suppress informational messages.
# This is done before the third-party/local imports further down so the
# variable is already in place if any of them load TensorFlow.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# ══════════════════════════
# Imports
# ══════════════════════════

# Standard Library Imports
from pathlib import Path

# Third-party imports
import gradio as gr

# Local imports
from utilities import separate_audio, audio_to_midi, print_line

# ══════════════════════════
# Helper functions
# ══════════════════════════
def validate_frequency(value):
    """Translate a frequency equal to 0 into ``None``; pass anything else through.

    Any value that compares equal to 0 is mapped to ``None``; every other
    value (including ``None`` itself) is returned unchanged.
    """
    if value == 0:
        return None
    return value

def split_audio_stems(input_file, model="htdemucs_ft", mp3=True, mp3_rate=320, float32=False, int24=False):
    """Separate an audio file into stems and return the stem paths as strings.

    All arguments are forwarded unchanged to ``separate_audio``; stems are
    written under ``./audio_processing/output_stems``.

    Args:
        input_file: Audio file to separate (the Gradio upload, a file path).
        model: Separation model name passed to ``separate_audio``.
        mp3: Forwarded ``mp3`` flag.
        mp3_rate: Forwarded ``mp3_rate`` value.
        float32: Forwarded ``float32`` flag.
        int24: Forwarded ``int24`` flag.

    Returns:
        list[str]: ``str()`` of every item returned by ``separate_audio``,
        in the order it returned them.

    Raises:
        gr.Error: If ``separate_audio`` returns ``None``.
    """
    output_path = "./audio_processing/output_stems"

    results = separate_audio(
        input_file,
        output_path=output_path,
        model=model,
        mp3=mp3,
        mp3_rate=mp3_rate,
        float32=float32,
        int24=int24,
    )

    if results is None:
        # Keep the console log, but do not hand print_line's return value back
        # to Gradio: this handler feeds several output components, and a
        # single non-list return surfaces as an opaque output-count error.
        # gr.Error is shown to the user as a readable message instead.
        print_line("[ERROR] No results returned from `separate_audio`.", text_color="red")
        raise gr.Error("No results returned from `separate_audio`.")

    # NOTE(review): the caller maps this list positionally onto its output
    # players - confirm separate_audio yields stems in the expected order.
    return [str(path) for path in results]

def convert_to_midi(
    input_file,
    save_midi=True,
    sonify_midi=False,
    save_model_outputs=False,
    onset_threshold=0.5,
    frame_threshold=0.3,
    minimum_note_length=127.70,
    minimum_frequency=None,
    maximum_frequency=None,
    multiple_pitch_bends=False,
    melodia_trick=True,
    sonification_samplerate=44100,
    midi_tempo=120,
):
    """Convert audio to MIDI.

    Thin wrapper around ``audio_to_midi``: every argument is forwarded under
    the same name, except that ``input_file`` is passed as ``audio_path``,
    the output directory is fixed to ``./audio_processing/output_midi`` and
    ``save_notes`` is always ``True``.

    ``minimum_frequency`` / ``maximum_frequency`` are first run through
    ``validate_frequency``, so a value of 0 is sent on as ``None``.

    Returns:
        str | None: ``str()`` of whatever ``audio_to_midi`` returned, or
        ``None`` when it returned ``None``.
    """
    minimum_frequency = validate_frequency(minimum_frequency)
    maximum_frequency = validate_frequency(maximum_frequency)

    output_path = "./audio_processing/output_midi"

    midi_path = audio_to_midi(
        audio_path=input_file,
        output_directory=output_path,
        save_midi=save_midi,
        sonify_midi=sonify_midi,
        save_model_outputs=save_model_outputs,
        save_notes=True,
        onset_threshold=onset_threshold,
        frame_threshold=frame_threshold,
        minimum_note_length=minimum_note_length,
        minimum_frequency=minimum_frequency,
        maximum_frequency=maximum_frequency,
        multiple_pitch_bends=multiple_pitch_bends,
        melodia_trick=melodia_trick,
        sonification_samplerate=sonification_samplerate,
        midi_tempo=midi_tempo,
    )

    # str(None) would yield the literal text "None", which the UI would then
    # treat as a file path. Pass a missing result through as None instead.
    # NOTE(review): presumably audio_to_midi can return None (e.g. when
    # nothing is saved) - confirm against its implementation.
    if midi_path is None:
        return None
    return str(midi_path)

def create_interface_1():
    """Build and return the stem-separation ``gr.Blocks`` UI.

    Layout: one row with an upload column and a parameter column, then a row
    with four audio players. Clicking the button calls ``split_audio_stems``
    with the upload plus the parameter widgets and writes its result into the
    four players.
    """
    with gr.Blocks(theme="shivi/calm_seafoam") as interface:
        with gr.Row():
            # Left column: file upload and the trigger button.
            with gr.Column(scale=1):
                gr.Markdown("### Audio Input")
                uploaded_audio = gr.Audio(type="filepath", label="Upload Audio File", sources="upload")
                run_button = gr.Button("Process Audio")

            # Right column: options handed to split_audio_stems.
            with gr.Column(scale=1):
                gr.Markdown("### Audio Parameters")
                model_name = gr.Textbox(value="htdemucs_ft", label='Select Demucs Model', placeholder="htdemucs_ft", max_lines=1)
                as_mp3 = gr.Checkbox(label="Save as MP3?", value=True)
                bitrate = gr.Slider(minimum=60, maximum=600, step=20, value=320, label="MP3 Bitrate (kbps)")
                as_float32 = gr.Checkbox(label="Save as 32-bit Float Output?", value=False)
                as_int24 = gr.Checkbox(label="Save as 24-bit Integer Output?", value=False)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Audio Outputs")
                # Created in display order; the handler's returned list is
                # mapped onto these players positionally.
                stem_players = [
                    gr.Audio(label="Main (Other)"),
                    gr.Audio(label="Voice"),
                    gr.Audio(label="Bass"),
                    gr.Audio(label="Drums"),
                ]

        # Input order mirrors the positional parameters of split_audio_stems.
        handler_inputs = [uploaded_audio, model_name, as_mp3, bitrate, as_float32, as_int24]
        run_button.click(
            fn=split_audio_stems,
            inputs=handler_inputs,
            outputs=stem_players,
        )
    return interface

def create_interface_2():
    """Build and return the audio-to-MIDI ``gr.Blocks`` UI.

    Layout: a single row with an input/parameter column and an output column.
    Clicking the button calls ``convert_to_midi`` with the upload followed by
    the parameter widgets and writes its result into the output player.
    """
    with gr.Blocks(theme="shivi/calm_seafoam") as interface:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Audio Input")
                uploaded_audio = gr.Audio(type="filepath", label="Upload Audio File", sources="upload")
                gr.Markdown("### Parameters")
                # Widgets are listed in the positional order of
                # convert_to_midi's parameters after `input_file`.
                # There is no "save notes" control: convert_to_midi always
                # passes save_notes=True.
                midi_params = [
                    gr.Checkbox(label="Save MIDI File?", value=True),  # save_midi
                    gr.Checkbox(label="Sonify MIDI? (Generate Audio from MIDI)", value=False),  # sonify_midi
                    gr.Checkbox(label="Save Model Output?", value=False),  # save_model_outputs
                    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Onset Threshold"),  # onset_threshold
                    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.3, label="Frame Threshold"),  # frame_threshold
                    gr.Slider(minimum=10, maximum=500, step=10, value=127.7, label="Minimum Note Length (ms)"),  # minimum_note_length
                    gr.Number(label="Minimum Frequency (Hz)", value=None),  # minimum_frequency
                    gr.Number(label="Maximum Frequency (Hz)", value=None),  # maximum_frequency
                    gr.Checkbox(label="Allow Multiple Pitch Bends?", value=False),  # multiple_pitch_bends
                    gr.Checkbox(label="Apply Melodia Trick?", value=True),  # melodia_trick
                    gr.Number(label="Sonification Samplerate (Hz)", value=44100),  # sonification_samplerate
                    gr.Number(label="MIDI Tempo (BPM)", value=120),  # midi_tempo
                ]
                run_button = gr.Button("Process Audio")

            with gr.Column(scale=1):
                gr.Markdown("### Audio Outputs")
                midi_player = gr.Audio(label="Midi")

        run_button.click(
            fn=convert_to_midi,
            inputs=[uploaded_audio, *midi_params],
            outputs=[midi_player],
        )

    return interface

# Build both tabs, combine them into one tabbed app, and start the server.
interface_1 = create_interface_1()
interface_2 = create_interface_2()
tabbed_interface = gr.TabbedInterface(
    interface_list=[interface_1, interface_2],
    tab_names=["Separate Audio", "Audio to MIDI"],
)
tabbed_interface.launch()





* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




[1m[90m╔══════════════════════════════════════════════════════════════════════════════╗[0m
[1m[90m║[0m[1m[97m                    [STEP 1] Separating Audio with Demucs                     [0m[1m[90m║[0m
[1m[90m╚══════════════════════════════════════════════════════════════════════════════╝[0m
[1m[94m[INFO] 
	Separating the audio file `Sál.mp3`[0m
[1m[90m════════════════════════════════════════════════════════════════════════════════[0m
[1m[96m[CMD] 
	`python -m demucs.separate -o audio_processing\output_stems -n htdemucs_ft --device cuda:0 --mp3 --mp3-bitrate=320`[0m
[1m[90m════════════════════════════════════════════════════════════════════════════════[0m
Selected model is a bag of 4 models. You will see that many progress bars per track.
Separated tracks will be stored in C:\Users\chris\Code\AI-UTSA-2024\Projects\Group_3_Project\audio_processing\output_stems\htdemucs_ft
Separating track C:\Users\chris\AppData\Local\Temp\gradio\71a76f47053dc1bac7644eb02ddd57f

100%|████████████████████████████████████████████████████████████████████████| 187.2/187.2 [00:07<00:00, 24.12seconds/s]
100%|████████████████████████████████████████████████████████████████████████| 187.2/187.2 [00:05<00:00, 31.97seconds/s]
100%|████████████████████████████████████████████████████████████████████████| 187.2/187.2 [00:05<00:00, 33.29seconds/s]
100%|████████████████████████████████████████████████████████████████████████| 187.2/187.2 [00:05<00:00, 32.38seconds/s]


[1m[90m╔══════════════════════════════════════════════════════════════════════════════╗[0m
[1m[90m║[0m[1m[97m                      [STEP 2] Converting Audio to MIDI                       [0m[1m[90m║[0m
[1m[90m╚══════════════════════════════════════════════════════════════════════════════╝[0m
[1m[94m[INFO] 
	Converting audio file to MIDI: 
		`C:\Users\chris\AppData\Local\Temp\gradio\8b121a4528767d58557eaf21b83eb072bb7feeb86cb1f0fc143e20235dd7ef50\vocals.mp3`[0m
[1m[90m════════════════════════════════════════════════════════════════════════════════[0m

Predicting MIDI for C:\Users\chris\AppData\Local\Temp\gradio\8b121a4528767d58557eaf21b83eb072bb7feeb86cb1f0fc143e20235dd7ef50\vocals.mp3...


  Creating midi...
  💅 Saved to audio_processing\output_midi\vocals_basic_pitch.mid


  Creating note events...
  🌸 Saved to audio_processing\output_midi\vocals_basic_pitch.csv
[1m[97m [0m
[1m[90m═══════════════════════════════════════════════════════════════════════════════