<a href="https://colab.research.google.com/github/22ananya/MUSI6201/blob/main/Final%20Project%20-%20Stereo%20Demixing/Run_Eval_on_Saved_Mixes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import generally required packages - update as needed

In [1]:
# Import dependencies
import numpy as np
import matplotlib.pyplot as plt
import librosa
import scipy.signal as sp
import scipy.io.wavfile as wav

Import/Install the prerequisite code for implementing the Cadenza challenge - includes baselines, other important files

!pip install pyclarity==0.4.0

Import the dataset for the Cadenza challenge directly through the Google Drive link - Only needs to be done once! So now commented out


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Check current path

In [4]:
import os
print(os.getcwd())

/content


Change current path to Cadenza folder

In [5]:
os.chdir('/content/drive/MyDrive/Cadenza_Challenge/cad_icassp_2024')
print(os.getcwd())

/content/drive/MyDrive/Cadenza_Challenge/cad_icassp_2024


#Process single audio file through the entire model step by step

Import all dependencies - same as enhance.py file

In [6]:
from __future__ import annotations

import json
import logging
from pathlib import Path

# pylint: disable=import-error
import hydra
import numpy as np
import torch
from numpy import ndarray
from omegaconf import DictConfig
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB

from clarity.enhancer.compressor import Compressor
from clarity.enhancer.nalr import NALR
from clarity.evaluator.haaqi import compute_haaqi
from clarity.utils.audiogram import Audiogram, Listener
from clarity.utils.file_io import read_signal
from clarity.utils.flac_encoder import FlacEncoder
from clarity.utils.results_support import ResultsFile
from clarity.utils.signal_processing import (
    clip_signal,
    denormalize_signals,
    normalize_signal,
    resample,
    to_16bit,
    compute_rms,
    resample,
)
from clarity.utils.source_separation_support import get_device, separate_sources
from recipes.cad_icassp_2024.baseline.evaluate import (
    apply_gains,
    apply_ha,
    make_scene_listener_list,
    remix_stems,
    load_reference_stems,
)

logger = logging.getLogger(__name__)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  @hydra.main(config_path="", config_name="config")


Import all the required functions defined in Enhance.py that do not need to be changed

In [7]:
from recipes.cad_icassp_2024.baseline.enhance import (
    save_flac_signal,
    decompose_signal,
    process_remix_for_listener
)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  @hydra.main(config_path="", config_name="config")


Import the correct config file (hardcoded location)

In [8]:
from omegaconf import OmegaConf
config = OmegaConf.load('baseline/config.yaml')
print(config.separator.model)

demucs


Set input directory

In [9]:
reference_folder = Path("ref_signals_100")
enhanced_mix_folder = Path("enhanced_signals_100tracks")

# load the audio files in provided path

In [10]:
enhanced_files = [] # initialize list of enhanced audio files
reference_files = [] # initialize list of reference files
for file in os.listdir(enhanced_mix_folder): # iterate over all files in the directory
    if file.endswith('.flac'): # if the file is an audio file
        enhanced_files.append(os.path.join(enhanced_mix_folder, file)) # add the file to the list of audio files
        reference_files.append(os.path.join(reference_folder, file.replace('.flac','_ref.flac'))) # add the corresponding ref file to the list of ref files

In [11]:
print(enhanced_files[0])
print(reference_files[0])
print(len(enhanced_files))

enhanced_signals_100tracks/scene_10001_L0057_remix.flac
ref_signals_100/scene_10001_L0057_remix_ref.flac
100


Load listener data

In [12]:
config.path.root = '/content/drive/MyDrive/Cadenza_Challenge/cad_icassp_2024'


{'root': '/content/drive/MyDrive/Cadenza_Challenge/cad_icassp_2024', 'metadata_dir': '${path.root}/metadata', 'music_dir': '${path.root}/audio/at_mic_music', 'gains_file': '${path.metadata_dir}/gains.json', 'head_positions_file': '${path.metadata_dir}/head_positions.json', 'listeners_file': '${path.metadata_dir}/listeners.train.json', 'music_file': '${path.metadata_dir}/at_mic_music.train.json', 'scenes_file': '${path.metadata_dir}/scenes.train.json', 'scene_listeners_file': '${path.metadata_dir}/scene_listeners.train.json', 'exp_folder': './exp'}


In [13]:
# Load listener audiograms and songs
listener_dict = Listener.load_listener_dict(config.path.listeners_file)

with Path(config.path.gains_file).open("r", encoding="utf-8") as file:
    gains = json.load(file)

with Path(config.path.scenes_file).open("r", encoding="utf-8") as file:
    scenes = json.load(file)

with Path(config.path.scene_listeners_file).open("r", encoding="utf-8") as file:
    scenes_listeners = json.load(file)

with Path(config.path.music_file).open("r", encoding="utf-8") as file:
    songs = json.load(file)

In [20]:
#print(songs)

Load the enhancer (NAL-R) and compression setting (OFF)

In [15]:
enhancer = NALR(**config.nalr)
compressor = Compressor(**config.compressor)

Create a list of songs, listeners (audiogram) and head position (hrtf) to generate, or evaluate the mix - based on provided data

In [16]:
# Select a batch to process
scene_listener_pairs = make_scene_listener_list(
    scenes_listeners, config.evaluate.small_test
)

scene_listener_pairs = scene_listener_pairs[
    config.evaluate.batch :: config.evaluate.batch_size
]

In [17]:
print(scene_listener_pairs[0])

('scene_10001', 'L0066')


# Load functions required for audio evaluation

Add functions and variables needed for evaluation and scoring

In [18]:
scores_headers = [
    "scene",
    "song",
    "listener",
    "left_score",
    "right_score",
    "score",
]


results_file = ResultsFile(
            "scores_100.csv",
            header_columns=scores_headers,
)

# Process Audio - Currently set to process a fixed number of runs (listener - scene pairings, can be changed to run entire dataset)

In [19]:
previous_song = ""
num_tracks = len(enhanced_files)

for idx in range(1):
    # Extract track, listener, scene info from file name
    file_name = enhanced_files[idx]
    sname = file_name.split("/", 1)[1].split("_L",1)[0]
    lname = "L" +  file_name.split("/", 1)[1].split("_L",1)[1].split("_")[0]

    # Get the listener's audiogram
    listener = listener_dict[lname]
    scene = scenes[sname]
    song_name = f"{scene['music']}-{scene['head_loudspeaker_positions']}"

    print(sname)
    print(lname)

    # load audio files for HAAQI
    reference_mixture = read_signal(
            filename=reference_files[idx],
            sample_rate=config.sample_rate,
            allow_resample=True,
        )

    enhanced_mixture = read_signal(
            filename=enhanced_files[idx],
            sample_rate=config.sample_rate,
            allow_resample=True,
        )

    # Evaluate - compare with the generated mixes
    # Compute the scores
    left_score = compute_haaqi(
        processed_signal=resample(
            enhanced_mixture[:, 0],
            config.remix_sample_rate,
            config.HAAQI_sample_rate,
        ),
        reference_signal=resample(
            reference_mixture[:, 0], config.sample_rate, config.HAAQI_sample_rate
        ),
        processed_sample_rate=config.HAAQI_sample_rate,
        reference_sample_rate=config.HAAQI_sample_rate,
        audiogram=listener.audiogram_left,
        equalisation=2,
        level1=65 - 20 * np.log10(compute_rms(reference_mixture[:, 0])),
    )

    right_score = compute_haaqi(
        processed_signal=resample(
            enhanced_mixture[:, 1],
            config.remix_sample_rate,
            config.HAAQI_sample_rate,
        ),
        reference_signal=resample(
            reference_mixture[:, 1], config.sample_rate, config.HAAQI_sample_rate
        ),
        processed_sample_rate=config.HAAQI_sample_rate,
        reference_sample_rate=config.HAAQI_sample_rate,
        audiogram=listener.audiogram_right,
        equalisation=2,
        level1=65 - 20 * np.log10(compute_rms(reference_mixture[:, 1])),
    )

        # Save scores
    results_file.add_result(
        {
            "scene": sname,
            "song": song_name,
            "listener": listener.id,
            "left_score": left_score,
            "right_score": right_score,
            "score": float(np.mean([left_score, right_score])),
        }
    )

    print(right_score, left_score)


scene_10001
L0057
0.13542157935034338 0.12934718417128538
