# Demo Notebook for Tone Grabber

## Dataset Generator Example

Define the effects and the effect-to-parameter mappings used for dataset generation.

In [1]:
from dataset.data_generator import DataGenerator
from pedalboard import Distortion, Gain, PitchShift, LowpassFilter, HighpassFilter
import torch
# Effect classes handed to the data generator. The `index_to_effect` metadata
# printed later in this notebook lists the effects in this same order.
effects = [Distortion, Gain, PitchShift, LowpassFilter, HighpassFilter]

# Effect name -> {parameter name -> two-element list}.
# NOTE(review): the two values are presumably the [min, max] bounds that the
# generator samples from -- confirm against DataGenerator.
effects_to_parameters = {
    "Gain": {"gain_db": [-60, 24]},
    "Distortion": {"drive_db": [0, 60]},
    "PitchShift": {"semitones": [-12, 12]},
    "HighpassFilter": {"cutoff_frequency_hz": [20, 20000]},
    "LowpassFilter": {"cutoff_frequency_hz": [20, 20000]},
}

# Data generator configured with the parameter mapping and the effect classes above.
generator = DataGenerator(effects_to_parameters, effects)

# Use the GPU when one is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

Create a dataset with:

In [2]:
import os
# num_samples is the number of samples created per audio effect, so the total number of samples created will be:
# num_samples * number of dry_tones
num_samples = 2
audio_directory = os.path.join(os.getcwd(), "demo_data")
# os.listdir() returns entries in arbitrary order and also lists sub-directories.
# Keep only regular files and sort them so the list of dry tones handed to the
# generator is the same on every run and on every machine.
dry_tones = sorted(
    entry
    for entry in os.listdir(audio_directory)
    if os.path.isfile(os.path.join(audio_directory, entry))
)
# max_chain_length is the maximum number of effects applied to a sample
max_chain_length = 1
demo_dataset = generator.create_data(num_samples, audio_directory, dry_tones, max_chain_length)

100%|██████████| 5/5 [00:00<00:00, 33.26it/s]


Each entry of the dataset has this output signature:

```
"dry_tone": 
{
    "spectrogram":log mel spectrogram of the dry tone,
    "loudness":loudness of the dry tone,
    "f0":fundamental frequency of the dry tone,
    "path":path to the original dry tone
}
```

```
"wet_tone": {
    "spectrogram":log mel spectrogram of the wet tone,
    "loudness":loudness of the wet tone,
    "f0":fundamental frequency of the wet tone,
    "path":path to the original wet tone
}
```
```
"effect_names":names of the applied effect(s)
```
```
"effects":one-hot encoding representation of the effects
```
```
"parameters": one-hot like representation of the effect parameters
```

In [3]:
# Show the first entry of the generated dataset.
first_entry = demo_dataset[0]
display(first_entry)

{'dry_tone': {'spectrogram': tensor([[[ 0.4758,  0.1692,  0.5460,  ..., -0.8662, -0.8763, -0.8942],
           [ 0.4337,  0.0829,  0.4597,  ..., -0.9494, -0.8654, -0.9873],
           [ 0.2219, -0.0940,  0.2828,  ..., -0.9795, -0.8412, -0.9774],
           ...,
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670]]]),
  'path': '/home/jonat/tone-grabber/demo_data/guitar_acoustic_017-102-050.wav'},
 'wet_tone': {'spectrogram': tensor([[[ 0.4753,  0.1693,  0.5461,  ..., -1.2776, -1.2776, -1.2776],
           [ 0.4338,  0.0832,  0.4600,  ..., -1.2776, -1.2776, -1.2776],
           [ 0.2235, -0.0928,  0.2840,  ..., -1.2776, -1.2776, -1.2776],
           ...,
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.467

You can also get the metadata for the dataset. This is particularly important for the classifier, as it stores which indices correspond to which effect, as well as other important information.

In [4]:
# Fetch the generator's metadata; the index -> effect-name mapping is shown as the cell output.
metadata = generator.get_metadata()
metadata["index_to_effect"]

{0: 'Distortion',
 1: 'Gain',
 2: 'PitchShift',
 3: 'LowpassFilter',
 4: 'HighpassFilter'}

## Feature Extractor Demo

The feature extractor is built into the data generator class so it runs automatically when you run ```generator.create_data()``` 

But here is some demo code in case you run into problems using it anyway

In [5]:
from pedalboard.io import ReadableAudioFile
from dataset.feature_extractor_torch import FeatureExtractorTorch
import numpy as np
# define instance of feature extractor
feature_extractor = FeatureExtractorTorch()
sample_rate = 16000
# read in audio path
dry_tone_path = "demo_data/guitar_acoustic_017-102-050.wav"
# Both file handles are managed by `with`, so they are closed even if reading
# or squeezing raises; no explicit close() calls are needed.
with ReadableAudioFile(dry_tone_path) as f:
    # resample the audio file to match the sample rate, pretrained model is sampled at 16000
    with f.resampled_to(sample_rate) as re_sampled:
        # Read the whole file: duration (seconds) * sample rate = number of frames.
        # np.squeeze(axis=0) drops the leading axis and raises if its length is not 1.
        # NOTE(review): this assumes read() returns (channels, frames) and the file is mono -- confirm.
        dry_tone = np.squeeze(re_sampled.read(int(sample_rate * f.duration)), axis=0)
# read in features
features = feature_extractor.get_features(dry_tone)
# features extracted are log mel spectrogram, loudness, and fundamental frequency (f0)

## Parameter Prediction Demo

## How to Use the classifier and parameter prediction models together

We train a single model for each effect, and we determine which model to use based on the output of the classifier

In [6]:
# Model inputs: spectrograms of the first dataset entry, each with a new leading
# axis (unsqueeze(0)) to mimic the batch dimension.
first_entry = demo_dataset[0]
wet_tone = first_entry['wet_tone']['spectrogram'].unsqueeze(0)
dry_tone = first_entry['dry_tone']['spectrogram'].unsqueeze(0)

In [7]:
# Index -> effect-name mapping, used below to decode the classifier output.
metadata["index_to_effect"]

{0: 'Distortion',
 1: 'Gain',
 2: 'PitchShift',
 3: 'LowpassFilter',
 4: 'HighpassFilter'}

In [15]:
from model.classifier import EffectClassifier
from model.parameter_prediction import ParameterPredictionResNet
from model.utils import PostProcessor

postprocessor = PostProcessor(metadata)

# One parameter-prediction model per effect, keyed by effect name.
parameter_prediction_dict = {}
for effect_label in metadata['index_to_effect'].values():
    # The output size is the number of parameters registered for this effect.
    # NOTE(review): 768 is presumably the input feature size expected by the model -- confirm.
    parameter_model = ParameterPredictionResNet(768, len(metadata['effects_to_parameters'][effect_label]))
    # map_location="cpu" lets checkpoints saved on a GPU load on CPU-only machines;
    # the models are never moved off the CPU in this notebook.
    # NOTE(review): weights_only=False unpickles arbitrary objects -- only load trusted
    # checkpoints, and prefer weights_only=True if the files hold plain state dicts.
    state_dict = torch.load(
        f"saved_models/{effect_label}_parameter_prediction.pth",
        map_location="cpu",
        weights_only=False,
    )
    # NOTE(review): strict=False silently ignores missing/unexpected keys -- confirm this is intended.
    parameter_model.load_state_dict(state_dict, strict=False)
    parameter_model.eval()
    parameter_prediction_dict[effect_label] = parameter_model

batch_size = 1
classifier = EffectClassifier(len(metadata['effects']), batch_size=batch_size)
classifier.load_state_dict(
    torch.load("saved_models/multiclass_model.pth", map_location="cpu", weights_only=False)
)
classifier.eval()

# Inference only: disable autograd so no graph is built and the outputs carry no grad_fn.
with torch.no_grad():
    # The classifier output is decoded with argmax and selects which parameter model to run.
    effect = classifier(dry_tone, wet_tone)
    effect_idx = torch.argmax(effect)
    effect_name = metadata['index_to_effect'][int(effect_idx)]
    # Models in the dict were already switched to eval mode when they were loaded.
    param_model = parameter_prediction_dict[effect_name]
    # The parameter model takes the dry and wet spectrograms concatenated along dim 1.
    joint_spec = torch.cat((dry_tone, wet_tone), dim=1)
    params = param_model(joint_spec)

effect_name, predicted_audio, predicted_effect_obj = postprocessor.process_audio_from_outputs(effect, params, demo_dataset[0]['dry_tone']['path'])
display(effect_name)
display(params)

'HighpassFilter'

tensor([[12.2224]], grad_fn=<AddmmBackward0>)

In [16]:
from IPython.display import Audio
# Inline player for the predicted audio, played back at 16000 (the same value as
# `sample_rate` in the feature-extractor demo).
Audio(data=predicted_audio, rate=16000)
