# Demo Notebook for Tone Grabber

## Dataset Generator Example

Define the effects and effect parameter mappings for the dataset generation

In [26]:
from dataset.data_generator import DataGenerator
from pedalboard import Reverb, Delay, Chorus, Distortion, Gain
# Effect name -> {parameter name -> two-element tuple}.
# The tuples are presumably (min, max) bounds used when sampling — confirm in DataGenerator.
effects_to_parameters = dict(
    Reverb=dict(
        room_size=(0, 1),
        damping=(0, 1),
        wet_level=(0, 1),
        dry_level=(0, 1),
        width=(0, 1),
        freeze_mode=(0, 1),
    ),
    Delay=dict(
        delay_seconds=(0, 2),
        feedback=(0, 1),
        mix=(0, 1),
    ),
    Chorus=dict(
        rate_hz=(0, 100),
        depth=(0, 1),
        centre_delay_ms=(1, 30),
        feedback=(0, 1),
        mix=(0, 1),
    ),
    Distortion=dict(drive_db=(0, 100)),
    Gain=dict(gain_db=(-12, 12)),
)

# Pedalboard effect classes handed to the generator. The list order differs from the
# dictionary order above; the metadata shown later indexes effects in this list order.
effects = [Reverb, Delay, Distortion, Gain, Chorus]

# Build the data generator for these effects and their parameter mappings.
generator = DataGenerator(effects_to_parameters, effects)

Create a demo dataset from the dry tones in the `demo_data` directory.

In [27]:
import os
# num_samples controls how many samples the generator creates. The original note describes
# it both as "per audio effect" and as giving a total of num_samples * number of dry tones;
# TODO confirm the exact meaning against DataGenerator.create_data.
num_samples = 2
audio_directory = os.path.join(os.getcwd(),"demo_data")
# os.listdir returns entries in arbitrary order and also lists hidden files and
# sub-directories, so keep only regular, non-hidden files and sort them to make
# the dataset order reproducible across runs and machines.
dry_tones = sorted(
    name
    for name in os.listdir(audio_directory)
    if not name.startswith(".") and os.path.isfile(os.path.join(audio_directory, name))
)
# max_chain_length is the maximum number of effects applied to a sample
max_chain_length = 1
demo_dataset = generator.create_data(num_samples,audio_directory,dry_tones,max_chain_length)

  + 2 * np.log10(f_sq)
100%|██████████| 5/5 [00:00<00:00, 11.10it/s]


Each entry of the dataset has this output signature:

```
"dry_tone": 
{
    "spectrogram":log mel spectrogram of the dry tone,
    "loudness":loudness of the dry tone,
    "f0":fundamental frequency of the dry tone,
    "path":path to the original dry tone
}
```

```
"wet_tone": {
    "spectrogram":log mel spectrogram of the wet tone,
    "loudness":loudness of the wet tone,
    "f0":fundamental frequency of the wet tone,
    "path":path to the original wet tone
}
```
```
"effect_names":names of the applied effect(s)
```
```
"effects":one-hot encoding representation of the effects
```
```
"parameters": one-hot like representation of the effect parameters
```

In [28]:
# Show the first generated entry (dry/wet tone features, effect names, and encodings).
display(demo_dataset[0])

{'dry_tone': {'spectrogram': tensor([[[ 0.4758,  0.1692,  0.5460,  ..., -0.8662, -0.8763, -0.8942],
           [ 0.4337,  0.0829,  0.4597,  ..., -0.9494, -0.8654, -0.9873],
           [ 0.2219, -0.0940,  0.2828,  ..., -0.9795, -0.8412, -0.9774],
           ...,
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670]]]),
  'loudness': tensor([[-72.6773, -26.1597, -11.7637, -12.9103, -13.3780, -15.2924, -11.2443,
           -11.3954, -17.6782, -18.0348, -16.0893, -12.6007,  -9.5491,  -4.5020,
            -5.5202,  -8.6047, -14.3348, -11.7150, -13.2339, -15.8282, -16.7399,
           -15.6837, -12.7970,  -5.6934,  -4.9164, -12.2603,  -2.6314,  -9.8185,
           -13.9228, -15.9667, -13.8649, -14.4242, -16.7210, -19.0233, -17.3137,
           -14.4044, -15.1784, -11.8846,  -5.0025, -13.1384, -12.7837,  -6.3740,
            -4.234

You can also get the metadata for the dataset

In [29]:
# Fetch the generator's metadata: parameter masks, effect/index lookups, and the effect classes.
metadata = generator.get_metadata()
display(metadata)

{'parameter_mask_str': {'Reverb': [1,
   1,
   1,
   1,
   1,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  'Delay': [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
  'Chorus': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
  'Distortion': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
  'Gain': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]},
 'parameter_mask_idx': {0: [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  1: [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
  4: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
  2: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
  3: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]},
 'effect_to_idx': {'Reverb': 0,
  'Delay': 1,
  'Distortion': 2,
  'Gain': 3,
  'Chorus': 4},
 'index_to_effect': {0: 'Reverb',
  1: 'Delay',
  2: 'Distortion',
  3: 'Gain',
  4: 'Chorus'},
 'effects': [pedalboard_native.Reverb,
  pedalboard_native.Delay,
  pedalboard_native.Distortion,
  pedalboard_native.Gain,
  pedalbo

## Feature Extractor Demo

The feature extractor is built into the data generator class, so it runs automatically when you call ```generator.create_data()```.

Here is some standalone demo code anyway, in case you run into problems using it.

In [30]:
from pedalboard.io import ReadableAudioFile
from dataset.feature_extractor_torch import FeatureExtractorTorch
import numpy as np
# define instance of feature extractor
feature_extractor = FeatureExtractorTorch()
# target sample rate; the pretrained model is sampled at 16000
sample_rate = 16000
# read in audio path
dry_tone_path = "demo_data/guitar_acoustic_017-102-050.wav"
# Both the source file and its resampled view are context-managed, so they are closed
# even if reading fails and no manual close() calls are needed.
with ReadableAudioFile(dry_tone_path) as f, f.resampled_to(sample_rate) as re_sampled:
    # read the whole clip at the target rate; squeeze(axis=0) drops the leading axis and
    # raises if that axis is not of length 1 (presumably the channel axis of a mono file)
    dry_tone = np.squeeze(re_sampled.read(int(sample_rate * f.duration)), axis=0)
# read in features
features = feature_extractor.get_features(dry_tone)
# features extracted are log mel spectrogram, loudness, and fundamental frequency (f0)
display(features['spectrogram'].shape)
display(features['loudness'].shape)
display(features['f0'].shape)

torch.Size([1, 1024, 128])

torch.Size([1, 257])

torch.Size([249, 88])

## Parameter Prediction Demo

In [31]:
from torch_code.parameter_prediction import ParameterPrediction
# Read the model dimensions and the per-effect parameter masks from the generator metadata.
num_effects = len(metadata['effect_to_idx'])
num_parameters = metadata['total_parameters']
param_mask = metadata['parameter_mask_idx']
# Instantiate the (untrained) parameter prediction model.
model = ParameterPrediction(num_effects,num_parameters,param_mask)

In [32]:
# Per-effect-index masks over the parameter vector; the 1s mark the slots of that effect's parameters.
param_mask

{0: [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 1: [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
 4: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
 2: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
 3: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}

In [33]:
# Take the first dataset entry and split it into its dry and wet feature dictionaries.
entry = demo_dataset[0]
dry_tone_feat = entry["dry_tone"]
wet_tone_feat = entry["wet_tone"]

# Forward pass: both spectrograms first, then wet loudness/f0, then dry loudness/f0.
out, effect, params = model(
    wet_tone_feat['spectrogram'],
    dry_tone_feat['spectrogram'],
    wet_tone_feat['loudness'],
    wet_tone_feat['f0'],
    dry_tone_feat['loudness'],
    dry_tone_feat['f0'],
)
# Show the shape of the first output, then the effect scores and the parameter vector.
display(out.shape, effect, params)

torch.Size([1, 1536])

tensor([[-0.0907, -0.0599, -0.0238, -0.0602,  0.0289]],
       grad_fn=<AddmmBackward0>)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.1254, 0.0850, 0.0518, 0.1026, 0.0233, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)

### Map the output to a pedalboard effect with matched parameters

First, get the pedalboard effect class from the metadata.

In [34]:
import torch
# Look up the effect class at the position of the highest effect score.
predicted_effect_pb = metadata['effects'][torch.argmax(effect).item()]

Then match the predicted parameters to their names so we can use them as input to the effect class.

In [35]:
# Index and name of the effect with the highest score.
predicted_idx = int(torch.argmax(effect))
predicted_name = metadata['index_to_effect'][predicted_idx]
# Select the predicted effect's parameter slots through its mask. Filtering on
# "value != 0" would drop a parameter that is legitimately predicted as 0 and shift
# every following value onto the wrong parameter name.
active_mask = metadata['parameter_mask_idx'][predicted_idx]
predicted_params = [
    float(value)
    for value, keep in zip(params[0].detach(), active_mask)
    if keep
]
# Pair the values with the effect's parameter names, relying on matching order.
# NOTE(review): the values look like raw model outputs rather than values rescaled to the
# ranges in effects_to_parameters — confirm whether rescaling is needed before use.
param_names = metadata['effects_to_parameters'][predicted_name].keys()
matched_params = {param_name:value for param_name,value in zip(param_names,predicted_params)}
matched_params

{'rate_hz': 0.12541311979293823,
 'depth': 0.08499869704246521,
 'centre_delay_ms': 0.05184955894947052,
 'feedback': 0.10257373750209808,
 'mix': 0.023263981565833092}

We can apply this to the dry tone to create the predicted wet tone. Since this model has random weights, it will likely not be accurate right now, but hopefully it will be once trained.

In [36]:
from pedalboard import Pedalboard
# Instantiate the predicted effect class with the matched parameter values.
pred = predicted_effect_pb(**matched_params)
# Wrap it in a single-effect board. Note: this variable shares its name with the `pedalboard` package.
pedalboard = Pedalboard([pred])

In [37]:
from pedalboard.io import ReadableAudioFile
import numpy as np
# Re-read the entry's dry tone at the working sample rate. Both the file and its
# resampled view are context-managed, so no manual close() calls are needed.
with ReadableAudioFile(entry['dry_tone']['path']) as f, f.resampled_to(sample_rate) as re_sampled:
    # squeeze(axis=0) drops the leading axis and raises if it is not of length 1
    # (presumably the channel axis of a mono file)
    dry_tone = np.squeeze(re_sampled.read(int(sample_rate * f.duration)), axis=0)
# The second argument of the board call is the audio sample rate, not the number of
# samples, so pass sample_rate itself. This also avoids reading f.duration after the
# file has been closed.
wet_tone = pedalboard(dry_tone, sample_rate)

#### Predicted wet tone

In [38]:
import IPython.display as ipd
# Render an inline audio player for the predicted wet tone at the working sample rate.
ipd.Audio(data=wet_tone,rate=sample_rate)

#### Actual wet tone

In [40]:
# Names of the effect(s) recorded for this entry.
# NOTE(review): this rebinds `effect`, which earlier held the model's effect scores.
# Re-running the earlier prediction cells after this one would see this list instead;
# consider a distinct name.
effect = entry['effect_names']
effect

['Reverb']

In [41]:
# Show the full dataset entry for reference (dry/wet tone features plus effect labels).
entry

{'dry_tone': {'spectrogram': tensor([[[ 0.4758,  0.1692,  0.5460,  ..., -0.8662, -0.8763, -0.8942],
           [ 0.4337,  0.0829,  0.4597,  ..., -0.9494, -0.8654, -0.9873],
           [ 0.2219, -0.0940,  0.2828,  ..., -0.9795, -0.8412, -0.9774],
           ...,
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670]]]),
  'loudness': tensor([[-72.6773, -26.1597, -11.7637, -12.9103, -13.3780, -15.2924, -11.2443,
           -11.3954, -17.6782, -18.0348, -16.0893, -12.6007,  -9.5491,  -4.5020,
            -5.5202,  -8.6047, -14.3348, -11.7150, -13.2339, -15.8282, -16.7399,
           -15.6837, -12.7970,  -5.6934,  -4.9164, -12.2603,  -2.6314,  -9.8185,
           -13.9228, -15.9667, -13.8649, -14.4242, -16.7210, -19.0233, -17.3137,
           -14.4044, -15.1784, -11.8846,  -5.0025, -13.1384, -12.7837,  -6.3740,
            -4.234