# Demo Notebook for Tone Grabber

## Dataset Generator Example

Define the effects and effect parameter mappings for the dataset generation

In [1]:
from dataset.data_generator import DataGenerator
from pedalboard import Reverb, Delay, Chorus, Distortion, Gain
import torch
# Maps each effect name to its tunable parameters; every parameter carries a
# two-element tuple (presumably the (low, high) sampling range — confirm
# against DataGenerator).
effects_to_parameters = {
    "Reverb": {
        "room_size": (0, 1),
        "damping": (0, 1),
        "wet_level": (0, 1),
        "dry_level": (0, 1),
        "width": (0, 1),
        "freeze_mode": (0, 1),
    },
    "Delay": {
        "delay_seconds": (0, 2),
        "feedback": (0, 1),
        "mix": (0, 1),
    },
    "Chorus": {
        "rate_hz": (0, 100),
        "depth": (0, 1),
        "centre_delay_ms": (1, 30),
        "feedback": (0, 1),
        "mix": (0, 1),
    },
    "Distortion": {
        "drive_db": (0, 100),
    },
    "Gain": {
        "gain_db": (-12, 12),
    },
}

# pedalboard effect classes handed to the generator.
# Note: this list is ordered differently from the dictionary above (Chorus is
# last here). In the metadata output further down, effect indices follow this
# list while the parameter-mask slots follow the dictionary's key order.
effects = [Reverb, Delay, Distortion, Gain, Chorus]

# Build the data generator for the effects declared above
generator = DataGenerator(effects_to_parameters, effects)

# Prefer the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

Create a demo dataset from the dry tones in the `demo_data` directory

In [2]:
import os
# num_samples controls how many samples the generator creates. The original
# note gives the total as num_samples * number of dry tones.
# NOTE(review): confirm the exact multiplier against DataGenerator.create_data.
num_samples = 2
audio_directory = os.path.join(os.getcwd(), "demo_data")
# os.listdir returns entries in arbitrary order and also lists hidden files
# (e.g. .DS_Store) and sub-directories. Keep only regular, non-hidden files and
# sort them so the dataset is built from the same inputs in the same order on
# every run.
dry_tones = sorted(
    name
    for name in os.listdir(audio_directory)
    if not name.startswith(".")
    and os.path.isfile(os.path.join(audio_directory, name))
)
# max_chain_length is the maximum number of effects applied to a sample
max_chain_length = 1
demo_dataset = generator.create_data(num_samples, audio_directory, dry_tones, max_chain_length)

100%|██████████| 5/5 [00:00<00:00, 64.77it/s]


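Each entry of the dataset has this output signature (note: the sample output in the next cell shows only `spectrogram` and `path` for each tone, so `loudness` and `f0` may not be present in every configuration):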

```
"dry_tone": 
{
    "spectrogram":log mel spectrogram of the dry tone,
    "loudness":loudness of the dry tone,
    "f0":fundamental frequency of the dry tone,
    "path":path to the original dry tone
}
```

```
"wet_tone": {
    "spectrogram":log mel spectrogram of the wet tone,
    "loudness":loudness of the wet tone,
    "f0":fundamental frequency of the wet tone,
    "path":path to the original wet tone
}
```
```
"effect_names":names of the applied effect(s)
```
```
"effects":one-hot encoding representation of the effects
```
```
"parameters": one-hot like representation of the effect parameters
```

In [3]:
# Show a single entry (the first one) instead of dumping the whole dataset
first_entry = demo_dataset[0]
display(first_entry)

{'dry_tone': {'spectrogram': tensor([[[ 0.4758,  0.1692,  0.5460,  ..., -0.8662, -0.8763, -0.8942],
           [ 0.4337,  0.0829,  0.4597,  ..., -0.9494, -0.8654, -0.9873],
           [ 0.2219, -0.0940,  0.2828,  ..., -0.9795, -0.8412, -0.9774],
           ...,
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670]]]),
  'path': '/home/jonat/tone-grabber/demo_data/guitar_acoustic_017-102-050.wav'},
 'wet_tone': {'spectrogram': tensor([[[ 0.5192,  0.2126,  0.5894,  ..., -0.8228, -0.8329, -0.8509],
           [ 0.4770,  0.1262,  0.5031,  ..., -0.9061, -0.8220, -0.9438],
           [ 0.2653, -0.0506,  0.3262,  ..., -0.9362, -0.7979, -0.9341],
           ...,
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.4670,  0.4670,  ...,  0.4670,  0.4670,  0.4670],
           [ 0.4670,  0.467

You can also get the metadata for the dataset

In [4]:
# Retrieve the generator's metadata. The displayed dict includes the per-effect
# parameter masks and the effect <-> index lookup tables; later cells read
# `parameter_mask_idx` and `effect_to_idx` from it to size the model.
metadata = generator.get_metadata()
display(metadata)

{'parameter_mask_str': {'Reverb': [1,
   1,
   1,
   1,
   1,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  'Delay': [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
  'Chorus': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
  'Distortion': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
  'Gain': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]},
 'parameter_mask_idx': {0: [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  1: [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
  4: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
  2: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
  3: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]},
 'effect_to_idx': {'Reverb': 0,
  'Delay': 1,
  'Distortion': 2,
  'Gain': 3,
  'Chorus': 4},
 'index_to_effect': {0: 'Reverb',
  1: 'Delay',
  2: 'Distortion',
  3: 'Gain',
  4: 'Chorus'},
 'effects': [pedalboard_native.Reverb,
  pedalboard_native.Delay,
  pedalboard_native.Distortion,
  pedalboard_native.Gain,
  pedalbo

## Feature Extractor Demo

The feature extractor is built into the data generator class, so it runs automatically when you call `generator.create_data()`.

Here is some standalone demo code anyway, in case you run into problems using it.

In [5]:
from pedalboard.io import ReadableAudioFile
from dataset.feature_extractor_torch import FeatureExtractorTorch
import numpy as np
# define instance of feature extractor
feature_extractor = FeatureExtractorTorch()
# the pretrained model is sampled at 16000 Hz, so audio is resampled to match
sample_rate = 16000
# path of the dry tone to analyse
dry_tone_path = "demo_data/guitar_acoustic_017-102-050.wav"
# Both the source file and its resampled view are context managers, so the
# `with` statements close them (including when read() raises) and no manual
# close() calls are needed.
with ReadableAudioFile(dry_tone_path) as f:
    with f.resampled_to(sample_rate) as re_sampled:
        # Read the whole clip at the target rate. squeeze(axis=0) drops the
        # channel axis and therefore only works for single-channel (mono) audio.
        dry_tone = np.squeeze(re_sampled.read(int(sample_rate * f.duration)), axis=0)
# extract the features from the raw waveform
features = feature_extractor.get_features(dry_tone)
# features extracted are log mel spectrogram, loudness, and fundamental frequency (f0)

## Parameter Prediction Demo

In [6]:
from model.parameter_prediction import ParameterPrediction
# Size the model from the generator metadata
param_mask = metadata['parameter_mask_idx']
num_parameters = metadata['total_parameters']
num_effects = len(metadata['effect_to_idx'])
batch_size = 1

# Build the parameter prediction network and move it to the selected device
model = ParameterPrediction(
    num_effects,
    num_parameters,
    param_mask,
    batch_size=batch_size,
)
model = model.to(device)

## Train the parameter prediction model on a sample dataset

In [None]:
from model.parameter_prediction import Trainer
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss, MSELoss
# Wrap the demo dataset in a shuffling loader, one entry per batch
batch_size = 1
train_loader = DataLoader(demo_dataset, batch_size=batch_size, shuffle=True)

# Two losses: cross-entropy (named for the effect output) and MSE (named for
# the parameter output)
loss_fn_effect = CrossEntropyLoss()
loss_fn_params = MSELoss()

# Adam with a small learning rate; the scheduler multiplies the rate by 0.1
# after 2 epochs without improvement of the monitored ('min' mode) quantity
optimizer = Adam(model.parameters(), lr=.00001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.1)

trainer = Trainer(model, metadata, lambda_=.65)
# NOTE(review): the same loader is passed twice — presumably as both the
# training and the validation loader, which is only acceptable for a demo.
# The trailing 20 is presumably the number of epochs — confirm against
# Trainer.train.
trainer.train(
    model,
    train_loader,
    train_loader,
    loss_fn_effect,
    loss_fn_params,
    optimizer,
    scheduler,
    20,
)

### After Training we can use the post processor to process model outputs into pedalboard effects

In [None]:
# Take the first dataset entry and split it into its wet / dry feature dicts
entry = demo_dataset[0]
wet_tone_feat = entry["wet_tone"]
dry_tone_feat = entry["dry_tone"]

# This is inference, not training: put the model in eval mode so layers such as
# dropout / batch-norm (if the model has any) behave deterministically, and
# disable autograd so the outputs carry no gradient graph.
model.eval()
with torch.no_grad():
    out, effect, params = model(
        wet_tone_feat['spectrogram'].to(device),
        dry_tone_feat['spectrogram'].to(device),
    )
display(out.shape)
display(effect)
display(params)

In [None]:
from model.parameter_prediction import PostProcessor
# The post processor is built from the same metadata the model was sized with
post_processor = PostProcessor(metadata)

# Turn the model outputs from the previous cell into processed audio, starting
# from the dry tone file referenced by the dataset entry
wet_tone, predicted_effect = post_processor.process_audio_from_outputs(
    effect,
    params[0],
    dry_tone_feat['path'],
)

In [None]:
import IPython.display as ipd
# Play the processed audio inline.
# NOTE(review): playback uses `sample_rate` from the feature extractor cell
# (16000); presumably the post processor renders audio at that rate — confirm.
ipd.Audio(data=wet_tone,rate=sample_rate)