# Live Demo Notebook

First we find our input and output devices

In [4]:
import sounddevice as sd
from model.classifier import EffectClassifier
from model.parameter_prediction import ParameterPredictionResNet
from model.utils import PostProcessor
import pickle
import torch
from dataset.feature_extractor_torch import FeatureExtractorTorch
from pedalboard import Distortion, Gain, PitchShift, LowpassFilter, HighpassFilter
import numpy as np
# Print the audio devices sounddevice reports, so the input and output
# devices for the live stream can be identified.
print(sd.query_devices())

   0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)
>  1 Microphone Array (Realtek(R) Au, MME (4 in, 0 out)
   2 Microsoft Sound Mapper - Output, MME (0 in, 2 out)
<  3 Speakers (Realtek(R) Audio), MME (0 in, 2 out)
   4 Primary Sound Capture Driver, Windows DirectSound (2 in, 0 out)
   5 Microphone Array (Realtek(R) Audio), Windows DirectSound (4 in, 0 out)
   6 Primary Sound Driver, Windows DirectSound (0 in, 2 out)
   7 Speakers (Realtek(R) Audio), Windows DirectSound (0 in, 2 out)
   8 Speakers (Realtek(R) Audio), Windows WASAPI (0 in, 2 out)
   9 Microphone Array (Realtek(R) Audio), Windows WASAPI (4 in, 0 out)
  10 Speakers (), Windows WDM-KS (0 in, 2 out)
  11 Microphone (Realtek HD Audio Mic input), Windows WDM-KS (2 in, 0 out)
  12 Headphones (Realtek HD Audio 2nd output), Windows WDM-KS (0 in, 2 out)
  13 Microphone Array (Realtek HD Audio Mic Array input), Windows WDM-KS (4 in, 0 out)
  14 Speakers 1 (Realtek HD Audio output with HAP), Windows WDM-KS (0 in, 2 out)
  15 Sp

Load in our models

In [5]:
# Show whether CUDA is usable; the model-loading cell uses the same check to
# choose between the "cuda" and "cpu" devices.
torch.cuda.is_available()

True

In [6]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

effects = [Distortion, Gain, PitchShift, LowpassFilter, HighpassFilter]

# Effect name -> {parameter name -> two-element list}.
# NOTE(review): the pairs look like [min, max] ranges for each parameter;
# confirm against the training code.
effects_to_parameters = {
    "Gain": {
        "gain_db": [-60, 24]
    },
    "Distortion": {
        "drive_db": [0, 60]
    },
    "PitchShift": {
        "semitones": [-12, 12]
    },
    "HighpassFilter": {
        "cutoff_frequency_hz": [20, 20000]
    },
    "LowpassFilter": {
        "cutoff_frequency_hz": [20, 20000]
    },
}

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load metadata files that you produced yourself.
with open('saved_models/classifier_metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)
param_mask = metadata['parameter_mask_idx']
num_parameters = metadata['total_parameters']
num_effects = len(metadata['effect_to_idx'])

# map_location=device lets a checkpoint saved on a GPU machine load on the CPU
# fallback selected above; without it torch.load tries to restore the tensors
# onto the device they were saved from.
# SECURITY: weights_only=False unpickles arbitrary objects; only use it with
# trusted checkpoints.
classifier = EffectClassifier(num_effects, batch_size=1).to(device)
classifier.load_state_dict(
    torch.load("saved_models/multiclass_model.pth", map_location=device, weights_only=False)
)
classifier.eval()

feature_extractor = FeatureExtractorTorch()

# One parameter-regression model per effect, keyed by effect name. Each model
# gets one output per parameter listed for that effect.
parameter_model_dict = {}
for effect_name, param_dict in effects_to_parameters.items():
    model = ParameterPredictionResNet(768, len(param_dict)).to(device)
    # strict=False silently ignores missing/unexpected keys in the checkpoint.
    model.load_state_dict(
        torch.load(
            f"saved_models/{effect_name}_parameter_prediction.pth",
            map_location=device,
            weights_only=False,
        ),
        strict=False,
    )
    parameter_model_dict[effect_name] = model.eval()

preprocessor_config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Define the prediction function, which takes a wet tone and a dry tone and returns a pedalboard effect object with its parameters set.

In [19]:
def predict(wet_tone, dry_tone,dry_tone_path, classifier=classifier, parameter_model_dict=parameter_model_dict, metadata=metadata, post_processor=PostProcessor(metadata)):
    """Predict the effect (and its parameters) that turns ``dry_tone`` into ``wet_tone``.

    Args:
        wet_tone: Audio samples of the processed signal; converted with
            ``np.array`` before feature extraction.
        dry_tone: Audio samples of the unprocessed signal; converted the same way.
        dry_tone_path: Path of the dry recording, forwarded unchanged to
            ``post_processor.process_audio_from_outputs``.
        classifier: Model called as ``classifier(dry_spec, wet_spec)``; the
            argmax of its output selects the effect.
        parameter_model_dict: Mapping from effect name to that effect's
            parameter-prediction model.
        metadata: Dict providing ``'index_to_effect'`` (index -> effect name).
        post_processor: Object whose ``process_audio_from_outputs`` turns the
            raw model outputs into the final effect.

    Returns:
        The third element of the tuple returned by
        ``post_processor.process_audio_from_outputs`` (the predicted effect).

    Note:
        The defaults are bound when this cell runs, so re-run the cell after
        reloading the models or metadata. ``feature_extractor`` and ``device``
        are read from the notebook's global scope.
    """
    # Inference only: skip autograd bookkeeping so no graph is built or kept.
    with torch.no_grad():
        dry_tone_spec = feature_extractor.get_spectrogram(np.array(dry_tone)).to(device)
        wet_tone_spec = feature_extractor.get_spectrogram(np.array(wet_tone)).to(device)

        # Select the effect with the highest classifier output.
        effect = classifier(dry_tone_spec, wet_tone_spec)
        effect_idx = torch.argmax(effect)
        effect_name = metadata['index_to_effect'][int(effect_idx)]

        # Add a leading axis to each spectrogram and join them along dim 1.
        # Both are already on `device`, so no further transfer is needed.
        joint_spec = torch.cat((dry_tone_spec.unsqueeze(0), wet_tone_spec.unsqueeze(0)), dim=1)
        param_model = parameter_model_dict[effect_name]
        params = param_model(joint_spec)

    effect_name, predicted_tone, predicted_effect = post_processor.process_audio_from_outputs(effect, params, dry_tone_path)
    return predicted_effect

In [20]:
import librosa

# Relative to the notebook's working directory (the project root), matching
# the relative saved_models/ paths used when loading the models. An absolute
# user-specific path would break on any other machine.
dry_tone_path = "demo_data/guitar_acoustic_017-102-050.wav"

# librosa.load returns a (samples, sample_rate) tuple; resample to 16 kHz.
dry_tone = librosa.load(dry_tone_path, sr=16000)
# No separate wet recording is loaded here: the dry file stands in for both
# inputs, so it is decoded once and reused.
wet_tone = dry_tone
predicted_pedalboard = predict(wet_tone[0], dry_tone[0], dry_tone_path)

In [None]:
from pedalboard import Gain

# Create a wet signal by applying a 15 dB gain to the dry samples.
# dry_tone is the (samples, sample_rate) tuple from librosa.load, so index 0
# is the audio itself; 16000 matches the rate it was loaded at.
gain = Gain(gain_db=15)
wet_tone = gain(
    dry_tone[0],
    sample_rate=16000,
)


In [None]:
from pedalboard import Pedalboard, Chorus, Compressor, Delay, Gain, Reverb, Phaser
from pedalboard.io import AudioStream

# Device names must match entries reported by the device listing at the top
# of the notebook; adjust them for the machine running the demo.
input_device_name = "IN 05 (BEHRINGER UMC 1820)"
output_device_name = "Speakers (Realtek(R) Audio)"

# AudioStream.plugins is documented as a Pedalboard. The prediction may be a
# single plugin (or a plain sequence of plugins), so normalise it to a
# Pedalboard before handing it to the stream.
if isinstance(predicted_pedalboard, Pedalboard):
    live_board = predicted_pedalboard
elif isinstance(predicted_pedalboard, (list, tuple)):
    live_board = Pedalboard(list(predicted_pedalboard))
else:
    live_board = Pedalboard([predicted_pedalboard])

# Open up an audio stream:
with AudioStream(
  input_device_name=input_device_name,  # Guitar interface
  output_device_name=output_device_name,
  num_input_channels=1
) as stream:
  # Audio is now streaming through this pedalboard and out of your speakers!
  stream.plugins = live_board
  input("Press enter to stop streaming...")

# The live AudioStream is now closed, and audio has stopped.