In [1]:
import collections
import getopt
import os
import shelve
import sys
import logging


import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

import librosa
import librosa.display

import numpy as np
import matplotlib.pyplot as plt
import re

import Spectrogram as sp
from utils import get_time_resolution, get_frequency_bins, plot_time_domain, extract_instrument_info, check_path


from Models.Autoencoder1D import Autoencoder1D


In [2]:
# Mirror the notebook's output into a log file so a long training run can be followed from outside.
# The file is opened for writing with a 10-byte buffer and is never closed explicitly in this cell.
so = open("data_train.log", "w", 10)
# NOTE(review): `echo` is presumably the ipykernel stream attribute that duplicates every write — confirm.
sys.stdout.echo = so
sys.stderr.echo = so

# Point the first handler of the IPython application logger at the same file and log at INFO level.
get_ipython().log.handlers[0].stream = so
get_ipython().log.setLevel(logging.INFO)

In [3]:
def compute_spectrogram(signal, title, n_fft, fs, frame=None, show_spec=True, win_length=None, hop_length=None, savefig=False, dataset='nsynth-test'):
    """Compute the dB-scaled STFT magnitude of ``signal``, plot it and wrap it in a Spectrogram.

    Args:
        signal: time-domain samples passed to ``librosa.stft``.
        title: plot title; must contain an ``[a-z]+_[a-z]+`` token, which is
            taken as the instrument name.
        n_fft: FFT size.
        fs: sampling rate used for the time and frequency axes.
        frame: optional frame index; when given it is appended to ``title``.
        show_spec: call ``plt.show()`` on the figure.
        win_length: STFT window length, defaults to ``n_fft``.
        hop_length: STFT hop, defaults to ``n_fft // 2``.
        savefig: save the plot as PNG below ``./Samples/<dataset>/...``.
        dataset: folder name used in the save path.

    Returns:
        ``sp.Spectrogram(title, instrument, freq, times, mag_db, phase, ref)``.

    Raises:
        ValueError: if no instrument token can be found in ``title``.
    """
    if not hop_length:
        hop_length = n_fft // 2
    if not win_length:
        win_length = n_fft
    if frame is not None:
        title = "%s_%d" % (title, frame)

    # Resolve the instrument before doing any work so a bad title fails fast and clearly.
    instrument_match = re.search("[a-z]+_[a-z]+", title)
    if instrument_match is None:
        raise ValueError("Cannot extract an instrument name from title %r" % (title,))
    instrument = instrument_match.group(0)

    res = librosa.stft(signal, n_fft=n_fft, win_length=win_length, hop_length=hop_length, center=True)
    # NOTE(review): librosa documents the n_fft offset of times_like for non-centered STFTs,
    # while center=True is used above — confirm the time axis is intended to be shifted.
    times = librosa.times_like(res, sr=fs, hop_length=hop_length, n_fft=n_fft)
    freq = librosa.fft_frequencies(sr=fs, n_fft=n_fft)

    mag, phase = librosa.magphase(res)

    # power_to_db is applied to the magnitude (not its square); per the original note this
    # conversion "worked out best". reconstruct_audio inverts it with db_to_power.
    ref = np.max(mag)
    mag_db = librosa.power_to_db(mag, amin=1e-13, ref=ref, top_db=120)

    # Alternative (nsynth-style, normalized dB) that did not sound as good:
    # mag_squared = mag**2
    # ref = np.max(mag_squared)
    # mag_db = (librosa.power_to_db(mag_squared, amin=1e-13, ref=ref, top_db=120)/120.) + 1

    plt.pcolormesh(times, freq, mag_db, cmap='inferno')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time in sec')
    plt.colorbar(format='%-2.0f dB')
    plt.title(title)
    fig = plt.gcf()

    if savefig:
        path = './Samples/%s/specs_1024_hop256/%s' % (dataset, instrument)
        check_path(path)
        fig.savefig("%s/%s.png" % (path, title), dpi=200)
    if show_spec:
        # Show before closing; closing first would leave nothing to display.
        plt.show()
    # Always release the figure so repeated calls neither leak memory nor draw on top of each other.
    plt.close(fig)

    return sp.Spectrogram(title, instrument, freq, times, mag_db, phase, ref)




In [4]:
def pre_process(instrument_family=None, instrument_source=None, dataset_type='nsynth-train', framed=False):
    """Render spectrogram plots for every sample of an NSynth split.

    Args:
        instrument_family: family index passed to
            ``NSynthDataset.shrink_to_single_instrument``; filtering is only
            applied when both family and source are given.
        instrument_source: source index passed alongside ``instrument_family``.
        dataset_type: sub-folder of ``Samples/`` to read from; also forwarded
            to ``compute_spectrogram`` as the output dataset folder.
        framed: when True, split every signal into 20 ms Hann-windowed frames
            with 50 % overlap and plot each frame; otherwise plot one
            spectrogram (n_fft=1024, hop=512) per sample.
    """
    from NSynthDataset import NSynthDataset

    nsynth = NSynthDataset('examples.json', 'Samples/%s' % dataset_type)

    # e.g. nsynth.shrink_to_single_instrument(4, 2) extracts keyboard_synthetic
    if instrument_family is not None and instrument_source is not None:
        nsynth.shrink_to_single_instrument(instrument_family, instrument_source)

    length = len(nsynth)
    if length == 0:
        # Nothing matched the filter; indexing nsynth[0] below would fail.
        print("No samples to pre-process in %s (family=%s, source=%s)"
              % (dataset_type, instrument_family, instrument_source))
        return

    _, _, sr = nsynth[0]

    frame_size_millis = 20
    hann_window_length_sec = frame_size_millis / 1000
    hann_window_length = int(sr * hann_window_length_sec)
    hann_window = np.hanning(hann_window_length)
    window_hop = int(hann_window_length / 2)

    for i in range(0, length):
        s, t, sr = nsynth[i]
        s, idx = librosa.effects.trim(s)  # cut silence
        # Pad to a multiple of the hop distance, including one extra frame for overlap.
        # NOTE(review): the target size uses idx[1] (end index in the untrimmed signal),
        # not len(s) — confirm this is intended when leading silence was trimmed.
        pad = hann_window_length - idx[1] % window_hop
        s = librosa.util.fix_length(s, size=idx[1] + pad)
        s = np.pad(s, (window_hop, 0), 'constant')
        #plot_time_domain(s, t, 'SignalPlots', persist=False) # plots whole time domain signal

        if framed:
            # version with calculating frames in advance
            frames = librosa.util.frame(s, frame_length=hann_window_length, hop_length=window_hop, axis=0)
            for j, frame in enumerate(frames):
                # Stops at the first all-zero frame; later frames of this sample are skipped too.
                if frame.max() == 0 and frame.min() == 0:
                    break
                frame = frame * hann_window
                plot_time_domain(frame, t, 'SignalPlots/', persist=True, frame=j)
                # Forward dataset_type so framed plots are stored with the split they came from.
                spec = compute_spectrogram(frame, t, 512, sr, savefig=True, frame=j, show_spec=False,
                                           dataset=dataset_type)
                #spec.persist()
        else:
            #plot_time_domain(s, t, 'SignalPlots', persist=True)
            spec = compute_spectrogram(s, t, 1024, sr, savefig=True, show_spec=False, dataset=dataset_type, hop_length=1024//2)
            #spec.persist(dataset_type)
            print("Persisted %d of %d" % (i+1, length), end='\r')

### Call for pre-processing

In [5]:
#pre_process(3,0, 'nsynth-test') # Pre-process guitar acoustic
#pre_process(6,1,'nsynth-test')
#pre_process(10,2, 'nsynth-test')

In [6]:
#pre_process(3,1, 'nsynth-test') # guitar electronic
#pre_process(3,0, 'nsynth-test') # guitar acoustic
#print("Hello From Notebook")
#pre_process(6,1, 'nsynth-test') # organ electronic

#pre_process(0,1, 'nsynth-test')
#pre_process(0,1, 'nsynth-valid') # bass electronic

#pre_process(0,1, 'nsynth-train')
#print("")
#pre_process(6,1, 'nsynth-test')
#print("")
#pre_process(2,0, 'nsynth-test') # flute
#print("")

#pre_process(dataset_type='nsynth-test') # pre process all
#pre_process(dataset_type='nsynth-valid', instrument_family=3, instrument_source=0)

### Training Step 

This section defines the training loop and starts the training run.

In [7]:
import time

def fit_model(criterion, dataLoader, v_dataLoader, model, optimizer, epochs, scheduler, persist=False,
              save_dir="./Models/Trained/Training_25_1_23"):
    """Train ``model`` as an autoencoder, optionally validating and checkpointing each epoch.

    Every batch is fed through the model and the output is scored against the
    input itself with ``criterion``; the second element of each batch is ignored.

    Args:
        criterion: loss callable ``criterion(outputs, inputs)``.
        dataLoader: iterable of ``(features, target)`` training batches.
        v_dataLoader: iterable of validation batches, or ``None`` to skip
            validation (the scheduler is then stepped with the training loss).
        model: module to train; expected to already live on the selected device.
        optimizer: optimizer over the model parameters.
        epochs: number of epochs to run.
        scheduler: scheduler whose ``step(metric)`` is called once per epoch.
        persist: when True, save the whole model after every epoch and once
            more at the end.
        save_dir: directory for the checkpoints; created if missing.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("")
    print("Device: %s" % device)
    print("")

    def _current_lr():
        # Read the learning rates from the optimizer instead of a private scheduler key.
        return [group['lr'] for group in optimizer.param_groups]

    if persist:
        os.makedirs(save_dir, exist_ok=True)

    min_valid_loss = np.inf

    for epoch in range(epochs):
        start = time.time()
        loss = 0
        feature = 0
        for batch_features, _ in dataLoader:
            batch_features = batch_features.to(device)

            optimizer.zero_grad()
            outputs = model(batch_features)
            train_loss = criterion(outputs, batch_features)
            train_loss.backward()
            optimizer.step()

            loss += train_loss.item()
            feature += 1
            print("Feature %d / %d" % (feature, len(dataLoader)), end='\r')
        loss = loss / len(dataLoader)

        print("")

        diff = time.time() - start
        print('Needs %.3f Seconds' % diff)

        print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

        if epoch > 0:
            print(f"Current LR: {_current_lr()}")

        val_loss = 0.0
        if v_dataLoader is not None:
            print("Validating ...")

            # Validate in eval mode and without autograd: no graph is built and layers such as
            # dropout/batch-norm behave as at inference time. The previous mode is restored after.
            was_training = model.training
            model.eval()
            with torch.no_grad():
                for v_batch_features, _ in v_dataLoader:
                    v_batch_features = v_batch_features.to(device)
                    outputs = model(v_batch_features)
                    val_loss += criterion(outputs, v_batch_features).item()
            model.train(was_training)

            val_loss = val_loss / len(v_dataLoader)
            print("Validation Score: %.6f" % val_loss)
            if min_valid_loss > val_loss:
                print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{val_loss:.6f})')
                min_valid_loss = val_loss

            scheduler.step(val_loss)
        else:
            scheduler.step(loss)
        print(f"New LR: {_current_lr()}")

        if persist:
            checkpoint_name = "Baseline_Epoch_%d_Loss_%.3f_Val_%.3f" % (epoch, loss, val_loss)
            torch.save(model, os.path.join(save_dir, checkpoint_name))

        print("------------------------------------------------")

    if persist:
        torch.save(model, os.path.join(save_dir, "Baseline_Trained"))

Train the preliminary model.

In [8]:
def train(epochs):
    """Build the 1D autoencoder, load the train/valid spectrograms and run ``fit_model``.

    Args:
        epochs: number of epochs handed to ``fit_model``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    net = Autoencoder1D().to(device)

    # Adam is used here; SGD(lr=1e-5, momentum=0.5) was tried as an alternative.
    adam = optim.Adam(net.parameters(), lr=1e-7, weight_decay=0.9, eps=1e-8)
    plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(adam, factor=0.1, patience=3)

    n_trainable = 0
    for parameter in net.parameters():
        if parameter.requires_grad:
            n_trainable += parameter.numel()
    print("Params: %s " % n_trainable)

    # MSE reconstruction loss (BCELoss was the alternative considered).
    mse = nn.MSELoss()

    def _as_loader(array):
        # Autoencoder setup: the same tensor serves as input and as target.
        data = torch.Tensor(array)
        return DataLoader(TensorDataset(data, data), 32, num_workers=4, pin_memory=True, shuffle=True)

    train_specs, _ = sp.concatenate_spectrograms("nsynth-train_1024", cut_silence=True)
    np.random.shuffle(train_specs)
    print("")

    valid_specs, _ = sp.concatenate_spectrograms("nsynth-valid_1024", cut_silence=True)
    np.random.shuffle(valid_specs)

    train_loader = _as_loader(train_specs)
    valid_loader = _as_loader(valid_specs)

    fit_model(mse, train_loader, valid_loader, net, adam, epochs, plateau, persist=True)

In [9]:

#train(1000)



In [10]:
def test_model(training_folder, dataset, instrument=None, pitch=None):
    """Print the mean reconstruction MSE of a trained model over a spectrogram set.

    Args:
        training_folder: sub-folder of ``./Models/Trained`` holding ``Trained_Best``.
        dataset: dataset name passed to ``sp.concatenate_spectrograms``.
        instrument: optional single instrument name to restrict the data to.
        pitch: optional value forwarded as ``desired_pitch``.

    Returns nothing; returns early without printing a score when no data is found.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_path = "./Models/Trained/%s/Trained_Best" % training_folder
    # NOTE(review): torch.load unpickles a whole module here — only load trusted checkpoints.
    model = torch.load(model_path, map_location=device)
    model.eval()
    for param in model.parameters():
        param.grad = None
    torch.set_flush_denormal(True)

    if instrument is not None:
        specs, targets = sp.concatenate_spectrograms(dataset, [instrument], cut_silence=True, desired_pitch=pitch)
    else:
        specs, targets = sp.concatenate_spectrograms(dataset, cut_silence=True, desired_pitch=pitch)
    if len(specs) == 0:
        return

    tensor = torch.Tensor(specs)
    testset = TensorDataset(tensor)
    loader = DataLoader(testset, 1, num_workers=6)

    criterion = nn.MSELoss()
    print("")

    with torch.no_grad():
        test_loss = 0.0
        for feature, batch_features in enumerate(loader, start=1):
            features = batch_features[0].to(device)
            # Call the module itself (not .forward) so registered hooks are honoured.
            output = model(features)

            # Accumulate Python floats, as fit_model does, rather than float32 tensors.
            test_loss += criterion(output, features).item()
            print("Feature %d / %d" % (feature, len(loader)), end='\r')

        test_loss = test_loss / len(loader)

        print("")
        print("Test Score: %.6f Pitch: %s" % (test_loss, pitch))
            
    
    

In [11]:
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='065') # Test Score: 1.086052
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='070')
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='075')
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='080')
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='085')
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='090')
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='095')
#test_model("Training_full_2_2_23", dataset="nsynth-test_1024", pitch='100')


In [12]:
#instrument_folders = os.listdir('SpectrogramData.nosync/nsynth-test_1024')

#for instrument in instrument_folders:
#    if instrument != '.DS_Store': # and instrument not in ["keyboard_electronic", "bass_synthetic", "keyboard_acoustic", "brass_acoustic",
                                  #                      "organ_electronic", "mallet_acoustic", "keyboard_synthetic", "string_acoustic",
                                  #                      "guitar_acoustic", "bass_electronic", "guitar_electronic", "reed_acoustic",
                                  #                      "vocal_acoustic", "flute_synthetic", "vocal_synthetic"]:
#        print("Score %s" % instrument)
#        test_model("Training_full_2_2_23", "nsynth-test_1024", instrument=instrument,)
#        print("")


In [13]:
#instrument = 'brass_acoustic'

#test_model("Training_full_2_2_23", "nsynth-valid_1024", pitch='059')

In [14]:
import soundfile


def reconstruct(training_folder, pitch=None):
    """Run the trained autoencoder over the test spectrograms and write plots and audio.

    Model outputs are grouped by their entry in ``targets`` (one group per
    sample title), then each group is plotted with ``plot_reconstructions``
    and turned into audio with ``reconstruct_audio``.

    Args:
        training_folder: sub-folder of ``./Models/Trained`` holding ``Trained_Best``.
        pitch: optional value forwarded as ``desired_pitch``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path = "./Models/Trained/%s/Trained_Best" % training_folder
    # NOTE(review): torch.load unpickles a whole module here — only load trusted checkpoints.
    model = torch.load(model_path, map_location=device)
    # Inference must run in eval mode, as test_model already does.
    model.eval()

    for param in model.parameters():
        param.grad = None
    torch.set_flush_denormal(True)

    specs, targets, phases, power_refs = sp.concatenate_spectrograms("nsynth-test_1024", all_data=True, cut_silence=True, desired_pitch=pitch)
    if len(specs) == 0:
        # Nothing to reconstruct; avoids dividing by an empty loader below.
        return

    tensor = torch.Tensor(specs)
    testset = TensorDataset(tensor)
    # Default batch_size=1 and no shuffling keep loader order aligned with targets/phases/power_refs.
    loader = DataLoader(testset, num_workers=6)
    criterion = nn.MSELoss()

    print("")

    spec_dict = {}

    with torch.no_grad():
        test_loss = 0.0
        for target_idx, batch_features in enumerate(loader):
            features = batch_features[0].to(device)
            output = model(features)

            entry = spec_dict.setdefault(targets[target_idx], {'spec': [], 'phase': [], 'power_ref': []})
            entry['spec'].append(output[0, 0].detach().cpu().numpy())
            entry['phase'].append(phases[target_idx])
            entry['power_ref'].append(power_refs[target_idx])

            test_loss += criterion(output, features).item()

            # 1-based counter so the last line reads N / N.
            print("Feature %d / %d" % (target_idx + 1, len(loader)), end='\r')

        test_loss /= len(loader)
        print("")
        print("Test Loss: %.6f pitch %s" % (test_loss, pitch))

    for key, entry in spec_dict.items():
        spec = np.array(entry['spec']).transpose()

        plot_reconstructions(spec, key)

        key_phases = np.array(entry['phase']).transpose()
        key_power_refs = np.array(entry['power_ref'])
        reconstruct_audio(spec, key_power_refs, key, key_phases)
        
        
    
    
def plot_reconstructions(reconstruction, title, interpolated=False):
    """Save a pcolormesh plot of a reconstructed spectrogram.

    Args:
        reconstruction: 2-D array; its second dimension is used as the number
            of time steps.
        title: file name (without directory) of the saved figure.
        interpolated: store the figure in the ``interpolated`` sub-folder.
    """
    from utils import get_frequency_bins, get_time_resolution

    n_steps = reconstruction.shape[1]
    time_axis = get_time_resolution(512, n_steps, 16000)
    freq_axis = get_frequency_bins(16000, 512)

    plt.pcolormesh(time_axis, freq_axis, reconstruction, cmap='inferno', shading='auto')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time in sec')
    plt.colorbar(format='%-2.0f dB')
    plt.title('Output spectrogram', loc='center', wrap=True)

    # Choose the destination template once, then save and release the figure.
    destination = './Output/reconstructed_1D/interpolated/%s' if interpolated else './Output/reconstructed_1D/%s'
    plt.savefig(destination % title, dpi=200)
    plt.close()


def reconstruct_audio(mag, power, title, phase=None, interpolated=False):
    """Convert a dB spectrogram back to audio and write plots and wav files.

    A Griffin-Lim reconstruction is always written (``<title>_gl.wav``). When
    ``phase`` is given, an additional inverse STFT using that phase is written
    as ``<title>.wav``.

    Args:
        mag: dB-scaled spectrogram, inverted with ``librosa.db_to_power``.
        power: reference value(s) passed as ``ref`` to ``db_to_power``.
        title: base name of the generated plot and wav files.
        phase: optional phase array multiplied onto the magnitude before ``istft``.
        interpolated: write into the ``interpolated`` sub-folders.
    """
    # Resolve output folders first so both reconstructions honour `interpolated`.
    plot_path = 'Output/SignalPlots_1D'
    sig_path = 'Output/wav_1D'
    if interpolated:
        plot_path = os.path.join(plot_path, 'interpolated')
        sig_path = os.path.join(sig_path, 'interpolated')

    mag = librosa.db_to_power(mag, ref=power)

    if phase is not None:
        mag_phase = mag * phase
        sig = librosa.istft(mag_phase, n_fft=1024, hop_length=512, win_length=1024)
        plot_time_domain(sig, title, basepath=plot_path, persist=True)
        soundfile.write(os.path.join(sig_path, '%s.wav' % title), sig, 16000)

    sig_griff = librosa.griffinlim(mag, n_fft=1024, hop_length=512, win_length=1024)
    plot_time_domain(sig_griff, "%s_gf" % title, basepath=plot_path, persist=True)
    filename_gf = '%s_gl.wav' % title
    soundfile.write(os.path.join(sig_path, filename_gf), sig_griff, 16000)


In [18]:
#reconstruct("Training_full_2_2_23", pitch='050')
#reconstruct("Training_full_2_2_23", pitch='045')
#reconstruct("Training_full_2_2_23", pitch='040') # Test Loss: 145.0000 pitch 040
#reconstruct("Training_full_2_2_23", pitch='035')
#reconstruct("Training_full_2_2_23", pitch='030')
# Reconstruct the test set at several pitches with the same trained model.
for _pitch in ('070', '080', '090', '100'):
    reconstruct("Training_full_2_2_23", pitch=_pitch)



Concat Process: 49/49
Feature 4771 / 4772
Test Loss: 110.492836 pitch 070
Concat Process: 48/48
Feature 4655 / 4656
Test Loss: 124.739410 pitch 080
Concat Process: 45/45
Feature 3622 / 3623
Test Loss: 138.434555 pitch 090
Concat Process: 25/25
Feature 1633 / 1634
Test Loss: 243.706528 pitch 100


In [16]:
def encode(training_folder, pitch=None, make_plot=True, instruments=None):
    """Encode test spectrograms with the trained model and group the embeddings per sample.

    Args:
        training_folder: sub-folder of ``./Models/Trained`` holding ``Trained_Best``.
        pitch: optional value forwarded as ``desired_pitch``.
        make_plot: save one embedding plot per sample to ``Output/embeddings_1D``.
        instruments: optional instrument list forwarded to
            ``sp.concatenate_spectrograms``.

    Returns:
        dict mapping each entry of ``targets`` to a dict with the lists
        ``'spec'`` (encoder outputs), ``'phase'`` and ``'power_ref'``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path = "./Models/Trained/%s/Trained_Best" % training_folder
    # NOTE(review): torch.load unpickles a whole module here — only load trusted checkpoints.
    model = torch.load(model_path, map_location=device)
    # Inference must run in eval mode, as test_model already does.
    model.eval()

    specs, targets, phases, power_refs = sp.concatenate_spectrograms("nsynth-test_1024", instruments, all_data=True, cut_silence=True, desired_pitch=pitch)

    tensor = torch.Tensor(specs)
    testset = TensorDataset(tensor)
    # Default batch_size=1 and no shuffling keep loader order aligned with targets/phases/power_refs.
    loader = DataLoader(testset, num_workers=6)

    print("")
    embedding_dict = {}
    with torch.no_grad():
        for feature_idx, feature in enumerate(loader):
            feature = feature[0].to(device)

            encoding = model.encode(feature)

            entry = embedding_dict.setdefault(targets[feature_idx], {'spec': [], 'phase': [], 'power_ref': []})
            entry['spec'].append(encoding[0, 0].detach().cpu().numpy())
            entry['phase'].append(phases[feature_idx])
            entry['power_ref'].append(power_refs[feature_idx])

            # 1-based counter so the last line reads N / N.
            print("Feature %d / %d" % (feature_idx + 1, len(loader)), end='\r')

    print("")
    if make_plot:
        for key, entry in embedding_dict.items():
            embedding = np.array(entry['spec']).transpose()
            y_scale = np.arange(embedding.shape[0])
            x_scale = np.arange(embedding.shape[1])

            plt.pcolormesh(x_scale, y_scale, embedding, cmap='inferno', shading='auto')
            plt.colorbar()
            plt.title("Embedding of %s" % key)
            plt.xlabel("Encoder output vectors")
            plt.savefig("Output/embeddings_1D/%s" % key, dpi=200)
            plt.close()

    return embedding_dict

def decode_interpolated(training_folder, z, title, power_refs):
    """Decode a sequence of embedding vectors and write the resulting plot and audio.

    Args:
        training_folder: sub-folder of ``./Models/Trained`` holding ``Trained_Best``.
        z: 2-D numpy array of embeddings; a singleton axis is inserted at
            position 1 before decoding, and rows are decoded one at a time.
        title: base name for the output files (``_output_spec`` / ``_output``
            suffixes are appended).
        power_refs: reference value(s) forwarded to ``reconstruct_audio``.
    """
    print("Decode")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path = "./Models/Trained/%s/Trained_Best" % training_folder
    # NOTE(review): torch.load unpickles a whole module here — only load trusted checkpoints.
    model = torch.load(model_path, map_location=device)
    # Inference must run in eval mode, as test_model already does.
    model.eval()

    decodable = z.astype(np.float32)[:, np.newaxis, :]

    tensor = torch.tensor(decodable)
    dataset = TensorDataset(tensor)
    loader = DataLoader(dataset, num_workers=6)
    output_spec = []
    with torch.no_grad():
        for count, feature in enumerate(loader, start=1):
            feature = feature[0].to(device)

            output = model.decode(feature)
            output_spec.append(output[0, 0].detach().cpu().numpy())
            print("Feature %d / %d" % (count, len(loader)), end='\r')

    print("")
    output = np.array(output_spec).transpose()

    plot_reconstructions(output, '%s_output_spec' % title, interpolated=True)
    reconstruct_audio(output, power_refs, '%s_output' % title, interpolated=True)


In [17]:
# i_rate (interpolation rate) specifies how much of vector b should be present in result
def interpolate(vec_a, vec_b, i_rate=0.5):
    """Blend two vectors element-wise by linear interpolation.

    The vectors are interleaved as ``a0, b0, a1, b1, ...`` and the interleaved
    sequence is sampled at positions ``2*k + i_rate``, i.e. between ``a_k`` and
    ``b_k`` for ``i_rate`` in ``[0, 1]``.

    Args:
        vec_a: first vector.
        vec_b: second vector.
        i_rate: share of ``vec_b`` in the result (0 gives ``vec_a``, 1 gives ``vec_b``).

    Returns:
        numpy array of interpolated values.
    """
    interleaved = np.insert(vec_b, np.arange(len(vec_a)), vec_a)
    n_points = len(interleaved)

    known_positions = np.arange(n_points)
    query_positions = np.arange(0, n_points, 2) + i_rate

    return np.interp(query_positions, known_positions, interleaved)

def sum_signal(vec_a, vec_b):
    """Return ``vec_a + vec_b`` using the operands' own ``+`` operator."""
    combined = vec_a + vec_b
    return combined

def use_max(vec_a, vec_b):
    """Return a list holding the larger of each pair of elements.

    Pairs are formed with ``zip``, so the result is as long as the shorter input.
    """
    return [max(left, right) for left, right in zip(vec_a, vec_b)]


def interpolate_spec(instrument_a, instrument_b, title_a, title_b, title, plot_original=False):
    """Interpolate two embedding sequences row by row and save a plot of the result.

    Args:
        instrument_a: sequence of embedding vectors of the first sample.
        instrument_b: sequence of embedding vectors of the second sample.
        title_a: panel title for the first sample (only with ``plot_original``).
        title_b: panel title for the second sample (only with ``plot_original``).
        title: base file name of the saved figure.
        plot_original: when True, save a three-panel figure (a, b, interpolated)
            as ``<title>_original.png``; otherwise save only the interpolated
            embedding as ``<title>.png``.

    Returns:
        numpy array of the interpolated embeddings (one row per input pair).
    """
    blended = np.array([interpolate(row_a, row_b, 0.5)
                        for row_a, row_b in zip(instrument_a, instrument_b)])

    feature_axis = np.arange(blended.shape[1])
    frame_axis = np.arange(blended.shape[0])

    if not plot_original:
        plt.imshow(blended, cmap='inferno')
        plt.title(title)
        plt.savefig('Output/embeddings_1D/interpolated/%s.png' % title, dpi=200)
        plt.close()
        return blended

    fig, axes = plt.subplots(1, 3)
    fig.suptitle('Embeddings')

    # Draw all three panels first, then decorate them, then attach the colorbars.
    panels = (instrument_a, instrument_b, blended)
    meshes = [
        ax.pcolormesh(frame_axis, feature_axis, panel.transpose(), cmap='inferno', shading='auto')
        for ax, panel in zip(axes, panels)
    ]

    for ax, heading in zip(axes, (title_a, title_b, 'interpolated')):
        ax.set_title(heading, fontsize='medium')

    for ax in axes:
        ax.set_xlabel('output vectors', fontsize='small')

    for mesh, ax in zip(meshes, axes):
        fig.colorbar(mesh, ax=ax)

    plt.tight_layout()

    plt.savefig('Output/embeddings_1D/interpolated/%s_original.png' % title, dpi=200)
    plt.close()

    return blended

    # sum_sig = sum_signal(instrument_a, instrument_b)
    #
    # plt.imshow(np.array(sum_sig).transpose(), cmap='inferno')
    # plt.title(title + " sum")
    # plt.show()
    # plt.close()
    #
    # plt.imshow(np.array(target_max_spec).transpose(), cmap='inferno')
    # plt.title(title + " max")
    # plt.show()
    # plt.close()





In [20]:
# Folder below ./Models/Trained whose Trained_Best checkpoint is used by encode/decode_interpolated.
folder_to_model = "Training_full_2_2_23"
# Encode the guitar_acoustic test samples at pitch '070' and save one embedding plot per sample.
# NOTE(review): a later cell looks up '...-060-127' guitar and brass keys in embedding_dict1;
# presumably those only exist after encoding pitch '060' with both instruments — confirm on a fresh kernel.
embedding_dict1 = encode(folder_to_model, pitch='070', make_plot=True, instruments=['guitar_acoustic'])
#embedding_dict2 = encode(folder_to_model, pitch='070',  make_plot=True, instruments=['organ_electronic'])





Concat Process: 7/7
Feature 693 / 694


In [55]:
# Interpolate the embeddings of two samples and decode the blend back to audio.
title_a = 'guitar_acoustic_014-060-127'
title_b = 'brass_acoustic_016-060-127'

# Both samples must already be present in embedding_dict1 (filled by an earlier encode() call).
instrument_a = np.array(embedding_dict1[title_a]['spec'])
instrument_b = np.array(embedding_dict1[title_b]['spec'])
power_ref_a = np.array(embedding_dict1[title_a]['power_ref'])
power_ref_b = np.array(embedding_dict1[title_b]['power_ref'])

len_a, len_b = instrument_a.shape[0], instrument_b.shape[0]

# Trim the longer sample so both cover the same number of frames.
if len_a < len_b:
    instrument_b, power_ref_b = instrument_b[:len_a], power_ref_b[:len_a]
else:
    instrument_a, power_ref_a = instrument_a[:len_b], power_ref_a[:len_b]

new_title = "%s&%s" % (title_a, title_b)
res_emb = interpolate_spec(instrument_a, instrument_b, title_a, title_b, new_title, plot_original=True)

# Use the per-frame average of both reference powers for the dB -> power conversion.
mean_power_ref = (power_ref_a + power_ref_b) / 2

decode_interpolated(folder_to_model, res_emb, new_title, mean_power_ref)

Decode
Feature 98 / 98
