In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import itertools
import json
import import_ipynb
import csv
from tqdm import tqdm
from scipy.io import wavfile
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Input, Dense,Conv1D, MaxPooling1D, UpSampling1D, Flatten, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.python.ops import math_ops
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import uncertainties as unc
import uncertainties.umath as umath

2023-07-31 22:08:14.985610: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
####################################
# utility functions
# def cut_old(arr, length):
#   idx = len(arr)%length
#   out = []
#   while(idx+length <= len(arr)):
#     out.append(arr[idx:idx+length])
#     idx += length
#   return np.array(out)

def cut(arr, length):
  ueberhang = len(arr) % length
  a = np.array(arr[ueberhang:]).reshape(-1, length)
  return a

# def loadSong(fName, numTotalSongs = 1):
#   fs, data = wavfile.read(inpathTrain + fName)
#   all_data = [data]

#   if numTotalSongs > 1:
#     seed = sum([ord(char) for char in fName])
#     random.seed(seed)
#     file_nams = random.sample(fileNames[:170], numTotalSongs-1)
#     for name in file_nams:
#       fs, data1 = wavfile.read(inpathTrain + name)
#       all_data.append(data1)

#   concatenated_data = np.concatenate(all_data)
#   if concatenated_data.ndim > 1:
#     mono_data = np.mean(concatenated_data, axis=1, dtype='int16')
#   else:
#     mono_data = concatenated_data

#   return mono_data.astype('int16')

def loadSong(fName, numTotalSongs = 1, percentage_of_song = 1):
  fs, data = wavfile.read(inpathTrain + fName)

  if numTotalSongs > 1:
      # so that every song evey time picts the same songs for the additionals
      seed = sum(ord(char) for char in fName)
      random.seed(seed)
      file_nams = random.sample(fileNames[:170], numTotalSongs - 1)
      for name in file_nams:
          fs, data1 = wavfile.read(inpathTrain + name)
          data = np.concatenate((data, data1), axis=0)
  
  if data.ndim > 1:
    mono_data = np.mean(data, axis=1, dtype='int16')
  else:
    mono_data = data

  return mono_data.astype('int16')



def loadSongCut(fName, silence_prob = 0, numTotalSongs = 1, percentage_of_song = 1):
  data = loadSong(fName, numTotalSongs, percentage_of_song)
  data = cut(data, snippitLength)

  # Replace rows with silence based on silence_prob
  if silence_prob!=0:
    num_rows = data.shape[0]
    num_silence_rows = int(num_rows * silence_prob)
    silence_rows = np.zeros((num_silence_rows, data.shape[1]), dtype='int16')
    data[:num_silence_rows, :] = silence_rows

  scaler[fName] = MinMaxScaler()
  #data = quadratic_scaler(data, 5)
  data = scaler[fName].fit_transform(data)


  # takes very long
  # rng = np.random.default_rng()
  # rng.shuffle(data)
  # if percentage_of_song != 1:
  #   index = int(data.shape[0]*percentage_of_song)
  #   data = data[:index]
  Xt, Xv = train_test_split(data, test_size=0.3, random_state=42)
  if percentage_of_song != 1:
    index_t = int(len(Xt)*percentage_of_song)
    index_v = int(len(Xv)*percentage_of_song)
    Xt = Xt[:index_t]
    Xv = Xv[:index_v]
  return Xt, Xv



def snipLoss(y_true, y_pred):
  snipWeight = tf.convert_to_tensor([int(np.cosh(x)) for x in range(-5, 5, snippitLength)], dtype='float32')
  squared_difference = math_ops.squared_difference(y_true, y_pred)
  loss = math_ops.Mul(x = squared_difference, y = snipWeight)
  loss = math_ops.log1p(loss)
  return loss



def si_snr(original, estimate):
  # original and estimate are tensors of shape (batch_size, time_steps)
  # compute the dot product of original and estimate along the time axis
  dot = tf.reduce_sum(original * estimate, axis=-1, keepdims=True)
  denominator = tf.reduce_sum(original ** 2, axis=-1, keepdims=True)
  # compute the scaled target
  scaled_target = dot * original / denominator
  # compute the noise
  e_noise = estimate - scaled_target
  # compute the SI-SNR in decibels
  si_snr = 10 * tf.math.log(tf.reduce_sum(scaled_target ** 2, axis=-1) / tf.reduce_sum(e_noise ** 2, axis=-1)) / tf.math.log(10.0)
  # return the SI-SNR tensor of shape (batch_size,)
  return si_snr



def si_snr_std(original, estimate):
  dot = np.sum(original * estimate, axis=-1, keepdims=True)
  # compute the energy of target along the time axis
  denominator = np.sum(original ** 2, axis=-1, keepdims=True)
  # compute the scaled target
  scaled_target = dot * original / denominator
  # compute the noise
  e_noise = estimate - scaled_target
  # compute the SI-SNR in decibels
  si_snr = 10 * np.log10(np.sum(scaled_target ** 2, axis=-1) / np.sum(e_noise ** 2, axis=-1))
  # return the SI-SNR array of shape (batch_size,)
  return si_snr



def mean_with_error(arr):
    arr_unc = unc.ufloat(np.mean(arr), np.std(arr, ddof=1) / np.sqrt(len(arr)))
    return f"{arr_unc.nominal_value:.2f} +- {arr_unc.std_dev:.2f}"

# numTotalSongs = 17
# percentage_of_song = float(1/numTotalSongs)
# Xt, Xv = loadSongCut(fileNames[0], numTotalSongs = numTotalSongs, percentage_of_song = percentage_of_song)

In [3]:
####################################
#####  plot history

def plot_loss(ax, network_history):
    loss = np.concatenate([network_history[key].history['loss'] for key in network_history.keys()])
    val_loss = np.concatenate([network_history[key].history['val_loss'] for key in network_history.keys()])

    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Loss')
    ax.plot(loss, label='Training')
    ax.plot(val_loss, label='Validation')
    ax.legend()

def plot_si_snr(ax, network_history):
    si_snr = np.concatenate([network_history[key].history['si_snr'] for key in network_history.keys()])
    val_si_snr = np.concatenate([network_history[key].history['val_si_snr'] for key in network_history.keys()])

    ax.set_xlabel('Epochs')
    ax.set_ylabel('SI_SNR')
    ax.set_title('SI-SNR')
    ax.plot(si_snr, label='Training')
    ax.plot(val_si_snr, label='Validation')
    ax.legend()

def plot_history(network_history, name):
    fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=False)

    plot_loss(ax[0], network_history)
    plot_si_snr(ax[1], network_history)

    plt.tight_layout()
    plt.savefig(name)
    # plt.show()
    plt.clf()


In [4]:
####################################
#####  buildModel (Hyperparameter grid search)

def buildModel(compression_ratio = 0.5, numDense = 1, numConv = 8, numConvLayer = 0, loss_fct = snipLoss, use_bias = False, learning_rate = 0.001):
  
  latentSize = int(compression_ratio*snippitLength)

  # keep tensorflow from allocating more memory as it currently needs
  physical_devices = tf.config.experimental.list_physical_devices('GPU')
  for i in physical_devices:
      tf.config.experimental.set_memory_growth(i, True)
  tf.device('/device:GPU:0')

  input = Input(shape=(snippitLength,1))
  x = input

  # Convolutional part of encoder
  for i in range(numConvLayer):
    x = Conv1D(numConv, 5, activation='relu', padding='same')(x)
    x = MaxPooling1D(2, padding = 'same')(x)

  convShape = x.shape
  # calculate flatten dimension
  flsize = 1
  for i in x.shape:
    if(i != None):
      flsize*= i
  x = Flatten()(x)

  # Dense part of encoder
  denses = [int(i) for i in np.linspace(flsize, latentSize, numDense+1)]
  print(denses)
  print(flsize)
  print(latentSize)
  for i in denses[1:]:
    x = Dense(i, activation='relu', use_bias=use_bias)(x)
    
  encoded = x

  # Dense part of decoder
  x = encoded
  for i in denses[::-1][1:]:
    if(numConvLayer == 0 and i == snippitLength):
      x = Dense(i, activation='sigmoid')(x)
    else:
      x = Dense(i, activation='relu', use_bias=use_bias)(x)

  if(numConvLayer == 0):
    decoded = x

  x = Reshape(convShape[1:])(x)

  # Convolutional part of decoder
  for i in range(numConvLayer):
    x = Conv1D(numConv, 5, activation='relu', padding='same')(x)
    x = UpSampling1D(2)(x)
  if(numConvLayer != 0):
    decoded = Conv1D(1, 5, activation='sigmoid', padding='same')(x)

  autoencoder = Model(input, decoded)
  autoencoder = Model(input, Flatten()(decoded))

  autoencoder.compile(optimizer=Adam(learning_rate=learning_rate), loss=loss_fct, metrics=[si_snr])
  
  print(f'current model: ratio={compression_ratio},numDense={numDense},numConv={numConv},numConvLayer={numConvLayer}')
  autoencoder.summary()
  return autoencoder

get_custom_objects()['snipLoss'] = snipLoss
get_custom_objects()['si_snr'] = si_snr

# testing:
# Xt, Xv = loadSongCut('1727_schubert_op114_2.wav')
# buildModel(numDense=1).fit(Xt[:2], Xt[:2],
#             epochs=1,
#             batch_size=512,
#             shuffle=True,

#             validation_data=(Xv[:2], Xv[:2]))

In [5]:
####################################
#####  evaluate Songs

def evaluateTestSongs(autoencoder, num = 0):
  test_evaluated = []
  if num!=0:
    numTestSongs = num
  print(f'evaluating {numTestSongs} test songs')
  for songname in tqdm(reversed(fileNames[-numTestSongs:])):
      orig = loadSong(songname)
      origSnip = cut(orig, snippitLength)
      orig = np.concatenate(origSnip)
      
      if(songname in scaler.keys()):
        scaler_Example = scaler[songname]
        origSnip_transformed = scaler_Example.transform(origSnip)
      else:
        scaler_Example = MinMaxScaler()
        origSnip_transformed = scaler_Example.fit_transform(origSnip)

      # autoencode song
      a = autoencoder.predict(origSnip_transformed)
      a = a.reshape(-1, snippitLength)
      XpredSnip = scaler_Example.inverse_transform(a)
      estimate_uncorr = np.concatenate(XpredSnip).astype('int16')


      silence = np.zeros((1,snippitLength), dtype = 'int16')
      a = scaler_Example.transform(silence)
      a = autoencoder.predict(a)
      a = a.reshape(-1, snippitLength)
      Xsilence = scaler_Example.inverse_transform(a)[0]

      # remove noise generated by silence
      XpredSnip_minussilence = np.array(XpredSnip) - Xsilence
      Xpred = np.concatenate(XpredSnip_minussilence).astype('int16')

      test_loss, test_si_snr_uncorr = autoencoder.evaluate(origSnip_transformed, origSnip_transformed, verbose=2)
      
      test_si_snr_corrected = si_snr_std(orig, Xpred)
      output_wav_name = f'{songname}_SNR={test_si_snr_corrected:.1f}.wav'
      wavfile.write(f'{output_folder}original_{songname}', samplerate, orig)
      wavfile.write(output_folder + output_wav_name, samplerate, Xpred)
      print(f"Test song predicted and saved: {output_wav_name}")

      test_evaluated.append([songname, test_loss, test_si_snr_uncorr, test_si_snr_corrected])
  return test_evaluated

In [14]:
####################################
#####  waveform plots
#####  predict test song and save it

def plotWave(autoencoder, name, compression_ratio, Test_Song = None):
  if Test_Song == None:
    Test_Song = '1760_d958-4.wav'
    # Test_Song = fileNames[-1]
  # exampleSong = name
  # exampleSong = '1727_schubert_op114_2.wav'
  orig = loadSong(Test_Song)
  origSnip = cut(orig, snippitLength)
  orig = np.concatenate(origSnip)

  if(Test_Song in scaler.keys()):
    scaler_Example = scaler[Test_Song]
    origSnip_transformed = scaler_Example.transform(origSnip)
  else:
    scaler_Example = MinMaxScaler()
    origSnip_transformed = scaler_Example.fit_transform(origSnip)

  # autoencode song
  a = autoencoder.predict(origSnip_transformed)
  a = a.reshape(-1, snippitLength)
  XpredSnip = scaler_Example.inverse_transform(a)
  estimate_uncorr = np.concatenate(XpredSnip).astype('int16')

  silence = np.zeros((1, snippitLength), dtype = 'int16')
  a = scaler_Example.transform(silence)
  a = autoencoder.predict(a)
  a = a.reshape(-1, snippitLength)
  Xsilence = scaler_Example.inverse_transform(a)[0]

  # remove noise generated by silence
  # XpredSnip_minussilence = [i-Xsilence for i in XpredSnip]
  XpredSnip_minussilence = np.array(XpredSnip) - Xsilence
  Xpred = np.concatenate(XpredSnip_minussilence).astype('int16')
  estimate_corr = Xpred

  # test_loss, test_si_snr = autoencoder.evaluate(origSnip_transformed, origSnip_transformed)
  si_snr_uncorr = si_snr_std(orig, estimate_uncorr)
  print(f'ucorrected SI-SNR = {si_snr_uncorr} dB')

  si_snr_corr = si_snr_std(orig, estimate_corr)
  print(f'corrected SI-SNR = {si_snr_corr} dB')


  # output_wav_name = f'snln={snippitLength}_cmpr={compression_ratio:.1f}_loss={loss_fct.__name__}_SNR={testwav_si_snr:.1f}.wav'
  output_wav_name = f'{Test_Song}_{compression_ratio:.1f}_SNR={si_snr_corr:.1f}.wav'
  wavfile.write(f'{output_folder}original_{Test_Song}', samplerate, orig)
  wavfile.write(f'{output_folder}UNCORR_{output_wav_name}', samplerate, Xpred)
  wavfile.write(output_folder + output_wav_name, samplerate, Xpred)
  print(f"Test song predicted and saved: {output_wav_name}")


  ###### plots
  plt.plot(orig, linewidth = 0.1)
  plt.plot(orig-estimate_corr, linewidth = 0.1)
  plt.savefig(name + "whole_corr.pdf")
  plt.clf()
  plt.plot(orig, linewidth = 0.1)
  plt.plot(orig-estimate_uncorr, linewidth = 0.1)
  plt.savefig(name + "whole_uncorr.pdf")
  plt.clf()
  ####################################
  #####  see difference in waveform detailed
  nrows = 2
  ncols = 6
  snips = [0, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000]
  nrows = 2
  ncols = 2
  snips = [0, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000]

  fig, ax = plt.subplots(nrows, ncols, figsize=(6*ncols, 6*nrows), sharey = True, sharex = True)
  s = 0
  for i in range(nrows):
    for j in range(ncols):
      ax[i][j].plot(origSnip[snips[s]], linewidth = 0.5, c = 'b')
      ax[i][j].plot(XpredSnip_minussilence[snips[s]], linewidth = 0.5, c = 'r')
      s +=1
  plt.savefig(name + "snip_corrected.pdf")
  plt.clf()

  fig, ax = plt.subplots(nrows, ncols, figsize=(6*ncols, 6*nrows), sharey = True, sharex = True)
  s = 0
  for i in range(nrows):
    for j in range(ncols):
      ax[i][j].plot(origSnip[snips[s]], linewidth = 0.5, c = 'b')
      ax[i][j].plot(XpredSnip[snips[s]], linewidth = 0.5, c = 'r')
      # ax[i][j].plot(XpredSnip_minussilence[snips[s]], linewidth = 0.5, c = 'r')
      s +=1
  plt.savefig(name + "snip_notcorrected.pdf")
  plt.clf()

# model_save_path = output_folder + 'train_compression_rates/' + f'model_train_1_96_3.keras' #100
# autoencoder = tf.keras.models.load_model(model_save_path)
# plotWave(autoencoder, f'{output_folder}train_compression_rates/model_0.2_1_96_3wave_', 0.2)

In [7]:
def read_histories_from_csv(file_path):
    histories = {}
    with open(file_path, 'r') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # Skip the header row
        for row in reader:
            filename = row[0]
            loss = float(row[1])
            val_loss = float(row[2])
            histories[filename] = {'loss': [loss], 'val_loss': [val_loss]}
    return histories

In [8]:
# paths
# drive.mount('/content/drive')
# inpathTrain = "/content/drive/MyDrive/Machine Learning/Autoencoder/train_data/"
# inpathOut = "/content/drive/MyDrive/Machine Learning/Autoencoder/output/"
inpathTrain = "songs/wav/"
output_folder = "output/Versuch_new/"
fileNames = os.listdir(inpathTrain)
random.seed(42)
fileNames = random.sample(fileNames, len(fileNames))

hyperparamsearch_folder = output_folder + 'hyperparamsearch'
train_compression_rates_folder = output_folder + 'train_compression_rates'

if not os.path.exists(hyperparamsearch_folder):
    os.mkdir(hyperparamsearch_folder)

if not os.path.exists(train_compression_rates_folder):
    os.mkdir(train_compression_rates_folder)

scaler = {}

# global variables
samplerate = 44_100
snippitLength = 64

loss_fct = snipLoss

In [None]:
# run 1
param_space = {'compression_ratio' : [0.1, 0.3, 0.5, 0.7, 0.9],
               'numDense' : [1, 2, 3],
               'numConv' : [8, 16],
               'numConvLayer' : [0, 1, 2]}


# param_space = {'compression_ratio' : np.linspace(0.1,0.9,2),
#                'numDense' : [2, 3],
#                'numConv' : [8, 6],
#                'numConvLayer' : np.linspace(0.1,0.2,2)} # small 1
# param_space = {'compression_ratio' : [0.1, 0.2],
#                'numDense' : [3,4],
#                'numConv' : [4, 8],
#                'numConvLayer' : [0, 1]} # small 2


# run 2.1
param_space = {'compression_ratio' : [0.2],
               'numDense' : [4, 5, 6],
               'numConv' : [8, 16, 24, 32],
               'numConvLayer' : [0, 1, 2, 3, 4]}

# param_space = {'compression_ratio' : [0.2],
#                'numDense' : [2, 3, 4],
#                'numConv' : [32, 64],  # 128 too much memory need
#                'numConvLayer' : [1, 2]}

# run 2.2
param_space = {'compression_ratio' : [0.2],
               'numDense' : [3, 4, 5],
               'numConv' : [64, 128],  # 128 too much memory need
               'numConvLayer' : [2]}

# run 2.3
param_space = {'compression_ratio' : [0.2],
               'numDense' : [1],
               'numConv' : [32, 64],  # 128 too much memory need
               'numConvLayer' : [1, 2]}


# param_space = {'compression_ratio' : [0.2],
#                'numDense' : [1],
#                'numConv' : [64, 96, 128],
#                'numConvLayer' : [4, 6]}


# param_space = {'compression_ratio' : [0.2],
#                'numDense' : [5, 6],
#                'numConv' : [24, 32],
#                'numConvLayer' : [ 3, 4]}

# run 2.4
param_space = {'compression_ratio' : [0.2],
               'numDense' : [1],
               'numConv' : [ [32, 40, 48, 56, 64, 80, 112, 160, 192, 256]],
               'numConvLayer' : [24, 32]}

# run 3
param_space = {'compression_ratio' : [0.2],
               'numDense' : [1],
               'numConv' : [32, 64, 128, 224],
               'numConvLayer' : [1, 2]}

# run 4
param_space = {'compression_ratio' : [0.2],
               'numDense' : [1],
               'numConv' : [96],
               'numConvLayer' : [2, 3]}


#### quick test #####
param_space = {'compression_ratio' : [0.2],
               'numDense' : [1],
               'numConv' : [64, 96],
               'numConvLayer' : [2]}



value_combis = itertools.product(*[v for v in param_space.values()])
param_combis = []
for combi in value_combis:
  param_combi = {key: value for key, value in zip(param_space.keys(), combi)}
  if param_combi['numConvLayer'] == 0:
    param_combi['numConv'] = 0
  param_combis.append(param_combi)

batch_size = 512

# real values
numTotalSongs = 17
percentage_of_song = float(1/(numTotalSongs))
# numHyperTrainSongs = 170
numHyperTrainSongs = 50

# test values
numTotalSongs = 2
percentage_of_song = float(1/(numTotalSongs))
numHyperEpochs = 2
numHyperTrainSongs = 2

# param_combis = param_combis[5:]
time_per_combi = 2.6
print(f'estimated time = {time_per_combi*len(param_combis)/60:.1f} h ({len(param_combis)} sets)')
# param_combis

In [None]:
####################################
#####  Hyperparameter grid search
import stopwatch as sw

t = sw.stopwatch(title='gridsearch', time_unit='s')

# Load existing results from the JSON file if it exists
existing_results = []
existing_file_path = output_folder + 'hyperparamsearch/' + 'searchResults_new.json'
if os.path.exists(existing_file_path):
    with open(existing_file_path, 'r') as file:
        existing_results = json.load(file)

search_results = []
model_save_path = output_folder + 'hyperparamsearch/' + f'model.keras'
if os.path.exists(model_save_path):
    os.remove(model_save_path)

for hyperParamSet in tqdm(param_combis):
  autoencoder = buildModel(hyperParamSet['compression_ratio'],
                           hyperParamSet['numDense'],
                           hyperParamSet['numConv'],
                           hyperParamSet['numConvLayer'])

  histories = {}
  t.task('hyperparam')
  for idx, filename_train in tqdm(enumerate(fileNames[:numHyperTrainSongs])):
    Xt, Xv = loadSongCut(filename_train, numTotalSongs = numTotalSongs, percentage_of_song = percentage_of_song)
    histories[filename_train] = autoencoder.fit(Xt, Xt,
                epochs=numHyperEpochs,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(Xv, Xv))
    del Xt
    del Xv
    if (idx % int(5/numHyperEpochs) == 0) and (idx != 0):
      autoencoder.save(model_save_path)
      del autoencoder
      autoencoder = tf.keras.models.load_model(model_save_path)
  t.stop()
  del autoencoder
  tf.keras.backend.clear_session()

  pdfname = f'HyperParOpt, compression_ratio= {hyperParamSet["compression_ratio"]:.1f}, numDense= {hyperParamSet["numDense"]}, numConvLayer= {hyperParamSet["numConvLayer"]}, numConv= {hyperParamSet["numConv"]}.pdf'
  plot_history(histories, output_folder + 'hyperparamsearch/' + pdfname)

  loss = []
  val_loss = []
  train_si_snr = []
  val_si_snr = []
  for key in histories.keys():
    loss.append(histories[key].history['loss'])
    val_loss.append(histories[key].history['val_loss'])
    train_si_snr.append(histories[key].history['si_snr'])
    val_si_snr.append(histories[key].history['val_si_snr'])
  loss         = np.concatenate(loss)
  val_loss     = np.concatenate(val_loss)
  train_si_snr = np.concatenate(train_si_snr)
  val_si_snr   = np.concatenate(val_si_snr)

  best_val_epoch    = np.argmax(val_si_snr)
  best_val_si_snr   = np.max(val_si_snr)
  best_val_loss     = np.min(val_loss)
  best_train_si_snr = np.max(train_si_snr)
  best_train_loss   = np.min(loss)

  search_results.append({
    **hyperParamSet,
    'best_val_epoch': best_val_epoch,
    'best_val_si_snr': best_val_si_snr,
    'best_val_loss': best_val_loss,
    'best_train_si_snr': best_train_si_snr,
    'best_train_loss': best_train_loss
  })


  latest_results = [{k: int(v) if isinstance(v, np.int64) else v for k, v in d.items()} for d in search_results]

  # Merge existing results and latest results
  all_results = existing_results + latest_results

  # Write all results to the JSON file
  with open(existing_file_path, 'w') as file:
      json.dump(all_results, file, indent='')


In [None]:
# train best model

parSet_sum = {}
for item in search_results:
    numDense = item['numDense']
    numConv = item['numConv']
    numConvLayer = item['numConvLayer']
    best_val_si_snr = item['best_val_si_snr']

    key = (numDense, numConv, numConvLayer)
    if key in parSet_sum:
       parSet_sum[key] += best_val_si_snr
    else:
        parSet_sum[key] = best_val_si_snr

keys = [k for k in parSet_sum.keys()]
si_snr_sum = [parSet_sum[k] for k in keys]
bestParSet = keys[np.argmax(si_snr_sum)]
print(f'best set : {bestParSet}')


# search_results_json = output_folder + 'hyperparamsearch/' + 'searchResults.json'
# search_results_json = 'output/Versuch1_11.07.2023/searchResults.json'
# search_results_json = 'output/Versuch3_13.07.2023/hyperparamsearch/searchResults_170songs_5epochs_final.json'
# with open(search_results_json, 'r') as file:
#     search_results = json.load(file)

# compression_rates = np.linspace(0.1,0.9,9)
compression_rates = [0.2]
# silence_prob = 0.01


numTestSongs = int(total_num_songs*0.3)
numTopoEpochs = 1
numTotalSongs = 17
percentage_of_song = float(1/(numTotalSongs))
total_num_songs = len(fileNames)
numTopoTrainSongs = int(((total_num_songs*0.7)+1))  # 170
numTopoTrainSongs = 50

# test values
numTopoTrainSongs = 2

batch_size = 64
learning_rate = 0.00008

model_save_path = output_folder + 'train_compression_rates/' + f'model_train_test.keras' #100
histories_save_path = output_folder + 'train_compression_rates/' + 'histories_train_test.csv'

def lr_schedule(epoch):
  return learning_rate
# autoencoder = tf.keras.models.load_model(model_save_path)

lr_scheduler = LearningRateScheduler(lr_schedule)
for c in compression_rates:
  # autoencoder = buildModel(c, bestParSet[0],bestParSet[1],bestParSet[2], learning_rate = learning_rate)
  autoencoder = buildModel(c, 1, 96, 3, learning_rate = learning_rate)
  # autoencoder = tf.keras.models.load_model(model_save_path)

  histories = {}
 
  for idx, filename_train in tqdm(enumerate(fileNames[0:numTopoTrainSongs:])):
    Xt, Xv = loadSongCut(filename_train, numTotalSongs = numTotalSongs, percentage_of_song = percentage_of_song)
    histories[filename_train] = autoencoder.fit(Xt, Xt,
                epochs=numTopoEpochs,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(Xv, Xv),
                callbacks=[lr_scheduler])
    del Xt
    del Xv
    save_period = int(9/numHyperEpochs)
    save_period = 1
    if (idx % save_period == 0) and (idx != 0):
      autoencoder.save(model_save_path)
      del autoencoder
      autoencoder = tf.keras.models.load_model(model_save_path)

      # with open(histories_save_path, 'w', newline='') as csvfile:
      #     writer = csv.writer(csvfile)
      #     writer.writerow(['filename', 'loss', 'val_loss'])
      #     for key, value in histories.items():
      #         writer.writerow([key, value.history['loss'][0], value.history['val_loss'][0]], value.history['loss'][0])
  tf.keras.backend.clear_session()
  autoencoder.save(f'{output_folder}train_compression_rates/model_final_test.keras')
  
  pdfname = f'BestSet, compression_ratio ={c:.1f}_test.pdf'
  # histories = read_histories_from_csv(histories_save_path)
  plot_history(histories, f'{output_folder}train_compression_rates/{pdfname}')
  plotWave(autoencoder, f'{output_folder}train_compression_rates/model_final_testwave_', c)

  testPerformance = evaluateTestSongs(autoencoder, num = numTestSongs)

  a = np.array(testPerformance)
  snippets_si_snr = a[:,2].astype('float')
  full_si_snr_corr = a[:,3].astype('float')



  print(f'snippet_sni_snr {mean_with_error(snippets_si_snr)}')
  print(f'full_si_snr_corr {mean_with_error(full_si_snr_corr)}')

  with open(f'{output_folder}train_compression_rates/modelCompressionRate:{c:.1f}Performance.csv', 'w', newline='') as file:
    writer = csv.writer(file)

    for row in testPerformance:
        writer.writerow(row)

In [15]:
output_folder = "output/Versuch3_13.07.2023/"
model_save_path = f'{output_folder}train_compression_rates/modelCompressionRate0.2.keras' #100
autoencoder = tf.keras.models.load_model(model_save_path)

c = 0.2
plotWave(autoencoder, f'{output_folder}', c)

ucorrected SI-SNR = -22.427609905205685 dB
corrected SI-SNR = 15.169060838413325 dB
Test song predicted and saved: 1760_d958-4.wav_0.2_SNR=15.2.wav


<Figure size 640x480 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>

In [16]:

testPerformance = evaluateTestSongs(autoencoder, num = 30)

with open(f'{output_folder}train_compression_rates/model_final_Performance.csv', 'w', newline='') as file:
    writer = csv.writer(file)

for row in testPerformance:
    writer.writerow(row)

evaluating 30 test songs


0it [00:00, ?it/s]

9173/9173 - 36s - loss: 0.0139 - si_snr: 32.2439 - 36s/epoch - 4ms/step


1it [01:16, 76.91s/it]

Test song predicted and saved: 1751_sy_sps13.wav_SNR=13.7.wav
3645/3645 - 14s - loss: 0.0164 - si_snr: 31.3227 - 14s/epoch - 4ms/step


2it [01:45, 48.76s/it]

Test song predicted and saved: 2442_ps29_02.wav_SNR=13.0.wav
5465/5465 - 23s - loss: 0.0132 - si_snr: 32.3413 - 23s/epoch - 4ms/step


3it [02:30, 46.92s/it]

Test song predicted and saved: 2529_ps05_03.wav_SNR=14.4.wav
14006/14006 - 54s - loss: 0.0091 - si_snr: 34.5042 - 54s/epoch - 4ms/step


4it [04:21, 72.34s/it]

Test song predicted and saved: 2377_qt08_2.wav_SNR=14.0.wav
5982/5982 - 23s - loss: 0.0166 - si_snr: 31.6003 - 23s/epoch - 4ms/step


5it [05:10, 63.57s/it]

Test song predicted and saved: 2373_ps19_01.wav_SNR=12.3.wav
10837/10837 - 42s - loss: 0.0116 - si_snr: 33.2438 - 42s/epoch - 4ms/step


6it [06:37, 71.81s/it]

Test song predicted and saved: 2570_bevs7a.wav_SNR=14.3.wav
17906/17906 - 71s - loss: 0.0098 - si_snr: 34.2575 - 71s/epoch - 4ms/step


7it [09:09, 97.83s/it]

Test song predicted and saved: 2398_op47_2.wav_SNR=12.2.wav
8322/8322 - 32s - loss: 0.0121 - si_snr: 32.4951 - 32s/epoch - 4ms/step


8it [10:17, 88.44s/it]

Test song predicted and saved: 2431_qt16_1.wav_SNR=15.3.wav
8048/8048 - 32s - loss: 0.0132 - si_snr: 32.8980 - 32s/epoch - 4ms/step


9it [11:23, 81.40s/it]

Test song predicted and saved: 2492_ps04_04.wav_SNR=10.9.wav
9120/9120 - 36s - loss: 0.0121 - si_snr: 32.9487 - 36s/epoch - 4ms/step


10it [12:36, 78.94s/it]

Test song predicted and saved: 2345_ps14_03.wav_SNR=13.6.wav
10679/10679 - 41s - loss: 0.0071 - si_snr: 35.4399 - 41s/epoch - 4ms/step


11it [14:07, 82.47s/it]

Test song predicted and saved: 2607_ps16_02.wav_SNR=18.2.wav
6636/6636 - 27s - loss: 0.0116 - si_snr: 33.2445 - 27s/epoch - 4ms/step


12it [15:02, 74.18s/it]

Test song predicted and saved: 2481_qt05_2.wav_SNR=16.2.wav
9928/9928 - 39s - loss: 0.0097 - si_snr: 34.2351 - 39s/epoch - 4ms/step


13it [16:23, 76.24s/it]

Test song predicted and saved: 2568_ps27_02.wav_SNR=10.2.wav
12125/12125 - 48s - loss: 0.0206 - si_snr: 30.6818 - 48s/epoch - 4ms/step


14it [18:03, 83.30s/it]

Test song predicted and saved: 1739_sb99m4.wav_SNR=2.8.wav
3916/3916 - 15s - loss: 0.0144 - si_snr: 32.2266 - 15s/epoch - 4ms/step


15it [18:34, 67.77s/it]

Test song predicted and saved: 2384_qt13_4.wav_SNR=10.6.wav
20904/20904 - 79s - loss: 0.0069 - si_snr: 35.9108 - 79s/epoch - 4ms/step


16it [21:22, 97.81s/it]

Test song predicted and saved: 2366_qt12_2.wav_SNR=16.2.wav
7745/7745 - 30s - loss: 0.0173 - si_snr: 31.9035 - 30s/epoch - 4ms/step


17it [22:24, 87.07s/it]

Test song predicted and saved: 2404_ps31_01.wav_SNR=8.2.wav
7274/7274 - 30s - loss: 0.0169 - si_snr: 31.3698 - 30s/epoch - 4ms/step


18it [23:26, 79.37s/it]

Test song predicted and saved: 2478_ps03_04.wav_SNR=12.3.wav
2878/2878 - 11s - loss: 0.0115 - si_snr: 33.5850 - 11s/epoch - 4ms/step


19it [23:50, 62.87s/it]

Test song predicted and saved: 2620_ps06_03.wav_SNR=9.9.wav
14858/14858 - 59s - loss: 0.0113 - si_snr: 33.6252 - 59s/epoch - 4ms/step


20it [25:53, 80.98s/it]

Test song predicted and saved: 1735_sy_sps94.wav_SNR=12.5.wav
10340/10340 - 43s - loss: 0.0074 - si_snr: 35.1034 - 43s/epoch - 4ms/step


21it [27:22, 83.26s/it]

Test song predicted and saved: 2593_ps18_01.wav_SNR=13.0.wav
7274/7274 - 30s - loss: 0.0162 - si_snr: 31.2561 - 30s/epoch - 4ms/step


22it [28:22, 76.35s/it]

Test song predicted and saved: 2473_ps03_04.wav_SNR=12.6.wav
8979/8979 - 38s - loss: 0.0178 - si_snr: 31.4011 - 38s/epoch - 4ms/step


23it [29:41, 77.27s/it]

Test song predicted and saved: 1733_sy_sps92.wav_SNR=15.6.wav
15835/15835 - 62s - loss: 0.0158 - si_snr: 31.5875 - 62s/epoch - 4ms/step


24it [31:51, 93.04s/it]

Test song predicted and saved: 1742_sb163m2.wav_SNR=18.7.wav
7913/7913 - 31s - loss: 0.0129 - si_snr: 32.5810 - 31s/epoch - 4ms/step


25it [32:56, 84.40s/it]

Test song predicted and saved: 2586_vcs4_2.wav_SNR=16.1.wav
8190/8190 - 35s - loss: 0.0092 - si_snr: 34.0586 - 35s/epoch - 4ms/step


26it [34:07, 80.64s/it]

Test song predicted and saved: 1790_kv_465_3.wav_SNR=18.6.wav
4062/4062 - 17s - loss: 0.0096 - si_snr: 34.0371 - 17s/epoch - 4ms/step


27it [34:41, 66.60s/it]

Test song predicted and saved: 2619_ps06_02.wav_SNR=16.7.wav
7842/7842 - 30s - loss: 0.0155 - si_snr: 32.0384 - 30s/epoch - 4ms/step


28it [35:45, 65.73s/it]

Test song predicted and saved: 2603_ps26_01.wav_SNR=11.9.wav
10168/10168 - 40s - loss: 0.0089 - si_snr: 34.5550 - 40s/epoch - 4ms/step


29it [37:07, 70.52s/it]

Test song predicted and saved: 2393_ps17_03.wav_SNR=11.8.wav
7992/7992 - 28s - loss: 0.0155 - si_snr: 31.1238 - 28s/epoch - 4ms/step


30it [38:07, 76.26s/it]

Test song predicted and saved: 2466_lvb23c.wav_SNR=17.9.wav





ValueError: I/O operation on closed file.

In [24]:
pip install uncertainties


/bin/bash: /home/martin/.local/anaconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by /bin/bash)
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting uncertainties
  Downloading uncertainties-3.1.7-py2.py3-none-any.whl (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.4/98.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting future (from uncertainties)
  Downloading future-0.18.3.tar.gz (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.9/840.9 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: future
  Building wheel for future (setup.py) ... [?25ldone
[?25h  Created wheel for future: filename=future-0.18.3-py3-none-any.whl size=492025 sha256=9adb645a384177abe1f14adfdc0b34e2acadadd05f07bc28fc9740b9f244bcb0
  Stored in directory: /tmp/pip-ephem-wheel-cach