In [None]:
import librosa
import librosa.display
import os
import numpy as np
from collections import Counter
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive (Colab only): the dataset and the saved models used below
# are read from / written to paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Genre name (= sub-directory name of the dataset) -> integer class id.
lab = {'reggae':0,
       'rock':1,
       'country':2,
       'disco':3,
       'hiphop':4,
       'classical':5,
       'metal':6,
       'blues':7,
       'jazz':8,
       'pop':9
}

# Split the `nsong` tracks of every genre into `nBatch` contiguous batches whose
# sizes differ by at most one song.
nBatch = 6    # number of batches per genre
nsong = 100   # number of tracks per genre
batchDim = list()    # batchDim[k]   -> number of songs per genre in batch k
batchIndex = list()  # batchIndex[k] -> inclusive (first, last) file index of batch k

# Work on local counters so that the configuration values `nBatch` and `nsong`
# keep their meaning after this cell has run (re-running the cell is idempotent).
songs_left = nsong
currentIdx = 0
for batches_left in range(nBatch, 0, -1):
  # Floor division: the smaller batches come first, the remainder is absorbed
  # by the later ones.
  nBatchSong = songs_left // batches_left
  batchDim.append(nBatchSong)
  batchIndex.append((currentIdx, currentIdx + nBatchSong - 1))

  currentIdx += nBatchSong
  songs_left -= nBatchSong

#example with nBatch = 6
#batchDim = [16,16,17,17,17,17]
#batchIndex =[(0,15),(16,31),(32,48),(49,65),(66,82),(83,99)]
# 1) 16songs/genre indexes -> (0-15)
# 2) 16songs/genre indexes -> (16-31)
# 3) 17songs/genre indexes -> (32-48)
# 4) 17songs/genre indexes -> (49-65)
# 5) 17songs/genre indexes -> (66-82)
# 6) 17songs/genre indexes -> (83-99)

def loadBatch(index, dataset_path="/content/drive/MyDrive/genres"):
  """Load one batch of tracks and cut every track into overlapping segments.

  For every genre directory found under ``dataset_path`` the files whose
  position (in sorted file-name order) lies inside ``batchIndex[index]`` are
  loaded with librosa and split into 5-second windows with 75% overlap.

  Relies on the module-level ``batchIndex`` (inclusive index ranges) and
  ``lab`` (genre name -> class id).

  Args:
    index: position of the batch inside ``batchIndex``.
    dataset_path: root directory containing one sub-directory per genre.

  Returns:
    ``(data, labels)`` as NumPy arrays: one row of audio samples per segment
    and the matching integer class id per segment.
  """
  segments_per_track = 21  # windows that fit in a full 30 s track
  overlapping = 0.75
  sample_length = 5 #seconds

  first_idx, last_idx = batchIndex[index]
  data = []
  labels = []

  for dirpath, dirnames, filenames in os.walk(dataset_path):
    # Compare by value: `is not` only worked because os.walk happens to yield
    # the very same string object for the root directory.
    if dirpath == dataset_path:
      continue
    label = os.path.basename(dirpath)
    if label not in lab:
      # Not a genre directory (e.g. a hidden/checkpoint folder): nothing to load.
      continue
    print("Loading %s"%label)
    # os.walk lists files in arbitrary order; sort so that a given batch index
    # always selects the same tracks, also across sessions.
    for file_idx, f in enumerate(sorted(filenames)):
      if not first_idx <= file_idx <= last_idx:
        continue
      file_path = os.path.join(dirpath, f)
      signal, sample_rate = librosa.load(file_path)
      segment_len = int(sample_length * sample_rate)
      for s in range(segments_per_track):
        start_sample_index = int(s * (1-overlapping) * sample_length * sample_rate) #s = 0 -> 0, s = 1 -> int(27562.5) = 27562
        end_sample_index = start_sample_index + segment_len  # exclusive
        if end_sample_index > len(signal):
          # Track shorter than expected: keep only the windows that fit so
          # every segment has the same length (no ragged array).
          break
        data.append(signal[start_sample_index:end_sample_index])
        labels.append(lab[label])
  return np.array(data),np.array(labels)

In [None]:
# Show the songs-per-genre count of each batch and its inclusive (first, last) file-index range.
batchDim, batchIndex

([16, 16, 17, 17, 17, 17],
 [(0, 15), (16, 31), (32, 48), (49, 65), (66, 82), (83, 99)])

In [None]:
# Hyper-parameters of the convolutional network built further down.
kernelS = poolS = 3  # Conv1D kernel size / MaxPooling1D window size
stride = [3, 1]      # stride[0]: first convolution and pooling, stride[1]: residual blocks and last convolution
filter = [
    (2, 128),  # (number of residual blocks, filters per block)
    (6, 256),
    (1, 512),
]

In [None]:
# This run trains on batch 2 and validates on batch 3 (indexes into batchIndex).
x_train, y_train=loadBatch(2)
x_valid, y_valid=loadBatch(3)

Loading reggae
Loading rock
Loading country
Loading disco
Loading hiphop
Loading classical
Loading metal
Loading blues
Loading jazz
Loading pop
Loading reggae
Loading rock
Loading country
Loading disco
Loading hiphop
Loading classical
Loading metal
Loading blues
Loading jazz
Loading pop


In [None]:
# Sanity check: (number of segments, samples per segment).
x_train.shape

(3568, 110250)

In [None]:
import time
def unison_shuffled_copies(a, b, seed=None):
    """Return copies of ``a`` and ``b`` shuffled with the same random permutation.

    A private ``numpy.random.Generator`` is used, so the global NumPy random
    state is left untouched and two calls made within the same second no
    longer produce the same permutation.

    Args:
        a: NumPy array indexed along its first axis.
        b: NumPy array with the same length as ``a``.
        seed: optional seed for a reproducible shuffle; ``None`` (default)
            draws fresh entropy from the operating system.

    Returns:
        ``(a[p], b[p])`` where ``p`` is a random permutation of ``range(len(a))``.

    Raises:
        AssertionError: if ``a`` and ``b`` differ in length.
    """
    assert len(a) == len(b), "a and b must have the same length"
    rng = np.random.default_rng(seed)
    p = rng.permutation(len(a))
    return a[p], b[p]

In [None]:
# loadBatch returns the segments grouped genre by genre; shuffle data and labels
# together so that consecutive segments are no longer ordered by genre.
x_train, y_train = unison_shuffled_copies(x_train, y_train)
x_valid, y_valid = unison_shuffled_copies(x_valid, y_valid)

In [None]:
import soundfile as sf
# Sanity check: write one training segment to disk so it can be listened to,
# then look at the class balance and at the label of that same segment.
sample_rate = 22050  # 110250 samples per 5 s segment; presumably librosa's default load rate - confirm
sf.write('prova.wav', x_train[100,:],sample_rate)

print(Counter(y_train))  # number of segments per genre id
y_train[100]

Counter({0: 357, 9: 357, 3: 357, 7: 357, 8: 357, 6: 357, 5: 357, 2: 357, 4: 356, 1: 356})


8

In [None]:
# Residual Layer
def res1d(input, nFilters, kernelSize, stride):
  """Build one 1-D residual block on top of ``input`` and return its output tensor.

  Main path: Conv1D -> BatchNorm -> LeakyReLU -> Conv1D -> BatchNorm.
  The shortcut is the identity when it already has the shape of the main path,
  otherwise a Conv1D + BatchNorm projection. Both paths are summed and passed
  through a LeakyReLU.

  Args:
    input: Keras tensor; indexed as (batch, time, channels) below.
    nFilters: number of filters of every convolution in the block.
    kernelSize: kernel size of every convolution in the block.
    stride: stride of the block; applied once on the main path and once on the
      shortcut projection.

  Returns:
    The Keras tensor produced by the block.
  """
  y = keras.layers.Conv1D(filters = nFilters, kernel_size = kernelSize, strides = stride, padding = "same",kernel_regularizer=keras.regularizers.l1_l2(0,0.001))(input)
  y = keras.layers.BatchNormalization()(y)
  y = keras.layers.LeakyReLU()(y)
  # Only the first convolution down-samples. Applying `stride` here as well
  # would shrink the main path twice while the shortcut shrinks once, so the
  # Add below would fail for any stride other than 1.
  y = keras.layers.Conv1D(filters = nFilters, kernel_size = kernelSize, strides = 1, padding = "same",kernel_regularizer=keras.regularizers.l1_l2(0,0.001))(y)
  y = keras.layers.BatchNormalization()(y)
  # if the shape of the shortcut and y aren't equal (time axis changed by the
  # stride, or different number of channels), we add a convolutional1D layer
  # and a batch normalization to the shortcut
  if input.shape[1] != y.shape[1] or input.shape[2] != y.shape[2]:
    shortcut = keras.layers.Conv1D(filters = nFilters, kernel_size = kernelSize, strides = stride, padding = "same")(input)
    shortcut = keras.layers.BatchNormalization()(shortcut)
  else:
    shortcut = input
  y = keras.layers.Add()([shortcut, y])
  y = keras.layers.LeakyReLU()(y)
  return y


In [None]:
# Network graph: strided first convolution -> residual blocks, each followed by
# max pooling -> 1x1 convolution -> flatten -> 10-way softmax.
x = keras.Input(shape=(110250,1))

# First convolution, using the filter count of the first residual stage.
y = keras.layers.Conv1D(
    filters=filter[0][1],
    kernel_size=kernelS,
    strides=stride[0],
    padding="same",
    kernel_regularizer=keras.regularizers.l1_l2(0,0.001),
)(x)

# Residual stages: `n_blocks` blocks of `n_filters` filters, each followed by a
# max pooling; the shape is printed after every block and after every pooling.
for n_blocks, n_filters in filter:
  for block_no in range(n_blocks):
    y = res1d(y, n_filters, kernelS, stride[1])
    print(y.shape)
    y = keras.layers.MaxPooling1D(pool_size=poolS, strides=stride[0])(y)
    print(y.shape)

# Last convolution: kernel size 1, filter count of the last stage.
y = keras.layers.Conv1D(
    filters=filter[2][1],
    kernel_size=1,
    strides=stride[1],
    padding="same",
)(y)

# Classifier head: one softmax output per genre.
y = keras.layers.Flatten()(y)
predictions = keras.layers.Dense(10, activation='softmax')(y)

(None, 36750, 128)
(None, 12250, 128)
(None, 12250, 128)
(None, 4083, 128)
(None, 4083, 256)
(None, 1361, 256)
(None, 1361, 256)
(None, 453, 256)
(None, 453, 256)
(None, 151, 256)
(None, 151, 256)
(None, 50, 256)
(None, 50, 256)
(None, 16, 256)
(None, 16, 256)
(None, 5, 256)
(None, 5, 512)
(None, 1, 512)


In [None]:
# Continue from a previously saved model instead of the freshly built graph.
# (Judging by the path name, that model was trained on batch 1 and validated on batch 2 - confirm.)
# To start from scratch, use the graph defined above instead:
#model = keras.Model(inputs=x, outputs=predictions)
model = keras.models.load_model("/content/drive/MyDrive/TrainingRegularization/1Train2Valid/model.h5")

In [None]:
# Compiling: sparse categorical loss/metric because the labels are integer
# genre ids (see `lab`), not one-hot vectors.
# NOTE(review): `model` was just loaded from disk; re-compiling presumably
# discards any optimizer state stored with it - confirm this is intended.
model.compile(loss = keras.losses.SparseCategoricalCrossentropy(),
              optimizer='adam', metrics=keras.metrics.sparse_categorical_accuracy)


In [None]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 110250, 1)]  0           []                               
                                                                                                  
 conv1d_22 (Conv1D)             (None, 36750, 128)   512         ['input_4[0][0]']                
                                                                                                  
 conv1d_23 (Conv1D)             (None, 36750, 128)   49280       ['conv1d_22[0][0]']              
                                                                                                  
 batch_normalization_20 (BatchN  (None, 36750, 128)  512         ['conv1d_23[0][0]']              
 ormalization)                                                                              

In [None]:
# Stop training once the validation loss has not improved for 4 consecutive
# epochs and roll the model back to the weights of the best epoch. Without
# `restore_best_weights` the model saved afterwards would hold the weights of
# the last epoch, i.e. `patience` epochs past the best validation loss.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience = 4, restore_best_weights=True)
callbacks_list = [early_stop]

In [None]:
n_epochs = 100  # upper bound; the early-stopping callback normally ends training sooner
n_batch = 32    # mini-batch size
# Fitting
# NOTE(review): x_train is 2-D (segments, samples) while the network input is
# declared as (110250, 1); this relies on Keras adding the trailing channel
# axis - confirm.
model.fit(x_train, y_train,validation_data = (x_valid, y_valid), epochs=n_epochs, batch_size=n_batch, callbacks=callbacks_list )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


<keras.callbacks.History at 0x7f9a8c1fead0>

In [None]:
# Persist the model after this round (train on batch 2, validate on batch 3) so a later run can resume from it.
model.save("/content/drive/MyDrive/TrainingRegularization/2Train3Valid/model.h5")

  layer_config = serialize_layer_fn(layer)


In [None]:
# Load batch 5 as the test set. It is deliberately NOT shuffled: the song-level
# aggregation below relies on the segments of a song being contiguous.
x_test, y_test=loadBatch(5)

In [None]:
# Model output for every test segment: one row per segment, one column per genre.
y_hat = model.predict(x_test)

In [None]:
#Testing: accuracy computed on the individual segments
segment_accuracy = keras.metrics.SparseCategoricalAccuracy()
accuracy_test = segment_accuracy(y_test, y_hat)
print('Accuracy (test dataset):%1.2f%%'% (accuracy_test * 100))

In [None]:
#Aggregation of the segments in the songs
# The segments of each genre are cut into consecutive groups of 21 (the number
# of segments of a full track); a shorter last group is kept as well.
#   song[k]     -> mean of the predicted class distributions of group k
#   songTrue[k] -> most frequent true label of group k
# NOTE(review): loadBatch emits fewer than 21 segments for tracks shorter than
# 30 s, so after such a track the groups no longer line up with song
# boundaries (they still never mix genres).
song = []
songTrue = []
for genre in range(10):
  genre_mask = y_test == genre
  genre_probs = y_hat[genre_mask, :]
  genre_truth = y_test[genre_mask]
  for start in range(0, len(genre_truth), 21):
    stop = start + 21
    song.append(np.mean(genre_probs[start:stop], axis=0))
    songTrue.append(np.argmax(np.bincount(genre_truth[start:stop])))

In [None]:
#Testing Aggregate: accuracy computed on whole songs (averaged segment predictions)
song_accuracy = keras.metrics.SparseCategoricalAccuracy()
accuracy_test = song_accuracy(songTrue, song)
print('Accuracy (test dataset):%1.2f%%'% (accuracy_test * 100))