This script implements a simple baseline-CNN model for the Music Genre Classification task.

In [1]:
# System/zip-handling imports
import os, sys
import zipfile

# Imports tensorflow
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, regularizers

# Imports image handling
import cv2
import numpy as np
import skimage

# For generating training and test data
import random

# Save training progress
import csv
from datetime import datetime
from shutil import copyfile  # Making copy of this file instance (including param settings used)

W1218 16:27:24.092434 140281984849664 __init__.py:321] Limited tf.compat.v2.summary API due to missing TensorBoard installation.
W1218 16:27:24.119812 140281984849664 __init__.py:321] Limited tf.compat.v2.summary API due to missing TensorBoard installation.
W1218 16:27:24.135915 140281984849664 __init__.py:352] Limited tf.summary API due to missing TensorBoard installation.


Get a feeling for the nature of the training and evaluation data:

In [2]:
# Open the zip archive that holds the spectrograms and fetch the raw bytes of the first one.
# `archive` stays open on purpose: the following cells read the remaining images from it.
archive = zipfile.ZipFile('../data/spectrograms.zip', 'r')
imgdata = archive.read('spectrograms/spectrogram_0000.png')

# Decode the PNG bytes into an image array (flag 1 requests a 3-channel colour image)
raw_bytes = np.frombuffer(imgdata, dtype=np.uint8)
image = cv2.imdecode(raw_bytes, 1)

# Display the image in a separate window; the code only continues after a key press in that window
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Report some basic properties of the decoded image
for caption, value in (('Type of image:', type(image)),
                       ('Dimension of a single image file:', image.shape)):
    print(caption)
    print(value)

Type of image:
<class 'numpy.ndarray'>
Dimension of a single image file:
(128, 128, 3)


Check whether the data files are read in ascending order; otherwise, the labels won't match the data!

# Sort training files:

In [9]:
# Collect every spectrogram PNG in the archive, sorted by name so that the i-th file
# lines up with the i-th label.
# The member list is deliberately NOT sliced with [1:] any more: the name filter already
# excludes a leading directory entry, and the slice silently dropped the first spectrogram
# (shifting all labels by one) whenever the archive did not start with such an entry.
files = sorted(f for f in archive.namelist()
               if f.startswith('spectrograms/') and f.endswith('.png'))

print(files)

['spectrograms/spectrogram_0000.png', 'spectrograms/spectrogram_0001.png', 'spectrograms/spectrogram_0002.png', 'spectrograms/spectrogram_0003.png', 'spectrograms/spectrogram_0004.png', 'spectrograms/spectrogram_0005.png', 'spectrograms/spectrogram_0006.png', 'spectrograms/spectrogram_0007.png', 'spectrograms/spectrogram_0008.png', 'spectrograms/spectrogram_0009.png', 'spectrograms/spectrogram_0010.png', 'spectrograms/spectrogram_0011.png', 'spectrograms/spectrogram_0012.png', 'spectrograms/spectrogram_0013.png', 'spectrograms/spectrogram_0014.png', 'spectrograms/spectrogram_0015.png', 'spectrograms/spectrogram_0016.png', 'spectrograms/spectrogram_0017.png', 'spectrograms/spectrogram_0018.png', 'spectrograms/spectrogram_0019.png', 'spectrograms/spectrogram_0020.png', 'spectrograms/spectrogram_0021.png', 'spectrograms/spectrogram_0022.png', 'spectrograms/spectrogram_0023.png', 'spectrograms/spectrogram_0024.png', 'spectrograms/spectrogram_0025.png', 'spectrograms/spectrogram_0026.png', 

# Read in both training and testing data from zip archive:


    
    # TODO: TRANSPOSE WHEN USING RNN

In [10]:

# Not used by any cell visible here; kept in case later cells rely on the name.
data_set = []

# Data storage: one 128x128 grayscale image per file.
# The array is allocated once at its final size and filled by index; growing it with
# np.append re-copied the whole data set on every iteration and needed a dummy first row.
combined_data = np.empty([len(files), 128, 128])

# Read in images & store processed instances
for file_idx, f_name in enumerate(files):
    # Get image data from zip file and decode it as a 3-channel colour image
    zip_img_data = archive.read(f_name)
    image = cv2.imdecode(np.frombuffer(zip_img_data, dtype=np.uint8), 1)
    if image is None:
        # imdecode signals a corrupt/unsupported file by returning None
        raise ValueError('Could not decode image: ' + f_name)

    # OpenCV delivers the channels in BGR order, whereas rgb2gray expects RGB.
    # Reverse the channel axis so the luminance weights hit the right channels.
    image = image[:, :, ::-1]

    # Normalize image's colors to range [0, 1]
    image = image / 255.0

    # Grayscale image: collapses the colour axis, leaving a 2D array
    gray_image = skimage.color.rgb2gray(image)

    # Store grayscaled image (raises if an image is not 128x128)
    combined_data[file_idx] = gray_image

print('Done reading in... Shape of data array:')
print(combined_data.shape)
print('Done.')

Done reading in... Shape of data array:
(1000, 128, 128)
Done.


# Read in labels

In [11]:
# Read in labels
labels_path = '../data/labels.txt'

# The file holds one integer class label per line. Blank lines (e.g. a trailing empty
# line at the end of the file) are skipped instead of crashing int().
with open(labels_path, 'r') as file:
    #file.readline() # skip the first line
    label_values = [int(line) for line in file if line.strip()]

# Build the array in one go instead of growing it with np.append.
# dtype stays float64, which is what the previous np.empty/np.append construction
# produced, so the printed output and the downstream cells behave as before.
combined_labels = np.array(label_values, dtype=np.float64)
print(combined_labels)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.

# Divide data into train and test data.

------------------------------------------------------
Training data will be contained in:    training_data
Testing  data will be contained in:    testing_data

Training labels will be contained in:  training_labels
Testing  labels will be contained in:  testing_labels

In [12]:
# Divide data into train and test data

# Data points and labels are paired purely by position, so their counts must agree
if len(combined_data) != len(combined_labels):
    raise ValueError('Number of data points (' + str(len(combined_data)) + ') does not match '
                     'number of labels (' + str(len(combined_labels)) + ')')

# Draw the set of indices of the data points that are reserved for testing
percentage_test_data = 0.2
split_seed = 42  # Fixed seed: the same split is drawn on every run; change it for another split
population = range(len(combined_labels))
nr_samples = int(percentage_test_data * len(combined_labels))

# A dedicated generator makes the split reproducible without touching the global random state
test_indices = sorted(random.Random(split_seed).sample(population, nr_samples))

print('Nr. test indices: ' + str(len(test_indices)))
print('Test indices: ' + str(test_indices))


# Split data into training- and test data, respectively
test_len = len(test_indices)
train_len = len(combined_labels) - test_len

# Boolean mask over all data points: True marks a test sample, False a training sample
is_test = np.zeros(len(combined_labels), dtype=bool)
is_test[np.asarray(test_indices, dtype=np.intp)] = True

# Boolean indexing copies the selected rows in ascending index order, i.e. the same
# ordering an index-by-index assignment over the sorted test indices yields.
testing_data, testing_labels = combined_data[is_test], combined_labels[is_test]
training_data, training_labels = combined_data[~is_test], combined_labels[~is_test]

# Append a trailing channel axis of size 1: (N, H, W) -> (N, H, W, 1)
training_data = training_data[..., np.newaxis]
testing_data = testing_data[..., np.newaxis]

print('Final:')
print(training_data.shape)
print(training_labels.shape)
print(testing_data.shape)
print(testing_labels.shape)


Nr. test indices: 200
Test indices: [10, 13, 14, 34, 36, 41, 46, 47, 50, 64, 65, 71, 74, 79, 82, 83, 85, 90, 98, 101, 102, 105, 111, 113, 115, 119, 122, 143, 147, 155, 159, 160, 162, 169, 176, 177, 179, 182, 188, 193, 202, 207, 218, 222, 224, 228, 234, 236, 238, 242, 243, 248, 258, 260, 261, 263, 265, 272, 275, 276, 280, 285, 289, 295, 300, 306, 308, 313, 314, 327, 337, 342, 343, 355, 364, 369, 380, 384, 386, 389, 391, 393, 394, 402, 407, 412, 413, 415, 424, 425, 440, 442, 444, 446, 451, 452, 456, 459, 461, 463, 481, 486, 492, 496, 504, 526, 528, 530, 537, 538, 543, 544, 563, 565, 569, 585, 597, 604, 606, 607, 614, 617, 623, 624, 631, 644, 654, 656, 665, 679, 686, 690, 691, 694, 695, 696, 698, 705, 717, 718, 724, 726, 728, 735, 736, 738, 740, 749, 750, 767, 774, 778, 784, 798, 811, 814, 819, 820, 825, 827, 837, 839, 845, 851, 859, 861, 869, 871, 872, 876, 880, 884, 890, 895, 896, 899, 907, 910, 913, 914, 918, 922, 928, 929, 936, 938, 939, 944, 945, 949, 952, 955, 957, 959, 964, 970, 99

# Set up the CNN architecture

In [14]:
# Build and compile the baseline CNN

# Start from a clean slate: destroys the current TF graph and creates a new one
tf.keras.backend.clear_session()

dimensions = 128  # Height and width of the (square) single-channel input images
classes = 10      # Number of output units of the softmax layer

input_shape = (dimensions, dimensions, 1)

# Layer stack, listed in forward order.
# Note on regularizer(s), copied from https://www.tensorflow.org/tutorials/keras/overfit_and_underfit:
# l2(0.001) means that every coefficient in the weight matrix of the layer will add 0.001 * weight_coefficient_value**2
# to the total loss of the network.
model = models.Sequential([
    # Convolution block 1
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape,
                  kernel_regularizer=regularizers.l2(0.1)),
    layers.Dropout(0.2),
    layers.MaxPooling2D((2, 2)),

    # Convolution block 2
    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    # (Disabled experiment: Dropout(0.1) + MaxPooling2D((2, 2)) followed by a third
    #  Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.5)).)
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Classifier head
    layers.Flatten(),
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.5)),
    layers.Dropout(0.3),
    layers.Dense(classes, activation='softmax'),
])

# Print summary
model.summary()

# Optimizer settings (a learning rate of 0.001 was tried before)
optimizer = tf.optimizers.Adam(learning_rate=0.0001,
                               beta_1=0.9,
                               beta_2=0.999,
                               epsilon=1e-07,
                               amsgrad=False,
                               name='Adam')

# Compile model. The plain cross-entropy is also tracked as a metric, next to accuracy.
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy', 'sparse_categorical_crossentropy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 64)      640       
_________________________________________________________________
dropout (Dropout)            (None, 126, 126, 64)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 128)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 30, 128)       0         
_________________________________________________________________
flatten (Flatten)            (None, 115200)            0

# Execute training

Start training process:
    Run X times Y tensorflow-epochs and save a model as a checkpoint after every Y epochs. 
    FIXME: A bit of a hacky solution for now, but it can be prettified.
    
IMPORTANT: 'accuracy'     == accuracy achieved during training on training data;  * the UN-important measure
           'val_accuracy' == accuracy achieved on TEST data AFTER training epoch; * the important measure

In [None]:
# Set up folder for data gathering during training process
now = datetime.now()
TIME_STAMP = now.strftime("_%Y_%d_%m__%H_%M_%S__%f")
MODEL_ID = 'Model_' + TIME_STAMP + '/'
path = 'TrainingRun/' + MODEL_ID

# exist_ok avoids the check-then-create race of a separate os.path.exists() test
os.makedirs(path, exist_ok=True)

# Set up documentation (csv doc) of training progress; the with-block closes the file
with open(path+'training_progress.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["epoch", "loss", "accuracy", "val_loss", "val_accuracy", "sparse_categorical_crossentropy"])


# Save initial model - For preventing memory leaks
# (each repetition below reloads the most recent checkpoint by this name)
name = 'cnn_model_0_acc_0'
model.save(path+name+'.h5')

# Run training
repetitions = 10    # How many repetitions of given nr of epochs
eps = 10            # tf-Epochs
accuracy = 0        # Validation accuracy reached at the end of the previous repetition
min_acc = 0.85      # Early stopping is only considered once this validation accuracy was reached

for i in range(repetitions):
    # Live terminal update
    print('Completed epochs so far: ' + str(i*eps))

    # Prevent memory leakage
    tf.keras.backend.clear_session()  # Destroys the current TF graph and creates a new one.
    model = tf.keras.models.load_model(path+name+'.h5')  # Reload model

    # Perform x epochs of training
    history = model.fit(training_data, training_labels,
                        epochs=eps,
                        validation_data=(testing_data, testing_labels))

    # history.history maps each metric to a list with one value per epoch of this fit() call.
    # The checkpoint and the csv row describe the state AFTER the last of these epochs,
    # so take the final entry (index -1), not the first one.
    final_metrics = {metric: values[-1] for metric, values in history.history.items()}
    val_accuracy = final_metrics['val_accuracy']

    # Get string representation of performed number of training epochs & new model name.
    # Only the final validation accuracy goes into the name (formatted to 4 decimals);
    # embedding the whole per-epoch list produced file names full of brackets and commas.
    epoch = str((i+1)*eps)
    name = 'cnn_model_' + epoch + '_acc_' + '{:.4f}'.format(val_accuracy)

    # Save the entire model as a checkpoint and/or final model to a HDF5 file.
    model.save(path+name+'.h5')

    # Record training progress
    with open(path+'training_progress.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([epoch,
                         final_metrics["loss"],
                         final_metrics["accuracy"],
                         final_metrics["val_loss"],
                         val_accuracy,
                         final_metrics["sparse_categorical_crossentropy"]
                         ])

    if accuracy >= min_acc and val_accuracy < accuracy:
        # Drop in accuracy on evaluation data. Overfitting?
        break
    accuracy = val_accuracy
print('Done.')

Completed epochs so far: 0
Train on 800 samples, validate on 200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Completed epochs so far: 10
Train on 800 samples, validate on 200 samples
Epoch 1/10
