# This script implements a simple RNN model for the Music Genre Classification task.

In [1]:
# System/zip-handling imports
import os, sys
import zipfile

# Imports tensorflow
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, regularizers

# Imports image handling
import cv2
import numpy as np
import skimage

# For generating training and test data
import random

# Save training progress
import csv
from datetime import datetime
from shutil import copyfile  # Making copy of this file instance (including param settings used)


W1229 00:16:35.822873 140281217849088 __init__.py:321] Limited tf.compat.v2.summary API due to missing TensorBoard installation.
W1229 00:16:35.851773 140281217849088 __init__.py:321] Limited tf.compat.v2.summary API due to missing TensorBoard installation.
W1229 00:16:35.867061 140281217849088 __init__.py:352] Limited tf.summary API due to missing TensorBoard installation.


# Get sorted list of training file names:

In [2]:
# Get access to zip-archive
archive = zipfile.ZipFile('../data/spectrograms.zip', 'r')
imgdata = archive.read('spectrograms/spectrogram_0000.png')

files = sorted([f for f in archive.namelist()[1:] if f.startswith('spectrograms/') and f.endswith('.png')])

print(files)

['spectrograms/spectrogram_0000.png', 'spectrograms/spectrogram_0001.png', 'spectrograms/spectrogram_0002.png', 'spectrograms/spectrogram_0003.png', 'spectrograms/spectrogram_0004.png', 'spectrograms/spectrogram_0005.png', 'spectrograms/spectrogram_0006.png', 'spectrograms/spectrogram_0007.png', 'spectrograms/spectrogram_0008.png', 'spectrograms/spectrogram_0009.png', 'spectrograms/spectrogram_0010.png', 'spectrograms/spectrogram_0011.png', 'spectrograms/spectrogram_0012.png', 'spectrograms/spectrogram_0013.png', 'spectrograms/spectrogram_0014.png', 'spectrograms/spectrogram_0015.png', 'spectrograms/spectrogram_0016.png', 'spectrograms/spectrogram_0017.png', 'spectrograms/spectrogram_0018.png', 'spectrograms/spectrogram_0019.png', 'spectrograms/spectrogram_0020.png', 'spectrograms/spectrogram_0021.png', 'spectrograms/spectrogram_0022.png', 'spectrograms/spectrogram_0023.png', 'spectrograms/spectrogram_0024.png', 'spectrograms/spectrogram_0025.png', 'spectrograms/spectrogram_0026.png', 

# JFF: Get a feeling for the nature of the training and evaluation data:

In [3]:
# Interpret image-data as image
image = cv2.imdecode(np.frombuffer(imgdata, dtype=np.uint8), 1)

# Get data and visual representation of original image
print('ORIGINAL IMAGE:')
print('Type of image:')
print(type(image))
print('Dimension of a single image file:')
print(image.shape)

# Show image visually (press any key in opening window in order to proceed in code...)
cv2.imshow("Image", image)
#cv2.waitKey(0)
#cv2.destroyAllWindows()

# Transpose image and repeat the above...
#order_after_transpose = [1,0,2]
#image = np.transpose(image, axes=order_after_transpose)
image = np.rot90(image, 3) # Rotate 3* by 90° (each) 
print('TRANSPOSED IMAGE:')
print('Type of image:')
print(type(image))
print('Dimension of a single image file:')
print(image.shape)

cv2.imshow("Image transposed", image)
#cv2.waitKey(0)
#cv2.destroyAllWindows()


print('\n\n... Full pipeline demonstration ...\n\n')

##### FULL READ-IN-PIPELINE:
# Interpret image-data as image
image = cv2.imdecode(np.frombuffer(imgdata, dtype=np.uint8), 1)

# Normalize image's colors to range [0, 1]
image = image / 255.0

# Rotate by 270° (=3*90°) 
image = np.rot90(image, 3)

# Grayscale image
gray_image = skimage.color.rgb2gray(image)

cv2.imshow("Final Image", gray_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

ORIGINAL IMAGE:
Type of image:
<class 'numpy.ndarray'>
Dimension of a single image file:
(128, 128, 3)
TRANSPOSED IMAGE:
Type of image:
<class 'numpy.ndarray'>
Dimension of a single image file:
(128, 128, 3)


... Full pipeline demonstration ...




# Read in both training and testing data from zip archive:

In [4]:
data_set = []
# Data storage
combined_data = np.empty([1, 128, 128])

# Read in images & store processed instances
for f_name in files:
    # Get image data from zip file
    zip_img_data = archive.read(f_name)
    image = cv2.imdecode(np.frombuffer(zip_img_data, dtype=np.uint8), 1)
    
    # Normalize image's colors to range [0, 1]
    image = image / 255.0
    
    # Rotate by 270° (=3*90°) 
    image = np.rot90(image, 3)
    
    # Grayscale image
    gray_image = skimage.color.rgb2gray(image)

    # Store grayscaled image
    combined_data = np.append(combined_data, [gray_image], axis=0)
    
# Remove initial, empty datapoint
combined_data = combined_data[1:, :, :]

print('Done reading in... Shape of data array:')
print(combined_data.shape)
print('Done.')

Done reading in... Shape of data array:
(1000, 128, 128)
Done.


# Read in labels:

In [5]:
labels_path = '../data/labels.txt'

combined_labels = np.empty([1])

with open(labels_path, 'r') as file:
    for line in file:
        combined_labels = np.append(combined_labels, [int(line)])

# Remove initial, empty datapoint
combined_labels = combined_labels[1:] 
print(combined_labels)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.

# Divide data into train and test data:

------------------------------------------------------
Training data will be contained in:    training_data
Tetsing  data will be contained in:    testing_data

Training labels will be contained in:  training_labels
Tetsing  labels will be contained in:  testing_labels

In [6]:
# Get set of test-indices which indicates the training data points that have to be reserved for training
percentage_test_data = 0.15
population = range(len(combined_labels))
nr_samples = int(percentage_test_data * len(combined_labels))

test_indices = random.sample(population, nr_samples)
test_indices = sorted(test_indices)

print('Nr. test indices: ' + str(len(test_indices)))
print('Test indices: ' + str(test_indices))


# Split data into training- and test data, respectively - Preparation: Create empty arrays in which to later insert data
test_len = len(test_indices)
train_len = len(combined_labels)-len(test_indices)
training_data, training_labels = np.empty([train_len, 128, 128]), np.empty([train_len])
testing_data, testing_labels = np.empty([test_len, 128, 128]), np.empty([test_len])

test_idx_list_idx = 0
i = 0

# Iterate through all data and assign each data point either to training data or testing data
for data_idx in range(len(combined_labels)):

    if test_idx_list_idx < nr_samples and data_idx == test_indices[test_idx_list_idx]:
        testing_data[test_idx_list_idx, :, :] = combined_data[data_idx, :, :]
        testing_labels[test_idx_list_idx] = combined_labels[data_idx]
        test_idx_list_idx += 1
        
    else:
        
        training_data[i, :, :] = combined_data[data_idx, :, :]
        training_labels[i] = combined_labels[data_idx]
        i += 1
        

# NOT FOR RNN architecture!
#training_data = training_data.reshape([len(training_labels), 128, 128, 1])
#testing_data = testing_data.reshape([len(testing_labels), 128, 128, 1])
        
print('Final:')
print(training_data.shape)
print(training_labels.shape)
print(testing_data.shape)
print(testing_labels.shape)


Nr. test indices: 150
Test indices: [2, 4, 17, 22, 26, 29, 34, 48, 53, 81, 83, 117, 125, 129, 132, 133, 136, 149, 161, 162, 166, 186, 192, 193, 200, 209, 213, 214, 215, 221, 222, 231, 242, 247, 259, 263, 270, 280, 286, 293, 304, 316, 326, 328, 331, 337, 342, 350, 358, 363, 366, 368, 382, 407, 409, 424, 425, 426, 435, 437, 446, 451, 459, 472, 487, 493, 497, 503, 521, 526, 527, 532, 533, 538, 542, 546, 548, 560, 567, 573, 577, 578, 594, 596, 603, 605, 609, 623, 629, 632, 642, 650, 659, 667, 675, 685, 692, 695, 697, 709, 710, 711, 714, 726, 730, 731, 743, 748, 749, 751, 759, 764, 768, 776, 778, 779, 780, 785, 787, 803, 808, 810, 833, 834, 837, 842, 843, 846, 856, 859, 862, 889, 896, 897, 898, 906, 913, 916, 917, 925, 938, 945, 950, 961, 969, 971, 980, 985, 993, 994]
Final:
(850, 128, 128)
(850,)
(150, 128, 128)
(150,)


# Set up the RNN architecture:

In [44]:
# Reset tf sessions
tf.keras.backend.clear_session()  # Destroys the current TF graph and creates a new one.

dimensions = 128  # Input dimension: 128x128
units = 128       # Dimensionality of RNN output tensor
classes = 10      # Number of output nodes in final layers (=nr of distinct classes)

# Set up model architecture in terms of its layers
model = models.Sequential()

model.add(layers.SimpleRNN(units, input_shape=(None, dimensions),
                           kernel_regularizer=regularizers.l2(0.001),
                           recurrent_regularizer=regularizers.l2(0.001),
                           bias_regularizer=regularizers.l2(0.001), return_sequences=True  #return_sequences = 1 output of dim=units per time-step/(=here:row)
         )) #Alternatively: layers.SimpleRNN || layers.LSTM || layers.GRU

model.add(layers.SimpleRNN(units, input_shape=(None, dimensions),
                           kernel_regularizer=regularizers.l2(0.001),
                           recurrent_regularizer=regularizers.l2(0.001),
                           bias_regularizer=regularizers.l2(0.001)
         )) #Alternatively: layers.SimpleRNN || layers.LSTM || layers.GRU

# The output of GRU will be a 3D tensor of shape (batch_size, 256):
#model.add(layers.GRU(256))


#model.add(layers.Dropout(0.1))

model.add(layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.03)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(classes, activation='softmax'))

# Note on regularizer(s), copied from https://www.tensorflow.org/tutorials/keras/overfit_and_underfit:
# l2(0.001) means that every coefficient in the weight matrix of the layer will add 0.001 * weight_coefficient_value**2
# to the total loss of the network.

# Print summary
model.summary()

# Compile model & make some design choices
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001, #0.001
                                           beta_1=0.9,
                                           beta_2=0.999,
                                           epsilon=1e-07,
                                           amsgrad=False,
                                           name='Adam'
                                           ),
              loss='sparse_categorical_crossentropy',  # Capable of working with regularization
              metrics=['accuracy', 'sparse_categorical_crossentropy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, None, 128)         32896     
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 128)               32896     
_________________________________________________________________
dense (Dense)                (None, 512)               66048     
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 136,970
Trainable params: 136,970
Non-trainable params: 0
_________________________________________________________________


# Execute training:

Start training process:
    Run X times Y tensorflow-epochs and save a model as checkpoint after any Y epochs. 
    FIXME: Bit hacky solution, yet, but can be prettyfied.
    
IMPORTANT: 'accuracy'     == accuracy achieved during training on training data;  * the UN-important measure
           'val_accuracy' == accuracy achieved on TEST data AFTER training epoch; * the important measure

In [46]:
# Set up folder for data gathering during training process
now = datetime.now()
TIME_STAMP = now.strftime("_%Y_%d_%m__%H_%M_%S__%f")
MODEL_ID = 'Model_' + TIME_STAMP + '/'
path = 'TrainingRun/' + MODEL_ID

if not os.path.exists(path):
    os.makedirs(path)

# Set up documentation (csv doc) of training progress
with open(path+'training_progress.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["epoch", "loss", "accuracy", "val_loss", "val_accuracy", "sparse_categorical_crossentropy"])
    file.close()


# Save initial model - For preventing memory leaks
name = 'cnn_model_0_acc_0'
model.save(path+name+'.h5')

# Run training
repetitions = 10    # How many repetitions of given nr of epochs
eps = 10            # tf-Epochs
accuracy = 0
min_acc = 0.85

for i in range(repetitions):
    # Live terminal update
    print('Completed epochs so far: ' + str(i*eps))
    
    # Prevent memory leakage
    tf.keras.backend.clear_session()  # Destroys the current TF graph and creates a new one.
    model = tf.keras.models.load_model(path+name+'.h5')  # Reload model

    # Perform x epochs of training
    history = model.fit(training_data, training_labels,
                        epochs=eps,
                        validation_data=(testing_data, testing_labels))
    
    # Get string representation of performed number of training epochs & new model name
    epoch = str((i+1)*eps)
    name = 'cnn_model_'+epoch+'_acc_'+str(history.history['val_accuracy'])
    
    # Save the entire model as a checkpoint and/or final model to a HDF5 file.
    model.save(path+name+'.h5')
    
    # Record training progress
    with open(path+'training_progress.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([epoch,
                         history.history["loss"][0], 
                         history.history["accuracy"][0], 
                         history.history["val_loss"][0], 
                         history.history["val_accuracy"][0],
                         history.history["sparse_categorical_crossentropy"][0]
                         ])
        file.close()

    if accuracy >= min_acc and history.history['val_accuracy'][0] < accuracy:
        # Drop in accuracy on evaluation data. Overfitting?
        break
    else:
        accuracy = history.history['val_accuracy'][0]
print('Done.')

Completed epochs so far: 0
Train on 850 samples, validate on 150 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Completed epochs so far: 10
Train on 850 samples, validate on 150 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

KeyboardInterrupt: 