# PMA3: Making and Training Model

## Config

In [1]:
# Mininum Song Lenth
# This is just to adjust the structure of the data
min_song_length_seconds = 60

# Maxinum Song Length
max_song_length_seconds = 540

# Use padding enabled files
padding = True

In [2]:
batch_size = 1

epochs = 10

use_gpu = True

# If you want to load and continue training an pre-existing model
load_model = True

## Loading Songs

In [3]:
from tqdm import tqdm
import os
import pickle
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [4]:
data = []
labels = []

In [5]:
try:
    for filename in tqdm(os.listdir("./data/padding-" + str(padding) + "/min-" + str(min_song_length_seconds) + "_max-" + str(max_song_length_seconds))):
        with open("./data/padding-" + str(padding) + "/min-" + str(min_song_length_seconds) + "_max-" + str(max_song_length_seconds) + "/" + filename, "rb") as file:
            json_pkl = pickle.load(file)

        for frame in json_pkl["fmccs"]:
            data.append((frame, json_pkl['duration']))
            labels.append(json_pkl['rating'])
except:
    raise FileNotFoundError("Could not read pkl files from dataset! This could happen because you have not ran preprocessing yet for your settings! Please make sure your settings are the same!")

In [6]:
def reshape(data):
    assert data.shape[1] % 2 == 0, f"The data cannot be shaped becuase it cannot be divided by 2 without a remainder. Data shape {data.shape}. \
        Failed to reshape to {data.shape[1] / 2}"
    
    data = data.reshape(26, int(data.shape[1]/2))
    data = np.expand_dims(data,axis=-1)
    return data

In [7]:
# Remember that the data is a tuple
# (fmcc, duration)
audio_data = np.array([reshape(x[0]) for x in data])
print(audio_data.shape)

(32013, 26, 1292, 1)


In [8]:
audio_length = np.array([x[1] for x in data])
print(audio_length.shape)

(32013,)


In [9]:
# Reshape `audio_length` to match the shape of `processed_data`
audio_length = np.reshape(audio_length, (audio_length.shape[0], 1, 1, 1))
print(audio_length.shape)

(32013, 1, 1, 1)


In [10]:
# Broadcast `audio_length` to match the shape of `processed_data`
audio_length = np.broadcast_to(audio_length, audio_data.shape[:-1] + (1,))
print(audio_length.shape)

(32013, 26, 1292, 1)


In [11]:
# Concatenate `processed_data` and `audio_length` along the last axis
processed_data = np.concatenate([audio_data, audio_length], axis=-1)
print(processed_data.shape)

(32013, 26, 1292, 2)


In [12]:
lbenc = LabelEncoder()
labels = lbenc.fit_transform(labels)

In [13]:
classes = lbenc.classes_

In [14]:
# split the data for training and testing
x_train,x_val,y_train,y_val = train_test_split(processed_data,labels,test_size=0.2,
                                               shuffle=True,random_state=42)

## Making Model

In [15]:
from tensorflow.keras.layers import InputLayer, Input, Conv2D , AveragePooling2D , GlobalAvgPool2D , Dense, MaxPooling2D
import tensorflow as tf

In [16]:
if use_gpu is False:
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [17]:
input_layer = Input(shape=(26, 1292, 2))

# Convolutional layers
conv1 = Conv2D(256, (3), padding='valid', activation='relu')(input_layer)
conv2 = Conv2D(256, (3), padding='valid', activation='relu')(conv1)
pool1 = AveragePooling2D(pool_size=(3), strides=(2), padding='same')(conv2)
conv3 = Conv2D(256, (3), padding='valid', activation='relu')(pool1)
pool2 = AveragePooling2D(pool_size=(3), strides=(2), padding='same')(conv3)
conv4 = Conv2D(512, (4), padding='valid', activation='relu')(pool2)
global_pool = GlobalAvgPool2D()(conv4)

# Classification layers
dense1 = Dense(256, activation='relu')(global_pool)
dense2 = Dense(128, activation='relu')(dense1)
output = Dense(10, activation='softmax')(dense2)

# Define the model
model = tf.keras.models.Model(inputs=input_layer, outputs=output)

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=['accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 26, 1292, 2)]     0         
                                                                 
 conv2d (Conv2D)             (None, 24, 1290, 256)     4864      
                                                                 
 conv2d_1 (Conv2D)           (None, 22, 1288, 256)     590080    
                                                                 
 average_pooling2d (AverageP  (None, 11, 644, 256)     0         
 ooling2D)                                                       
                                                                 
 conv2d_2 (Conv2D)           (None, 9, 642, 256)       590080    
                                                                 
 average_pooling2d_1 (Averag  (None, 5, 321, 256)      0         
 ePooling2D)                                                 

In [18]:
history = model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs)

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.