In [39]:
# aws stuff
# import s3fs
# fs = s3fs.S3FileSystem() 
# with fs.open("spectrogramdatabucket/Spectrogram_Data_Labels.npy") as f:
#     labels = np.load(f)
# with fs.open("spectrogramdatabucket/Spectrogram_Data.npy") as d:
#     data = np.load(d)
    
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import (Dense, Dropout, Flatten, Conv2D, MaxPooling2D,
                                            BatchNormalization, Input, concatenate, GlobalAveragePooling2D)
from tensorflow.python.keras import utils
import json
import tensorflow as tf
 
def inception_module(layer_in, f1, f2_in, f2_out, f3_in, f3_out, f4_out, n):
    # 1x1 conv
    conv1 = Conv2D(f1, (1,1), padding='same', activation='relu',name=f'1x1_conv__{n}')(BatchNormalization(axis=-1)(layer_in))
    # 3x3 conv
    conv3 = Conv2D(f2_in, (1,1), padding='same', activation='relu',)(BatchNormalization(axis=-1)(layer_in))
    conv3 = Conv2D(f2_out, (3,3), padding='same', activation='relu',name=f'3x3_conv__{n}')(BatchNormalization(axis=-1)(conv3))
    # 5x5 conv
    conv5 = Conv2D(f3_in, (1,1), padding='same', activation='relu')(BatchNormalization(axis=-1)(layer_in))
    conv5 = Conv2D(f3_out, (5,5), padding='same', activation='relu',name=f'5x5_conv__{n}')(BatchNormalization(axis=-1)(conv5))
    # 3x3 max pooling
    pool = MaxPooling2D((3,3), strides=(1,1), padding='same')(layer_in)
    pool = Conv2D(f4_out, (1,1), padding='same', activation='relu')(BatchNormalization(axis=-1)(pool))
    # concatenate filters, assumes filters/channels last
    layer_out = concatenate([conv1, conv3, conv5, pool], axis=-1)
    return layer_out
 

paddings = tf.constant([[0, 0],[1,1],[1,1],[0,0]])
visible = Input(shape=(128,647,1))
padded_input = tf.pad(visible,paddings,"CONSTANT")
x = Conv2D(32,(3,3),activation='relu',input_shape=(128,647,1))(padded_input)
x = BatchNormalization(axis=-1)(x)
incept_1_input = MaxPooling2D(pool_size=(1,4),name='incept_1_input')(x)
 
f1 = 32
f2_in = 64
f2_out = 32
f3_in = 16
f3_out = 32
f4_out = 32
 
#Inception Module
incept_1 = inception_module(incept_1_input, f1, f2_in, f2_out, f3_in, f3_out, f4_out,1)
 
incept_2_input = concatenate([incept_1,incept_1_input],name='incept_2_input')
incept_2 = inception_module(incept_2_input, f1, f2_in, f2_out, f3_in, f3_out, f4_out,2)
 
incept_3_input = concatenate([incept_2_input,incept_2],name='incept_3_input')
incept_3 = inception_module(incept_3_input, f1, f2_in, f2_out, f3_in, f3_out, f4_out,3)
 
incept_3_output = concatenate([incept_3,incept_3_input],name='incept_3_output')
 
#Transition Layers
x = BatchNormalization(axis=-1)(Conv2D(32,(1,1))(incept_3_output))
x = MaxPooling2D(pool_size=(2,2),strides=2)(x)
 
#Decision Layers
x = GlobalAveragePooling2D()(x)
out = Dense(8, activation="softmax")(x)
 
model = Model(inputs=visible, outputs=out)
 
model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
 
model.summary()
 

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 128, 647, 1) 0                                            
__________________________________________________________________________________________________
tf.compat.v1.pad_1 (TFOpLambda) (None, 130, 649, 1)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_11 (Conv2D)              (None, 128, 647, 32) 320         tf.compat.v1.pad_1[0][0]         
__________________________________________________________________________________________________
batch_normalization_20 (BatchNo (None, 128, 647, 32) 128         conv2d_11[0][0]                  
____________________________________________________________________________________________

In [40]:
labels = np.load("../Data/FMA_Small_Spectrogram_Data_Labels.npy")
data = np.load("../Data/FMA_Small_Spectrogram_Data.npy")



X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.30, shuffle=True)
X_train = X_train.reshape(X_train.shape[0], 128, 647, 1)
X_test = X_test.reshape(X_test.shape[0], 128, 647, 1)

X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.35, shuffle=True)

print("Training Shape: {} ... {}".format(X_train.shape,y_train.shape))
print("Testing Shape: {} ... {}".format(X_test.shape,y_test.shape))
print("Validation Shape: {} ... {}".format(X_val.shape,y_val.shape))

Training Shape: (39169, 128, 647, 1) ... (39169, 8)
Testing Shape: (10912, 128, 647, 1) ... (10912, 8)
Validation Shape: (5876, 128, 647, 1) ... (5876, 8)


In [None]:
train_ind = 39169 - 2000
test_ind = 13430 + 500
val_ind = 3358 + 1500
 
X_train = data[0:train_ind,:,:]
y_train = labels[0:train_ind,:]
 
X_test = data[0:test_ind,:,:]
y_test = labels[0:test_ind,:]
 
X_val = data[0:val_ind,:,:]
y_val = labels[0:val_ind,:]
 
X_train = X_train.reshape(X_train.shape[0], 128, 647, 1)
X_test = X_test.reshape(X_test.shape[0], 128, 647, 1)
X_val = X_val.reshape(X_val.shape[0], 128, 647, 1)
 
print("Training Shape: {} ... {}".format(X_train.shape,y_train.shape))
print("Testing Shape: {} ... {}".format(X_test.shape,y_test.shape))
print("Validation Shape: {} ... {}".format(X_val.shape,y_val.shape))

In [None]:
history = model.fit(X_train,
                        y_train, 
                        batch_size=32,
                        validation_data=(X_test, y_test),
                        epochs=30)


In [10]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.python.keras import utils
import json
import tensorflow as tf
from tensorflow.keras import layers
from keras.models import Model
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers.merge import concatenate
from keras.utils import plot_model
import tifffile 

with open("../Genre_Track_Id_Dict.json",'r') as j:
        id_genre_dict = json.load(j)
numerical_labels = dict(zip(list(id_genre_dict.keys()),np.arange(0,8)))


In [12]:
data = np.zeros((56000,128,647))
labels = np.zeros((56000,len(numerical_labels)))

ct=-1
for genre in os.listdir("../Data/mp3_files_fma_small"):
    genre_dir = os.path.join("../Data/mp3_files_fma_small",genre)
    for fname in [f for f in os.listdir(genre_dir) if ".tiff" in f]:
        mel_db_path = os.path.join(genre_dir,fname)

        spect = tifffile.imread(mel_db_path)
        if spect.shape[1] == 646:
            spect = np.hstack((spect,np.zeros((128,1))))
        if spect.shape[1] == 647:
            ct+=1
            data[ct,:,:] = spect
            genre_encode = numerical_labels[genre] 
            labels[ct][genre_encode] = 1
data = data[0:ct,:,:]
labels = labels[0:ct,:]

data = data/-80


X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.30, shuffle=True)
X_train = X_train.reshape(X_train.shape[0], 128, 647, 1)
X_test = X_test.reshape(X_test.shape[0], 128, 647, 1)

X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.35, shuffle=True)

print("Training Shape: {} ... {}".format(X_train.shape,y_train.shape))
print("Testing Shape: {} ... {}".format(X_test.shape,y_test.shape))
print("Validation Shape: {} ... {}".format(X_val.shape,y_val.shape))

Training Shape: (39169, 128, 647, 1) ... (39169, 8)
Testing Shape: (13430, 128, 647, 1) ... (13430, 8)
Validation Shape: (3358, 128, 647, 1) ... (3358, 8)


In [32]:
config = {"../Data/fma_small_npy_format/Testing_Data":X_test,
         "../Data/fma_small_npy_format/Testing_Labels":y_test,
         "../Data/fma_small_npy_format/Training_Data":X_train,
         "../Data/fma_small_npy_format/Training_Labels":y_train,
         "../Data/fma_small_npy_format/Validation_Data":X_val,
         "../Data/fma_small_npy_format/Validation_Labels":y_val}

ct = 0
for outdir,data_tensor in config.items():
    
    
    for arr in data_tensor:
        ct +=1
        fname = outdir.split("/")[-1] +"_{}.npy".format(ct)
        outpath = os.path.join(outdir,fname)
        np.save(outpath,arr)
        

In [None]:
# function for creating a projected inception module
def inception_module(layer_in, f1, f2_in, f2_out, f3_in, f3_out, f4_out, n):
    # 1x1 conv
    conv1 = Conv2D(f1, (1,1), padding='same', activation='relu',name=f'1x1_conv__{n}')(BatchNormalization(axis=-1)(layer_in))
    # 3x3 conv
    conv3 = Conv2D(f2_in, (1,1), padding='same', activation='relu',)(BatchNormalization(axis=-1)(layer_in))
    conv3 = Conv2D(f2_out, (3,3), padding='same', activation='relu',name=f'3x3_conv__{n}')(BatchNormalization(axis=-1)(conv3))
    # 5x5 conv
    conv5 = Conv2D(f3_in, (1,1), padding='same', activation='relu')(BatchNormalization(axis=-1)(layer_in))
    conv5 = Conv2D(f3_out, (5,5), padding='same', activation='relu',name=f'5x5_conv__{n}')(BatchNormalization(axis=-1)(conv5))
    # 3x3 max pooling
    pool = MaxPooling2D((3,3), strides=(1,1), padding='same')(layer_in)
    pool = Conv2D(f4_out, (1,1), padding='same', activation='relu')(BatchNormalization(axis=-1)(pool))
    # concatenate filters, assumes filters/channels last
    layer_out = concatenate([conv1, conv3, conv5, pool], axis=-1)
    return layer_out

In [None]:
paddings = tf.constant([[0, 0],[1,1],[1,1],[0,0]])
visible = Input(shape=(128,647,1))

padded_input = tf.pad(visible,paddings,"CONSTANT")
x = Conv2D(32,(3,3),activation='relu',input_shape=(128,647,1))(padded_input)
x = BatchNormalization(axis=-1)(x)
incept_1_input = layers.MaxPooling2D(pool_size=(1,4),name='incept_1_input')(x)

f1 = 32
f2_in = 64
f2_out = 32
f3_in = 16
f3_out = 32
f4_out = 32

#Inception Module
incept_1 = inception_module(incept_1_input, f1, f2_in, f2_out, f3_in, f3_out, f4_out,1)

incept_2_input = concatenate([incept_1,incept_1_input],name='incept_2_input')
incept_2 = inception_module(incept_2_input, f1, f2_in, f2_out, f3_in, f3_out, f4_out,2)

incept_3_input = concatenate([incept_2_input,incept_2],name='incept_3_input')
incept_3 = inception_module(incept_3_input, f1, f2_in, f2_out, f3_in, f3_out, f4_out,3)

incept_3_output = concatenate([incept_3,incept_3_input],name='incept_3_output')

#Transition Layers
x = BatchNormalization(axis=-1)(Conv2D(32,(1,1))(incept_3_output))
x = layers.MaxPooling2D(pool_size=(2,2),strides=2)(x)

#Decision Layers
x = layers.GlobalAveragePooling2D()(x)
out = layers.Dense(8, activation="softmax")(x)

model = Model(inputs=visible, outputs=out)

model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

model.summary()

In [None]:
history = model.fit(X_train,
                        y_train, 
                        batch_size=32,
                        validation_data=(X_test, y_test),
                        epochs=30)
