# Build CNN

In [4]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tensorflow import keras
from tensorflow.keras import layers

# for working with audio data
import librosa, librosa.display

## Data Preparation

In [5]:
# load metadata csv
df = pd.read_csv('../../Data/features_30_sec.csv')
df = df[['filename', 'label']]

# relative path to the directory of audio file genre subfolders
rel_path = '../../Data/genres_original/'

# create relative path to each audio file
df['file_path'] = rel_path + df['label'] + '/' + df['filename']

# drop 'filename' column
df = df.drop(columns='filename')
df.head()

Unnamed: 0,label,file_path
0,blues,../../Data/genres_original/blues/blues.00000.wav
1,blues,../../Data/genres_original/blues/blues.00001.wav
2,blues,../../Data/genres_original/blues/blues.00002.wav
3,blues,../../Data/genres_original/blues/blues.00003.wav
4,blues,../../Data/genres_original/blues/blues.00004.wav


In [6]:
# this jazz song is corrupted and will be removed
#y, sr = librosa.load(df['file_path'][554])

In [7]:
# drop the corrupted song
df = df.drop(labels=554)

# check
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 999 entries, 0 to 999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   label      999 non-null    object
 1   file_path  999 non-null    object
dtypes: object(2)
memory usage: 23.4+ KB


In [8]:
# instantiate, fit, transform multiclass target
le = LabelEncoder()
y = le.fit_transform(df['label'])

# one hot encode multiclass target
output_shape = df['label'].nunique()
y = keras.utils.to_categorical(y, output_shape)

# check shape
y.shape

(999, 10)

In [9]:
# split into a 10% holdout set
X_t, X_hold, y_t, y_hold = train_test_split(df['file_path'], y, random_state=42, stratify=y, test_size=0.1)

# split into 75% train, 15% test sets
X_train, X_test, y_train, y_test = train_test_split(X_t, y_t, random_state=42, stratify=y_t, test_size=15/90)

In [10]:
mel_specs = []
for sample in X_train:
    try:
        y, sr = librosa.load(sample, duration=20)
        S_dB = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr), ref=np.min)
        
        # append to the list and create a new axis for concatenation later and one for the single amplitude channel
        mel_specs.append(S_dB[np.newaxis,..., np.newaxis])
    except:
        continue

# concatenate along the first axis; result should be a 4D tensor of shape (#samples, #mels, #frames, #channels)
X_train_mel = np.concatenate(mel_specs,axis=0)

# check shape
X_train_mel.shape

(749, 128, 862, 1)

In [11]:
def get_feature(X, sample_duration):
    features = []
    for sample in X:
        try:
            y, sr = librosa.load(sample, duration=sample_duration)
            S_dB = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr), ref=np.min)
            
            # append to the list and create a new axis for concatenation later and one for the single amplitude channel
            features.append(S_dB[np.newaxis,..., np.newaxis])
        except:
            continue

    # concatenate along the first axis; result should be a 4D tensor of shape (#samples, #mels, #frames, #channels)
    return np.concatenate(features,axis=0)

In [12]:
# generate mel spectrograms for train
X_train_mel = get_feature(X_train, 20)
X_train_mel.shape

(749, 128, 862, 1)

In [13]:
# define input shape
input_shape = X_train_mel.shape[1:]

In [14]:
# generate mel spectrograms for test
X_test_mel = get_feature(X_test, 20)

## First Model - Simple Multi-Layer Perceptron

## CNN

In [15]:
# build sequentially
cnn = keras.Sequential()
cnn.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
cnn.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
cnn.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
cnn.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
cnn.add(layers.MaxPooling2D((2, 2)))

# fully-connected layers for output
cnn.add(layers.Flatten())
cnn.add(layers.Dense(128, activation='relu'))
cnn.add(layers.Dense(10, activation='softmax'))

cnn.compile(loss='categorical_crossentropy',
            optimizer="adam",
            metrics=['accuracy'])

# take a look at model architecture
cnn.summary()

2021-11-17 14:21:32.345412: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 862, 32)      320       
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 862, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 64, 431, 32)      0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 64, 431, 64)       18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 64, 431, 64)       36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 32, 215, 64)      0         
 2D)                                                    

In [16]:
# fit the model
history = cnn.fit(X_train,
                  y_train,
                  epochs=2,
                  batch_size=batch_size,
                  validation_data=(X_test, y_test))

NameError: name 'batch_size' is not defined