In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import tensorflow as tf
from tensorflow import keras
    
# Relative path of the feature directory; a later cell chdir's into it and
# opens the feature archives (e.g. "mfcc.zip") from there.
my_features_path = 'deam-dataset/my_features/'

In [2]:
import zipfile as zf
import os

# NOTE: this changes the process working directory; the archive names in this
# cell and in the mel-spectrogram extraction cell are resolved relative to it.
os.chdir(my_features_path)

# The context manager closes the archive even if extraction raises, which the
# previous explicit close() call did not guarantee.
with zf.ZipFile("mfcc.zip", 'r') as files:
    files.extractall('mfcc/')

In [5]:
# %pip installs into the environment of the running kernel (a bare `!pip` may
# hit a different interpreter); the version is pinned so re-runs are reproducible.
%pip install zipfile38==0.0.3

Collecting zipfile38
  Downloading zipfile38-0.0.3.tar.gz (22 kB)
Building wheels for collected packages: zipfile38
  Building wheel for zipfile38 (setup.py) ... [?25ldone
[?25h  Created wheel for zipfile38: filename=zipfile38-0.0.3-py3-none-any.whl size=22765 sha256=b2f67b85ac46631b2adf7c931fcfdcff85338fed52e27609496065c5f18f0fb3
  Stored in directory: /home/jovyan/.cache/pip/wheels/32/e9/b6/cf61548b40caf6cf2a84bb123064993db2215cc8c13f23f58e
Successfully built zipfile38
Installing collected packages: zipfile38
Successfully installed zipfile38-0.0.3


In [6]:
import zipfile38 as zf38

# Extract into the current working directory. The context manager closes the
# archive even if extraction raises.
with zf38.ZipFile("mel_spectogram_features.zip", 'r') as files:
    files.extractall()

In [3]:
# Show the GPU devices visible to TensorFlow (sanity check before training).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [15]:
# Return to the project root, i.e. the directory that contains 'deam-dataset'.
# The extraction cell chdir'ed two levels down ('deam-dataset/my_features/'),
# so a single os.chdir('..') only ends up in the right place when this cell is
# executed twice. Walking up until the dataset folder is visible is idempotent
# and works from either depth; it fails loudly at the filesystem root.
while not os.path.isdir('deam-dataset'):
    parent = os.path.dirname(os.getcwd())
    if parent == os.getcwd():
        raise FileNotFoundError("could not locate the 'deam-dataset' directory")
    os.chdir(parent)
os.listdir()

['best_model2',
 'deam-dataset',
 'valid_data.npz',
 'deam_data_fetch.py',
 'best_model',
 'deam_cnn_processing.ipynb',
 '.Trash-1000',
 'train_data.npz',
 'deam_cnn.py',
 'test_cnn_log.txt',
 'test_data.npz',
 'deam_cnn_model.ipynb',
 'DEAM_CNN-LSTM.ipynb',
 '.ipynb_checkpoints',
 'deam-dataset.zip',
 '__pycache__']

In [21]:
def fetch_mfcc(dir, n_frames=1920, n_mfcc=20):
    """Load every MFCC CSV file in ``dir`` into a single float32 array.

    Each CSV is read as a 2-D matrix with one row per coefficient and one
    column per frame. Only the first ``n_frames`` columns are kept and the
    matrix is transposed so that frames come first.

    Files are ordered by the integer found between a 5-character filename
    prefix (presumably ``'mfcc_'`` -- confirm against the feature exporter)
    and the ``'.csv'`` extension.

    Parameters
    ----------
    dir : str
        Directory containing the CSV files. (The name shadows the builtin
        but is kept so existing keyword callers keep working.)
    n_frames : int, optional
        Number of leading frames to keep from every file (default 1920).
    n_mfcc : int, optional
        Number of coefficient rows expected in every file (default 20).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_files, n_frames, n_mfcc)`` and dtype float32.

    Raises
    ------
    ValueError
        If a file does not hold ``n_mfcc`` rows and at least ``n_frames``
        columns, or if a filename does not contain an integer index.
    """
    # endswith() instead of a substring test so that e.g. 'x.csv.bak' is ignored.
    csv_files = [f for f in os.listdir(dir) if f.endswith('.csv')]
    mfcc_files = sorted(csv_files, key=lambda f: int(f[5:-4]))   # sort by audio index

    mfcc_data = np.empty(shape=(len(mfcc_files), n_frames, n_mfcc), dtype=np.float32)
    for i, f in enumerate(mfcc_files):
        mfcc = np.loadtxt(os.path.join(dir, f), delimiter=',', dtype=np.float32)
        # Fail with the offending filename instead of an opaque broadcast error.
        if mfcc.ndim != 2 or mfcc.shape[0] != n_mfcc or mfcc.shape[1] < n_frames:
            raise ValueError(
                f'{f}: expected {n_mfcc} rows and at least {n_frames} columns, '
                f'got shape {mfcc.shape}'
            )
        mfcc_data[i] = mfcc[:, :n_frames].T
    return mfcc_data

# Load all MFCC matrices from the 'mfcc' sub-folder of the feature directory
# and display the resulting array shape.
my_features_path = 'deam-dataset/my_features/'
mfcc_path = os.path.join(my_features_path, 'mfcc')
mfcc_data = fetch_mfcc(mfcc_path)
mfcc_data.shape

(1802, 1920, 20)

In [22]:
def fetch_mel_spectograms(dir, n_frames=1920, n_mels=128):
    """Load every mel-spectrogram CSV file in ``dir`` into one float32 array.

    Each CSV is read as a 2-D matrix with one row per mel band and one
    column per frame. Only the first ``n_frames`` columns are kept and the
    matrix is transposed so that frames come first.

    Files are ordered by the integer that follows the ``'mel_spectogram_'``
    filename prefix (the ``'.csv'`` extension is stripped first).

    Parameters
    ----------
    dir : str
        Directory containing the CSV files. (The name shadows the builtin
        but is kept so existing keyword callers keep working.)
    n_frames : int, optional
        Number of leading frames to keep from every file (default 1920).
    n_mels : int, optional
        Number of mel-band rows expected in every file (default 128).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_files, n_frames, n_mels)`` and dtype float32.

    Raises
    ------
    ValueError
        If a file does not hold ``n_mels`` rows and at least ``n_frames``
        columns, or if a filename does not contain an integer index.
    """
    prefix = 'mel_spectogram_'
    # endswith() instead of a substring test so that e.g. 'x.csv.bak' is ignored.
    csv_files = [f for f in os.listdir(dir) if f.endswith('.csv')]
    mel_files = sorted(csv_files, key=lambda f: int(f[len(prefix):-4]))   # sort by audio index

    mel_spec_data = np.empty(shape=(len(mel_files), n_frames, n_mels), dtype=np.float32)
    for i, f in enumerate(mel_files):
        mel_spec = np.loadtxt(os.path.join(dir, f), delimiter=',', dtype=np.float32)
        # Fail with the offending filename instead of an opaque broadcast error.
        if mel_spec.ndim != 2 or mel_spec.shape[0] != n_mels or mel_spec.shape[1] < n_frames:
            raise ValueError(
                f'{f}: expected {n_mels} rows and at least {n_frames} columns, '
                f'got shape {mel_spec.shape}'
            )
        mel_spec_data[i] = mel_spec[:, :n_frames].T
    return mel_spec_data

# Load all mel-spectrogram matrices from the 'mel_spectogram_features'
# sub-folder of the feature directory and display the resulting array shape.
mel_path = os.path.join(my_features_path, 'mel_spectogram_features')
mel_spec_data = fetch_mel_spectograms(mel_path)
mel_spec_data.shape

(1802, 1920, 128)

In [23]:
# Cache both feature tensors on disk (np.save appends the '.npy' extension)
# so the slow CSV parsing does not have to be repeated.
np.save(file='mfcc_data', arr=mfcc_data)
np.save(file='mel_spectogram_data', arr=mel_spec_data)

In [24]:
# Fetch the song-level valence/arousal annotations. They are split across two
# CSV files; stack them and keep only the columns present in the first file.
label_path = 'deam-dataset/DEAM_Annotations/annotations/annotations averaged per song/song_level/'
labels_1_2000, labels_2000_2058 = (
    pd.read_csv(os.path.join(label_path, csv_name))
    for csv_name in (
        'static_annotations_averaged_songs_1_2000.csv',
        'static_annotations_averaged_songs_2000_2058.csv',
    )
)
labels = pd.concat(
    [labels_1_2000, labels_2000_2058],
    ignore_index=True,
    sort=False,
)[labels_1_2000.columns]
# Remove spaces from the column names (the raw headers presumably carry
# padding) so columns can be addressed as e.g. 'valence_mean'.
labels.columns = labels.columns.str.replace(' ', '')

labels

Unnamed: 0,song_id,valence_mean,valence_std,arousal_mean,arousal_std
0,2,3.10,0.94,3.00,0.63
1,3,3.50,1.75,3.30,1.62
2,4,5.70,1.42,5.50,1.63
3,5,4.40,2.01,5.30,1.85
4,7,5.80,1.47,6.40,1.69
...,...,...,...,...,...
1797,2054,5.40,1.20,3.60,1.36
1798,2055,5.00,1.41,5.20,1.47
1799,2056,5.00,1.41,4.60,1.74
1800,2057,3.17,1.07,6.83,0.37


In [25]:
# Regression targets: one row per song, columns = (valence_mean, arousal_mean).
valence = labels['valence_mean'].to_numpy()
arousal = labels['arousal_mean'].to_numpy()
y = np.column_stack((valence, arousal))
y.shape

(1802, 2)

In [26]:
# Stack MFCC and mel-spectrogram features along the last (feature) axis so
# both can be split with a single set of sample indices.
concat_input = np.concatenate([mfcc_data, mel_spec_data], axis=-1)
print('Concatenated input shape: ', concat_input.shape)

Concatenated input shape:  (1802, 1920, 148)


In [27]:
from sklearn.model_selection import train_test_split

# Split sample indices (not the feature tensor itself) so the same partition
# can be applied to the concatenated features afterwards.
data_size = concat_input.shape[0]
x_ids = list(range(concat_input.shape[0]))

# First hold out 20% for testing, then 33% of the remainder for validation.
X_trn_ids, X_test_ids, y_trn, y_test = train_test_split(
    x_ids, y, test_size=0.2, random_state=23)
X_train_ids, X_valid_ids, y_train, y_valid = train_test_split(
    X_trn_ids, y_trn, test_size=0.33, random_state=23)

X_train = concat_input[X_train_ids]
X_valid = concat_input[X_valid_ids]
X_test = concat_input[X_test_ids]

# Undo the concatenation: the first 20 feature channels are the MFCCs, the
# remaining ones the mel-spectrogram bands.
X_mfcc_train, X_mel_train = np.split(X_train, [20], axis=2)
X_mfcc_valid, X_mel_valid = np.split(X_valid, [20], axis=2)
X_mfcc_test, X_mel_test = np.split(X_test, [20], axis=2)

In [28]:
# Persist each split (features under key 'x', targets under key 'y') as a
# compressed .npz archive.
np.savez_compressed(file='cnn_train_data', x=X_train, y=y_train)
np.savez_compressed(file='cnn_valid_data', x=X_valid, y=y_valid)
np.savez_compressed(file='cnn_test_data', x=X_test, y=y_test)

In [27]:
from functools import partial
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, LayerNormalization, BatchNormalization

# drop 0th coefficient as it only conveys a constant offset
X_mfcc_train_f = X_mfcc_train[:, :, 1:]
X_mfcc_valid_f = X_mfcc_valid[:, :, 1:]
X_mfcc_test_f = X_mfcc_test[:, :, 1:]

# Shared layer factories: individual calls may still override kernel_size.
conv_layer = partial(Conv1D, kernel_size=4, activation='relu', padding='SAME', strides=1)
pooling = partial(MaxPooling1D, pool_size=4, strides=4, padding='SAME')

# The input shape is declared up front with keras.Input. Previously it was
# passed as `input_shape` to the second layer, where Sequential ignores it,
# leaving the model unbuilt until the first call to fit().
# Input: 1920 frames x 19 MFCC coefficients. Output: (valence, arousal).
model = keras.models.Sequential([
    keras.Input(shape=(1920, 19)),
    LayerNormalization(),
    conv_layer(filters=32, kernel_size=6),
    pooling(),
    conv_layer(filters=64),
    pooling(),
    conv_layer(filters=128),
    pooling(),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.4),
    Dense(units=2)
])

In [28]:
model_path = 'best_model2/'

# NOTE(review): early stopping watches the *training* loss while the checkpoint
# keeps the weights with the best *validation* loss -- confirm this asymmetry
# is intended.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
checkpoint = tf.keras.callbacks.ModelCheckpoint(monitor='val_loss', save_best_only=True, filepath=model_path)

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = keras.optimizers.Adam(learning_rate=0.00001)
model.compile(loss='mae', optimizer=optimizer, metrics=['mean_squared_error'])
history = model.fit(X_mfcc_train_f, y_train,
                    epochs=30,
                    validation_data=(X_mfcc_valid_f, y_valid),
                    batch_size=16,
                    callbacks=[early_stopping, checkpoint])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30


In [11]:
# Evaluate on the filtered test set (0th MFCC coefficient removed): the model
# was trained on 19-coefficient inputs, so the unfiltered 20-coefficient
# X_mfcc_test does not match its input shape.
score = model.evaluate(X_mfcc_test_f, y_test)



In [12]:
def categorize_val_arousal(data, threshold=5):
    """Map (valence, arousal) pairs to two-letter quadrant labels.

    The first letter encodes arousal (``'A'`` active when
    ``arousal >= threshold``, ``'P'`` passive otherwise) and the second
    encodes valence (``'P'`` positive when ``valence >= threshold``, ``'N'``
    negative otherwise), giving ``'AP'``, ``'AN'``, ``'PP'`` and ``'PN'``.

    Parameters
    ----------
    data : iterable of indexable pairs
        Each element provides valence at index 0 and arousal at index 1.
    threshold : number, optional
        Boundary between negative/positive valence and passive/active
        arousal (default 5).

    Returns
    -------
    numpy.ndarray
        1-D array of string labels, one per categorised point.

    Notes
    -----
    A point whose valence or arousal is NaN satisfies neither comparison
    and is skipped, so the result can be shorter than ``data``.
    """
    cat_data = []
    for point in data:
        valence, arousal = point[0], point[1]

        if arousal >= threshold:
            arousal_tag = 'A'   # active
        elif arousal < threshold:
            arousal_tag = 'P'   # passive
        else:
            continue            # NaN: not comparable, skip the point

        if valence >= threshold:
            valence_tag = 'P'   # positive
        elif valence < threshold:
            valence_tag = 'N'   # negative
        else:
            continue            # NaN: not comparable, skip the point

        cat_data.append(arousal_tag + valence_tag)
    return np.array(cat_data, dtype='str')

In [30]:
from sklearn.metrics import accuracy_score, classification_report

# Predict (valence, arousal) for the test songs, then score the model on the
# coarser task of landing in the correct valence/arousal quadrant.
y_pred = model.predict(X_mfcc_test_f)

cat_y_pred, cat_y_test = (
    categorize_val_arousal(values) for values in (y_pred, y_test)
)

accuracy = accuracy_score(y_true=cat_y_test, y_pred=cat_y_pred)
print(f'Test Accuracy: {accuracy}')

Test Accuracy: 0.3518005540166205


In [18]:
# Spot-check: predicted (valence, arousal) for test sample 10.
y_pred[10]

array([4.75282  , 4.8825507], dtype=float32)

In [17]:
# Spot-check: ground-truth (valence, arousal) for test sample 10.
y_test[10]

array([4.2, 3.6])

In [None]:
# Same architecture as the MFCC regressor, fed with the 128-band
# mel-spectrogram features. The input shape is declared with keras.Input;
# passing `input_shape` to the second layer is ignored by Sequential.
mel_model = keras.models.Sequential([
    keras.Input(shape=(1920, 128)),
    LayerNormalization(),
    conv_layer(filters=32, kernel_size=8),
    pooling(),
    conv_layer(filters=64),
    pooling(),
    conv_layer(filters=128),
    pooling(),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.4),
    Dense(units=2)
])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = keras.optimizers.Adam(learning_rate=0.00001)
mel_model.compile(loss='mae', optimizer=optimizer, metrics=['mean_squared_error'])
history = mel_model.fit(X_mel_train, y_train,
                    epochs=30,
                    validation_data=(X_mel_valid, y_valid),
                    batch_size=8,
                    callbacks=[early_stopping])

In [None]:
# Quadrant labels for the full label set and for each split.
cat_y = np.array(categorize_val_arousal(y))

cat_y_train, cat_y_valid, cat_y_test = (
    categorize_val_arousal(split_targets)
    for split_targets in (y_train, y_valid, y_test)
)

In [None]:
# Check the shape of the mel-spectrogram training tensor.
X_mel_train.shape

In [None]:
# Inspect the quadrant labels of the validation split.
cat_y_valid

In [1]:
# Quadrant classifier on the full 20-coefficient MFCC features.
# Requires the import and layer-factory cells above to have been run in this
# kernel session (keras, LayerNormalization, conv_layer, pooling, ...).

# Keras losses need numeric targets, so the string quadrant labels produced by
# categorize_val_arousal are mapped to integer class ids.
quadrant_names = ['AN', 'AP', 'PN', 'PP']
quadrant_index = {name: idx for idx, name in enumerate(quadrant_names)}
cat_y_train_ids = np.array([quadrant_index[label] for label in cat_y_train])
cat_y_valid_ids = np.array([quadrant_index[label] for label in cat_y_valid])

# NOTE(review): this rebinds `model`, replacing the regression model defined
# earlier in the notebook.
# NOTE(review): LayerNormalization(axis=1) normalises along the 1920-frame
# axis, unlike the default last-axis normalisation used by the regression
# models -- confirm this is intended.
model = keras.models.Sequential([
    keras.Input(shape=(1920, 20)),
    LayerNormalization(axis=1),
    conv_layer(filters=64, kernel_size=8),
    pooling(),
    conv_layer(filters=64),
    pooling(),
    conv_layer(filters=128),
    pooling(),
    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(0.5),
    # One output unit per quadrant: a single-unit softmax always emits 1.0.
    Dense(units=len(quadrant_names), activation='softmax')
])

# Sparse categorical cross-entropy matches integer class ids (no one-hot needed).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_mfcc_train, cat_y_train_ids,
                    epochs=30,
                    validation_data=(X_mfcc_valid, cat_y_valid_ids),
                    batch_size=32)

NameError: name 'keras' is not defined