In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
!pip install tensorflow_addons

Collecting tensorflow_addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow_addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.23.0 typeguard-2.13.3


# 1. Reading in the data

In [2]:
import json
root_path = "/content/drive/MyDrive/emotifymusic"
with open(root_path+"/labels.json", 'r') as f:
    labels = json.load(f)

In [3]:
#mapping = [' amazement', ' solemnity', ' tenderness',' nostalgia', ' calmness', ' power', ' joyful', ' tension',' sadness']
import pandas as pd
labels = pd.DataFrame(labels)
labels.head()

Unnamed: 0,file_path,output_path,label
0,classical/1.mp3,classical_1.mp3,4
1,classical/1.mp3,classical_1.mp3,4
2,classical/1.mp3,classical_1.mp3,4
3,classical/1.mp3,classical_1.mp3,4
4,classical/1.mp3,classical_1.mp3,4


In [5]:
import numpy as np
path = "/content/drive/MyDrive/emotifymusic/spectrograms/"
log_spectrograms = []
for index,rows in labels.iterrows():
  spec_path =  path+rows["output_path"]+".npy"
  spec = np.load(spec_path)
  log_spectrograms.append(spec)
len(log_spectrograms)


2385

In [7]:
labels["log_spectrogram"] = log_spectrograms
labels.head()

Unnamed: 0,file_path,output_path,label,log_spectrogram
0,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
1,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
2,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
3,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
4,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."


In [98]:
# input shape 1024*431
list(labels["log_spectrogram"])[0][1][1]

-23.384499

# 2. Train test split

In [102]:
from sklearn.model_selection import train_test_split
import tensorflow as tf

X = np.array(list(labels["log_spectrogram"])).astype("float32")
X = X[...,np.newaxis]
y = np.array(labels["label"])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



431

In [104]:
X_train.shape

(1908, 1024, 431, 1)

# 3. Builiding the basic A2E model


In [105]:
import tensorflow.keras as keras
from tensorflow.keras import layers
import tensorflow_addons as tfa

model = keras.Sequential()
model.add(keras.layers.Conv2D(64, (5, 5), strides=2, activation="relu", padding="valid",input_shape = (X_train.shape[1],X_train.shape[2],1)))
model.add(keras.layers.BatchNormalization())

# 2nd Layer
model.add(keras.layers.Conv2D(64, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 3rd Layer
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Dropout(0.3))

# 4th Layer
model.add(keras.layers.Conv2D(128, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 5th Layer
model.add(keras.layers.Conv2D(128, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 6th Layer
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Dropout(0.3))

# 7th Layer
model.add(keras.layers.Conv2D(256, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 8th Layer
model.add(keras.layers.Conv2D(256, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 9th Layer
model.add(keras.layers.Conv2D(384, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 10th Layer
model.add(keras.layers.Conv2D(512, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 11th Layer
model.add(keras.layers.Conv2D(256, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 12th Layer
model.add(tfa.layers.AdaptiveAveragePooling2D((1,1)))

# 13th layer Dense
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(9,activation="softmax"))

# 4. Training the model

In [106]:
optimiser = keras.optimizers.Adam(learning_rate=0.0005)
model.compile(optimizer=optimiser,loss="sparse_categorical_crossentropy",metrics=["accuracy"])
history_model = model.fit(X_train, y_train, epochs = 50, batch_size=8, validation_data=(X_test,y_test))
model.summary()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_46 (Conv2D)          (None, 510, 214, 64)      1664      
                                                                 
 batch_normalization_45 (Ba  (None, 510, 214, 64)      256       
 tchNormalization)                                       

In [108]:
model.save('/content/drive/MyDrive/emotifymusic/my_model.keras')