In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1. Reading in the data

In [2]:
import json
root_path = "/content/drive/MyDrive/emotifymusic"
with open(root_path+"/labels.json", 'r') as f:
    labels = json.load(f)

In [3]:
#mapping = [' amazement', ' solemnity', ' tenderness',' nostalgia', ' calmness', ' power', ' joyful', ' tension',' sadness']
import pandas as pd
labels = pd.DataFrame(labels)
labels.head()

Unnamed: 0,file_path,output_path,label
0,classical/1.mp3,classical_1.mp3,4
1,classical/1.mp3,classical_1.mp3,4
2,classical/1.mp3,classical_1.mp3,4
3,classical/1.mp3,classical_1.mp3,4
4,classical/1.mp3,classical_1.mp3,4


In [4]:
import numpy as np
path = "/content/drive/MyDrive/emotifymusic/spectrograms/"
log_spectrograms = []
for index,rows in labels.iterrows():
  spec_path =  path+rows["output_path"]+".npy"
  spec = np.load(spec_path)
  log_spectrograms.append(spec)
len(log_spectrograms)


2385

In [5]:
labels["log_spectrogram"] = log_spectrograms
labels.head()

Unnamed: 0,file_path,output_path,label,log_spectrogram
0,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
1,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
2,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
3,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."
4,classical/1.mp3,classical_1.mp3,4,"[[-23.430485, -35.5547, -31.009932, -33.170452..."


In [6]:
# input shape 1024*431
list(labels["log_spectrogram"])[0][1][1]

-23.384499

# 2. Train test split

In [7]:
from sklearn.model_selection import train_test_split
import tensorflow as tf

X = np.array(list(labels["log_spectrogram"])).astype("float32")
X = X[...,np.newaxis]
y = np.array(labels["label"])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [None]:
X_train.shape

(1908, 1024, 431, 1)

# 3. Builiding the basic A2E model


In [8]:
import tensorflow.keras as keras
from tensorflow.keras import layers

model = keras.Sequential()
model.add(keras.layers.Conv2D(64, (5, 5), strides=2, activation="relu", padding="valid",input_shape = (X_train.shape[1],X_train.shape[2],1)))
model.add(keras.layers.BatchNormalization())

# 2nd Layer
model.add(keras.layers.Conv2D(64, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 3rd Layer
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Dropout(0.3))

# 4th Layer
model.add(keras.layers.Conv2D(128, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 5th Layer
model.add(keras.layers.Conv2D(128, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 6th Layer
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Dropout(0.3))

# 7th Layer
model.add(keras.layers.Conv2D(256, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 8th Layer
model.add(keras.layers.Conv2D(256, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 9th Layer
model.add(keras.layers.Conv2D(384, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 10th Layer
model.add(keras.layers.Conv2D(512, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 11th Layer
model.add(keras.layers.Conv2D(256, (3, 3), strides=1, activation="relu", padding="same"))
model.add(keras.layers.BatchNormalization())

# 12th layer Dense
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(9,activation="softmax"))

# 4. Training the model

In [9]:
optimiser = keras.optimizers.Adam(learning_rate=0.0005)
model.compile(optimizer=optimiser,loss="sparse_categorical_crossentropy",metrics=["accuracy"])
history_model = model.fit(X_train, y_train, epochs = 30, batch_size=8, validation_data=(X_test,y_test))
model.summary()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 510, 214, 64)      1664      
                                                                 
 batch_normalization (Batch  (None, 510, 214, 64)      256       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 510, 214, 64)      36928     
                                                                 
 batch_normalization_1 (Bat  (None,

In [13]:
#model.save('/content/drive/MyDrive/emotifymusic/emotion_model.keras')
model.save('/content/drive/MyDrive/emotifymusic/emotion_model.h5')

  saving_api.save_model(


In [12]:
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/emotifymusic/emotion_model.h5')

OSError: No file or directory found at /content/drive/MyDrive/emotifymusic/emotion_model.h5

# 4. Inference with loaded model

In [None]:
#loaded_model.predict(X_test[0])
single_test = X_test[0]
single_test = np.expand_dims(single_test, axis=0)
results = loaded_model.predict(single_test)



In [None]:
mapping = [' amazement', ' solemnity', ' tenderness',' nostalgia', ' calmness', ' power', ' joyful', ' tension',' sadness']

mapping[np.argmax(results)]

' nostalgia'

array([[[  8.494204  ],
        [  1.417194  ],
        [ 10.17167   ],
        ...,
        [-19.147867  ],
        [  2.8694592 ],
        [  8.560003  ]],

       [[  7.3750124 ],
        [  7.1665554 ],
        [ 12.446455  ],
        ...,
        [ -6.4490595 ],
        [  3.9057302 ],
        [ 13.92552   ]],

       [[  4.3953567 ],
        [ -0.94569147],
        [ 11.233671  ],
        ...,
        [ -4.930639  ],
        [  2.5292344 ],
        [ 16.082832  ]],

       ...,

       [[-23.196985  ],
        [-29.235525  ],
        [-34.83545   ],
        ...,
        [-34.83545   ],
        [-34.83545   ],
        [-30.483929  ]],

       [[-23.196182  ],
        [-29.2332    ],
        [-34.83545   ],
        ...,
        [-34.83545   ],
        [-34.83545   ],
        [-30.484558  ]],

       [[-23.194138  ],
        [-29.232635  ],
        [-34.83545   ],
        ...,
        [-34.83545   ],
        [-34.83545   ],
        [-30.485058  ]]], dtype=float32)