# Music Emotion Detection
Music emotion estimation using different machine learning architectures:
- Support Vector Machine (SVM)
- Neural Net (50, 4)
- Neural Net with Dropout (50, 4)
- Neural Net (50, 20, 4)
- k-Nearest Neighbors (KNN)

## Dataset
Emotion recognition 4Q audio emotion dataset: http://mir.dei.uc.pt/downloads.html

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import LeaveOneOut,KFold
from sklearn.preprocessing import OneHotEncoder

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, confusion_matrix

In [4]:
# Read the feature table and the annotation table, strip their identifier
# columns ('IDSong' / 'Song'), and keep what is left as NumPy arrays.
features_frame = pd.read_csv('music_data/top100_features.csv')
annotations_frame = pd.read_csv('music_data/panda_dataset_taffc_annotations.csv')

data = features_frame.drop(columns='IDSong').to_numpy()
labels = annotations_frame.drop(columns='Song').to_numpy()

# Two-element 0/1 code assigned to each of the Q1..Q4 labels.
label_map = {'Q1': [0, 1], 'Q2': [0, 0], 'Q3': [1, 0], 'Q4': [1, 1]}

# Quick sanity check: feature-matrix shape and the first ten labels.
print(data.shape)
print(labels[:10])

(900, 100)
[['Q3']
 ['Q2']
 ['Q2']
 ['Q1']
 ['Q3']
 ['Q4']
 ['Q1']
 ['Q3']
 ['Q4']
 ['Q2']]


## SVM 10-Fold Cross Validation

In [5]:
# 10-fold cross-validation of a support vector classifier (SVC, gamma='auto').
# Prints the mean micro-F1 over the folds, the confusion matrix summed over
# all folds, and per-class precision / recall / F1 derived from that matrix.
N_SPLITS = 10
kfold = KFold(n_splits=N_SPLITS)
labels_flat = labels.ravel()
# Fixed, sorted class list: passing it to confusion_matrix keeps every fold's
# matrix the same shape and order even if a class is absent from that fold,
# so the fold matrices can always be summed.
class_names = np.unique(labels_flat)
avg_score = 0
total_confusion = 0
for train_indices, test_indices in kfold.split(data):
  train_X, test_X = data[train_indices, :], data[test_indices, :]
  train_Y, test_Y = labels_flat[train_indices], labels_flat[test_indices]

  # The pipeline fits StandardScaler on the training fold only and re-applies
  # that same scaling inside predict(), so a separate manual scaler (which
  # standardised the data a second time) is not needed.
  clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
  clf.fit(train_X, train_Y)

  pred = clf.predict(test_X)
  avg_score += f1_score(test_Y, pred, average='micro')
  total_confusion += confusion_matrix(test_Y, pred, labels=class_names)

# Average over the configured number of folds rather than a literal 10.
print(avg_score/N_SPLITS)
print(total_confusion)
# Rows of the pooled matrix are true classes, columns are predicted classes:
# column sums give precision denominators, row sums give recall denominators.
precision = np.diag(total_confusion)/np.sum(total_confusion, axis=0)
recall = np.diag(total_confusion)/np.sum(total_confusion, axis=1)
f1 = (2*precision*recall)/(precision+recall)
print(precision)
print(recall)
print(f1)


0.7488888888888889
[[184  15   5  21]
 [ 29 185   6   5]
 [ 18   6 151  50]
 [ 26   1  44 154]]
[0.71595331 0.89371981 0.73300971 0.66956522]
[0.81777778 0.82222222 0.67111111 0.68444444]
[0.76348548 0.85648148 0.70069606 0.67692308]


## Neural Net (50x4) 

In [8]:
# 10-fold cross-validation of a one-hidden-layer network
# (Dense 50 ReLU -> Dense 4 softmax) trained on one-hot encoded labels.
# Prints the mean micro-F1 over the folds, the confusion matrix summed over
# all folds, and per-class precision / recall / F1 derived from that matrix.
SEED = 0
N_SPLITS = 10
# NOTE(review): the shuffle buffer is smaller than a training fold, so
# shuffling happens inside a sliding window rather than over the whole fold —
# confirm this is intended.
SHUFFLE_BUFFER_SIZE = 90
BATCH_SIZE = 1

# Seed TensorFlow so weight initialisation and dataset shuffling repeat
# between runs of this cell.
tf.random.set_seed(SEED)

cat = OneHotEncoder()
one_hot_labels = cat.fit_transform(labels).toarray()
# Defined here so the cell does not depend on an earlier cell having run.
labels_flat = labels.ravel()
# Column order of the one-hot encoding; also used to pin the confusion-matrix
# shape so fold matrices can always be summed.
class_names = cat.categories_[0]

kfold = KFold(n_splits=N_SPLITS)
avg_score = 0
total_confusion = 0
for ctr, (train_indices, test_indices) in enumerate(kfold.split(data), start=1):
  print("KFOLD SPLIT: "+str(ctr))
  train_X, test_X = data[train_indices, :], data[test_indices, :]
  # Train on one-hot targets, evaluate against the original string labels.
  train_Y, test_Y = one_hot_labels[train_indices], labels_flat[test_indices]

  # Standardise using statistics from the training fold only.
  scaler = StandardScaler()
  scaler.fit(train_X)
  train_dataset = tf.data.Dataset.from_tensor_slices((scaler.transform(train_X), train_Y))
  train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

  # A fresh model per fold so no weights leak between folds.
  model = keras.Sequential([
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(4, activation='softmax')
  ])

  model.compile(optimizer=keras.optimizers.RMSprop(),
              loss=keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])

  model.fit(train_dataset, epochs=10, verbose=0)

  # Decode softmax outputs explicitly: the most probable column index is
  # mapped back to its class label.
  probs = model.predict(scaler.transform(test_X), verbose=0)
  pred = class_names[np.argmax(probs, axis=1)]
  avg_score += f1_score(test_Y, pred, average='micro')
  total_confusion += confusion_matrix(test_Y, pred, labels=class_names)

# Average over the configured number of folds rather than a literal 10.
print(avg_score/N_SPLITS)
print(total_confusion)
# Rows of the pooled matrix are true classes, columns are predicted classes.
precision = np.diag(total_confusion)/np.sum(total_confusion, axis=0)
recall = np.diag(total_confusion)/np.sum(total_confusion, axis=1)
f1 = (2*precision*recall)/(precision+recall)
print(precision)
print(recall)
print(f1)


KFOLD SPLIT: 1
KFOLD SPLIT: 2
KFOLD SPLIT: 3
KFOLD SPLIT: 4
KFOLD SPLIT: 5
KFOLD SPLIT: 6
KFOLD SPLIT: 7
KFOLD SPLIT: 8
KFOLD SPLIT: 9
KFOLD SPLIT: 10
0.7111111111111111
[[176  17   8  24]
 [ 37 176   7   5]
 [ 14  10 131  70]
 [ 32   3  33 157]]
[0.67953668 0.85436893 0.73184358 0.61328125]
[0.78222222 0.78222222 0.58222222 0.69777778]
[0.72727273 0.81670534 0.64851485 0.65280665]


## Neural Net (50x4 with Dropout)

In [13]:
# 10-fold cross-validation of a one-hidden-layer network with dropout
# (Dense 50 ReLU -> Dropout 0.5 -> Dense 4 softmax) trained on one-hot labels.
# Prints the mean micro-F1 over the folds, the confusion matrix summed over
# all folds, and per-class precision / recall / F1 derived from that matrix.
SEED = 0
N_SPLITS = 10
# NOTE(review): the shuffle buffer is smaller than a training fold, so
# shuffling happens inside a sliding window rather than over the whole fold —
# confirm this is intended.
SHUFFLE_BUFFER_SIZE = 90
BATCH_SIZE = 1

# Seed TensorFlow so weight initialisation, dropout masks and dataset
# shuffling repeat between runs of this cell.
tf.random.set_seed(SEED)

cat = OneHotEncoder()
one_hot_labels = cat.fit_transform(labels).toarray()
# Column order of the one-hot encoding; also used to pin the confusion-matrix
# shape so fold matrices can always be summed.
class_names = cat.categories_[0]

# Two-bit encoding of the labels via label_map. Not consumed by this cell;
# retained in case other cells rely on it.
labels_2dim = np.array([label_map[label[0]] for label in labels])
labels_flat = labels.ravel()

kfold = KFold(n_splits=N_SPLITS)
avg_score = 0
total_confusion = 0
for ctr, (train_indices, test_indices) in enumerate(kfold.split(data), start=1):
  print("KFOLD SPLIT: "+str(ctr))
  train_X, test_X = data[train_indices, :], data[test_indices, :]
  # Train on one-hot targets, evaluate against the original string labels.
  train_Y, test_Y = one_hot_labels[train_indices], labels_flat[test_indices]

  # Standardise using statistics from the training fold only.
  scaler = StandardScaler()
  scaler.fit(train_X)
  train_dataset = tf.data.Dataset.from_tensor_slices((scaler.transform(train_X), train_Y))
  train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

  # A fresh model per fold so no weights leak between folds.
  model = keras.Sequential([
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4, activation='softmax')
  ])

  model.compile(optimizer=keras.optimizers.RMSprop(),
              loss=keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])

  model.fit(train_dataset, epochs=10, verbose=0)

  # Decode softmax outputs explicitly: the most probable column index is
  # mapped back to its class label.
  probs = model.predict(scaler.transform(test_X), verbose=0)
  pred = class_names[np.argmax(probs, axis=1)]
  avg_score += f1_score(test_Y, pred, average='micro')
  total_confusion += confusion_matrix(test_Y, pred, labels=class_names)

# Average over the configured number of folds rather than a literal 10.
print(avg_score/N_SPLITS)
print(total_confusion)
# Rows of the pooled matrix are true classes, columns are predicted classes.
precision = np.diag(total_confusion)/np.sum(total_confusion, axis=0)
recall = np.diag(total_confusion)/np.sum(total_confusion, axis=1)
f1 = (2*precision*recall)/(precision+recall)
print(precision)
print(recall)
print(f1)


KFOLD SPLIT: 1
KFOLD SPLIT: 2
KFOLD SPLIT: 3
KFOLD SPLIT: 4
KFOLD SPLIT: 5
KFOLD SPLIT: 6
KFOLD SPLIT: 7
KFOLD SPLIT: 8
KFOLD SPLIT: 9
KFOLD SPLIT: 10
0.7155555555555556
[[177  18   7  23]
 [ 36 179   5   5]
 [ 16  10 128  71]
 [ 27   2  36 160]]
[0.69140625 0.85645933 0.72727273 0.61776062]
[0.78666667 0.79555556 0.56888889 0.71111111]
[0.73596674 0.82488479 0.63840399 0.66115702]


## Neural Net (50x20x4)

In [10]:
# 10-fold cross-validation of a two-hidden-layer network
# (Dense 50 ReLU -> Dense 20 ReLU -> Dense 4 softmax) trained on one-hot labels.
# Prints the mean micro-F1 over the folds, the confusion matrix summed over
# all folds, and per-class precision / recall / F1 derived from that matrix.
SEED = 0
N_SPLITS = 10
# NOTE(review): the shuffle buffer is smaller than a training fold, so
# shuffling happens inside a sliding window rather than over the whole fold —
# confirm this is intended.
SHUFFLE_BUFFER_SIZE = 90
BATCH_SIZE = 1

# Seed TensorFlow so weight initialisation and dataset shuffling repeat
# between runs of this cell.
tf.random.set_seed(SEED)

cat = OneHotEncoder()
one_hot_labels = cat.fit_transform(labels).toarray()
# Column order of the one-hot encoding; also used to pin the confusion-matrix
# shape so fold matrices can always be summed.
class_names = cat.categories_[0]

# Two-bit encoding of the labels via label_map. Not consumed by this cell;
# retained in case other cells rely on it.
labels_2dim = np.array([label_map[label[0]] for label in labels])
labels_flat = labels.ravel()

kfold = KFold(n_splits=N_SPLITS)
avg_score = 0
total_confusion = 0
for ctr, (train_indices, test_indices) in enumerate(kfold.split(data), start=1):
  print("KFOLD SPLIT: "+str(ctr))
  train_X, test_X = data[train_indices, :], data[test_indices, :]
  # Train on one-hot targets, evaluate against the original string labels.
  train_Y, test_Y = one_hot_labels[train_indices], labels_flat[test_indices]

  # Standardise using statistics from the training fold only.
  scaler = StandardScaler()
  scaler.fit(train_X)
  train_dataset = tf.data.Dataset.from_tensor_slices((scaler.transform(train_X), train_Y))
  train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

  # A fresh model per fold so no weights leak between folds.
  model = keras.Sequential([
    keras.layers.Dense(50, activation='relu'),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(4, activation='softmax')
  ])

  model.compile(optimizer=keras.optimizers.RMSprop(),
              loss=keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])

  model.fit(train_dataset, epochs=10, verbose=0)

  # Decode softmax outputs explicitly: the most probable column index is
  # mapped back to its class label.
  probs = model.predict(scaler.transform(test_X), verbose=0)
  pred = class_names[np.argmax(probs, axis=1)]
  avg_score += f1_score(test_Y, pred, average='micro')
  total_confusion += confusion_matrix(test_Y, pred, labels=class_names)

# Average over the configured number of folds rather than a literal 10.
print(avg_score/N_SPLITS)
print(total_confusion)
# Rows of the pooled matrix are true classes, columns are predicted classes.
precision = np.diag(total_confusion)/np.sum(total_confusion, axis=0)
recall = np.diag(total_confusion)/np.sum(total_confusion, axis=1)
f1 = (2*precision*recall)/(precision+recall)
print(precision)
print(recall)
print(f1)

KFOLD SPLIT: 1
KFOLD SPLIT: 2
KFOLD SPLIT: 3
KFOLD SPLIT: 4
KFOLD SPLIT: 5
KFOLD SPLIT: 6
KFOLD SPLIT: 7
KFOLD SPLIT: 8
KFOLD SPLIT: 9
KFOLD SPLIT: 10
0.7033333333333334
[[167  20  10  28]
 [ 36 177   7   5]
 [ 16   9 131  69]
 [ 30   3  34 158]]
[0.67068273 0.84688995 0.71978022 0.60769231]
[0.74222222 0.78666667 0.58222222 0.70222222]
[0.70464135 0.8156682  0.64373464 0.65154639]


## k-Nearest Neighbors

In [11]:
# 10-fold cross-validation of a k-nearest-neighbours classifier (k = 20).
# Prints the mean micro-F1 over the folds, the confusion matrix summed over
# all folds, and per-class precision / recall / F1 derived from that matrix.
N_SPLITS = 10
kfold = KFold(n_splits=N_SPLITS)
labels_flat = labels.ravel()
# Fixed, sorted class list: passing it to confusion_matrix keeps every fold's
# matrix the same shape and order even if a class is absent from that fold,
# so the fold matrices can always be summed.
class_names = np.unique(labels_flat)
avg_score = 0
total_confusion = 0
for train_indices, test_indices in kfold.split(data):
  train_X, test_X = data[train_indices, :], data[test_indices, :]
  train_Y, test_Y = labels_flat[train_indices], labels_flat[test_indices]

  # The pipeline fits StandardScaler on the training fold only and re-applies
  # that same scaling inside predict(), so a separate manual scaler (which
  # standardised the data a second time) is not needed.
  clf = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=20))
  clf.fit(train_X, train_Y)

  pred = clf.predict(test_X)
  avg_score += f1_score(test_Y, pred, average='micro')
  total_confusion += confusion_matrix(test_Y, pred, labels=class_names)

# Average over the configured number of folds rather than a literal 10.
print(avg_score/N_SPLITS)
print(total_confusion)
# Rows of the pooled matrix are true classes, columns are predicted classes:
# column sums give precision denominators, row sums give recall denominators.
precision = np.diag(total_confusion)/np.sum(total_confusion, axis=0)
recall = np.diag(total_confusion)/np.sum(total_confusion, axis=1)
f1 = (2*precision*recall)/(precision+recall)
print(precision)
print(recall)
print(f1)

0.6755555555555556
[[187  10  16  12]
 [ 51 164   5   5]
 [ 36   5 139  45]
 [ 32   1  74 118]]
[0.61111111 0.91111111 0.59401709 0.65555556]
[0.83111111 0.72888889 0.61777778 0.52444444]
[0.70433145 0.80987654 0.60566449 0.58271605]
