In [None]:
import zipfile
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from keras import layers
from keras.regularizers import l1
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from keras.models import Sequential
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten, concatenate
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
import pylab as pl
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from sklearn import preprocessing

# audio lib
import librosa
import IPython.display as ipd

In [None]:
# Root folder of the PMEmo2019 dataset; the CSV paths below are built from it.
path = '../input/PMEmo2019'

# Read the static feature table and index it by song id.
features_csv = path + '/features/static_features.csv'
features = pd.read_csv(features_csv).set_index('musicId')
features.shape

In [None]:
# Preview only the first rows instead of rendering the whole feature table.
features.head()

In [None]:
# Sample clip from the dataset's chorus folder. The location is derived from
# `path` so it follows the dataset root instead of repeating it.
example_mp3 = path + '/chorus/945.mp3'
ipd.Audio(example_mp3)

In [None]:
# Read the static annotation table and index it by song id.
annotations_csv = path + '/annotations/static_annotations.csv'
annotations = pd.read_csv(annotations_csv).set_index('musicId')
annotations.shape

In [None]:
# Show the first five annotation rows.
annotations.head(5)

In [None]:
# Read the dynamic annotation table; its index is left as loaded.
dynamic_csv = path + '/annotations/dynamic_annotations.csv'
din_annotations = pd.read_csv(dynamic_csv)
din_annotations.shape

Mancano delle canzoni: provo a verificare se la causa è un errore nel calcolo della media oppure se mancano delle annotazioni.

In [None]:
# Average the 'Arousal(mean)' column over all dynamic-annotation rows of each song.
arousal_by_song = din_annotations.groupby('musicId')['Arousal(mean)']
mean_Arousal = arousal_by_song.mean()
mean_Arousal.shape

Il numero di canzoni è uguale a quello del csv static_annotations, quindi per alcune canzoni mancano le annotazioni. Cerco gli indici mancanti.

In [None]:
# Index labels that appear in the feature table but not in the annotation table.
feature_ids = features.index
annotated_ids = annotations.index
diff_index = feature_ids.difference(annotated_ids)
diff_index

In [None]:
# Remove the feature rows listed in diff_index.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second run no longer raises KeyError for rows that were
# already removed.
features = features.drop(index=diff_index, errors='ignore')

features.shape

In [None]:
# Build one class label per annotation row from two score columns, each
# thresholded at 0.5:
#   0 = low / low, 1 = low / high, 2 = high / low, 3 = high / high
# The scores are taken by position (columns 1 and 2 once the index has been
# flattened into column 0).
# NOTE(review): presumably these are the arousal and valence means — confirm
# against the CSV header.

# Flatten the named index into a column only once. An unconditional
# reset_index() would add an extra 'index' column on a re-run and silently
# shift which columns positions 1 and 2 refer to.
if annotations.index.name is not None:
    annotations = annotations.reset_index()

first_score = annotations.iloc[:, 1]
second_score = annotations.iloc[:, 2]

# A row with a missing score cannot be classified. Skipping it silently would
# make `labels` shorter than, and misaligned with, the feature rows.
missing = first_score.isna() | second_score.isna()
if missing.any():
    raise ValueError(
        f"{int(missing.sum())} annotation rows have a missing score; "
        "labels would no longer line up with the feature rows"
    )

# 2 * (first score is high) + (second score is high) gives the 0..3 encoding above.
labels = (
    2 * (first_score > 0.5).astype(int) + (second_score > 0.5).astype(int)
).tolist()

In [None]:
# Number of labels produced by the labelling cell.
len(labels)

In [None]:
# Target vector used by the later cells.
Y = np.array(labels)

# Two-column table: class label, number of samples in that class.
unique, counts = np.unique(Y, return_counts=True)
print(np.asarray((unique, counts)).T)

# Bar chart of the class distribution, drawn on an explicit Axes so the figure
# does not depend on pyplot's implicit "current axes" state.
pos = np.arange(len(unique))
width = 1.0

fig, ax = plt.subplots()
ax.bar(pos, counts, width, color=['r', 'b', 'g', 'y'])
ax.set_xticks(pos)
ax.set_xticklabels(unique)
ax.set_xlabel('Class label')
ax.set_ylabel('Number of songs')
ax.set_title('Class distribution')
plt.show()



In [None]:
# Convert the feature table to a plain array. np.asarray accepts both a
# DataFrame and an ndarray, so re-running this cell no longer fails once
# `features` has already been converted.
features = np.asarray(features)

# Rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set minima/maxima influence the training inputs. Consider fitting on
# the training rows only.
scaler = preprocessing.MinMaxScaler()
features_scaled = scaler.fit_transform(features)

features_scaled[0]

# Fully-Connected Neural Network

In [None]:
# Hold out 10% of the samples for testing. The fixed random_state makes the
# split (and therefore the reported scores) reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    features_scaled, Y, test_size=0.1, random_state=42
)

# One-hot encode the targets. num_classes is pinned to the four classes
# (labels 0..3): otherwise to_categorical infers the width from the largest
# label present, and a split that happens to lack class 3 would get fewer
# columns than the network's 4-unit output.
y_train = np_utils.to_categorical(y_train, num_classes=4)
y_test = np_utils.to_categorical(y_test, num_classes=4)

print(x_test.shape, y_test.shape)

In [None]:
# Fully-connected classifier: four ReLU hidden layers with L1 weight
# regularisation, each followed by dropout, and a softmax output layer.
dims = x_train.shape[1]   # number of input features
nb_classes = 4            # width of the softmax output

# (units, dropout rate) for each hidden layer, in order.
hidden_layers = [(2048, 0.5), (1024, 0.4), (512, 0.3), (64, 0.2)]

mlp = Sequential()
for position, (units, drop_rate) in enumerate(hidden_layers):
    # Only the first layer declares the input shape.
    first_layer_args = {'input_shape': (dims,)} if position == 0 else {}
    mlp.add(Dense(units, activation='relu', kernel_regularizer=l1(0.001),
                  **first_layer_args))
    mlp.add(Dropout(drop_rate))

# The output width comes from nb_classes instead of a second hard-coded 4.
mlp.add(Dense(nb_classes, activation='softmax'))

# compile: optimizer & losses/metrics
mlp.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
mlp.summary()

In [None]:
# Checkpoint file for the MLP; it is overwritten only when the monitored
# validation loss improves (save_best_only with mode='min').
filepath_mlp = "weights.mlp.hdf5"
checkpoint = ModelCheckpoint(
    filepath=filepath_mlp,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train the MLP; 10% of the training data is held out for validation and the
# checkpoint callback is invoked during training.
fit_options = dict(
    batch_size=16,
    epochs=300,
    verbose=0,
    validation_split=0.1,
    callbacks=[checkpoint],
)
model_history = mlp.fit(x_train, y_train, **fit_options)

# Load the weights stored in the checkpoint file back into the model.
mlp.load_weights(filepath_mlp)

In [None]:
# Kept for any cell that still reads it; plot_history no longer depends on it.
x_plot = list(range(1,301))

def plot_history(network_history):
  """Plot training (and, when recorded, validation) loss and accuracy per epoch.

  Args:
    network_history: object whose ``history`` attribute maps metric names
      ('loss', 'accuracy', optionally 'val_loss' / 'val_accuracy') to one
      value per epoch, e.g. the value returned by ``fit``.

  The x axis is derived from the number of recorded epochs, so the function
  works for any training length rather than only for exactly 300 epochs.
  """
  history = network_history.history
  epochs = range(1, len(history['loss']) + 1)

  # (metric key, y-axis label, legend location; None keeps matplotlib's default)
  panels = (('loss', 'Loss', None), ('accuracy', 'Accuracy', 'lower right'))
  for metric, y_label, legend_loc in panels:
    plt.figure()
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.plot(epochs, history[metric])
    legend_entries = ['Training']
    # Validation curves exist only when validation data was used in training.
    if 'val_' + metric in history:
      plt.plot(epochs, history['val_' + metric])
      legend_entries.append('Validation')
    if legend_loc is None:
      plt.legend(legend_entries)
    else:
      plt.legend(legend_entries, loc=legend_loc)
  plt.show()

# Draw the loss and accuracy curves of the training run.
plot_history(network_history=model_history)

In [None]:
# Evaluate the model on the held-out test split; the result is displayed below.
score = mlp.evaluate(x=x_test, y=y_test, batch_size=16)

score