In [1]:
import zipfile
import os, shutil
import collections
from pathlib import Path
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from keras import layers
from keras.regularizers import l2
from keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from keras.models import Sequential
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten, concatenate
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
from sklearn.utils import shuffle
import pylab as pl
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing
import itertools
import math


# audio lib
import librosa
import librosa.display
from librosa.util import fix_length

import IPython.display as ipd
from PIL import Image

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Scratch directory that receives the rendered spectrogram PNG files.
output_folder = "/kaggle/working/melspectrogram_tmp/"
# exist_ok=True keeps this cell re-runnable: os.mkdir raises FileExistsError
# when the folder is still there from a previous execution.
os.makedirs(output_folder, exist_ok=True)

In [4]:
# Load the playlist annotations (';'-separated, every column read as a string),
# drop the Label2 / Label3 columns and discard rows that contain a missing value.
df = (
    pd.read_csv('../input/my-playlist/my-playlist.csv', sep=';', dtype=str)
    .drop(columns=['Label2', 'Label3'])
    .dropna()
)

df

Unnamed: 0,ID,Label1
0,Fabrizio De Andr - La canzone di Marinella,0
1,John Coltrane - Naima,1
2,Nirvana - Come As You Are,2
3,Queen - Radio Ga Ga,3
4,Red Hot Chili Peppers - By The Way,3
5,Imagine Dragons - On Top of the World,3
6,Green Day - Time of Your Life,0
7,Oasis - Wonderwall,1
8,Eagles - Hotel California,1
9,Red Hot Chili Peppers - Snow,2


# Fine-Tuning

In [5]:
# Folder that contains the playlist audio files.
directory = '../input/my-playlist/Playlist/'

# Audio loading: target sample rate and the maximum number of seconds read per track.
SAMPLE_RATE = 22050
TRACK_DURATION = 480
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE

# Every track is cut into equally sized segments (480 s / 80 segments = 6 s each).
num_segments = 80
samples_per_segment = SAMPLES_PER_TRACK // num_segments

# Mel-spectrogram settings.
# NOTE(review): n_fft is defined here but is not passed to the melspectrogram
# calls in the visible cells — confirm whether it is meant to be used.
num_mel = 96
n_fft = 2048
hop_length = 512

# Frame count that a segment's spectrogram must have to be kept by the loops below.
num_mel_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

In [6]:
# Render mel-spectrogram images for every song listed in the playlist table.
# Only the first 16 valid segments of each song are saved: they later form
# 4 LSTM inputs of 4 frames each. Files are named "<song>-<segment index>.png".
for filename in df['ID']:

    # number of valid segments seen so far for this song
    counter = 0

    print("Generando spettrogrammi da: "+filename)

    file_path = os.path.join(directory, filename+'.mp3')
    signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE,  offset=5.0, duration=TRACK_DURATION)

    # one spectrogram per 6-second segment of the song
    for d in range(num_segments):

        # first and one-past-last sample of the current segment
        start = samples_per_segment * d
        finish = start + samples_per_segment

        # mel spectrogram of the segment, transposed so len(mel) is the frame count
        mel = librosa.feature.melspectrogram(y=signal[start:finish], sr=sample_rate, n_mels=num_mel, hop_length=hop_length)
        mel = mel.T

        # skip segments that do not have the expected number of frames
        if len(mel) != num_mel_vectors_per_segment:
            continue

        counter = counter + 1

        img_name = str(d) + '.png'

        fig = plt.figure(figsize=(3, 3), dpi=100)
        S_dB = librosa.power_to_db(mel.T)
        img = librosa.display.specshow(S_dB)

        new_filename = filename+'-'+img_name

        plt.savefig("{}{}".format(output_folder, new_filename))
        plt.close(fig)

        # All 16 images for this song are written: stop here instead of
        # computing spectrograms for the remaining segments only to discard them.
        if counter >= 16:
            break


Generando spettrogrammi da: Fabrizio De Andr - La canzone di Marinella
Generando spettrogrammi da: John Coltrane - Naima
Generando spettrogrammi da: Nirvana - Come As You Are
Generando spettrogrammi da: Queen - Radio Ga Ga
Generando spettrogrammi da: Red Hot Chili Peppers - By The Way
Generando spettrogrammi da: Imagine Dragons - On Top of the World
Generando spettrogrammi da: Green Day - Time of Your Life
Generando spettrogrammi da: Oasis - Wonderwall
Generando spettrogrammi da: Eagles - Hotel California
Generando spettrogrammi da: Red Hot Chili Peppers - Snow
Generando spettrogrammi da: Caparezza - Vengo dalla Luna
Generando spettrogrammi da: Sum 41 - The New Sensation
Generando spettrogrammi da: OneRepublic - Lets Hurt Tonight
Generando spettrogrammi da: System Of A Down - Lonely Day
Generando spettrogrammi da: Fabrizio De Andr - Il Testamento di Tito
Generando spettrogrammi da: Green Day - Wake Me Up When September Ends
Generando spettrogrammi da: Nirvana - Smells Like Teen Spirit


In [7]:
# Assemble the training arrays from the rendered spectrograms:
# 16 images per song go into x_train and 4 label entries per song into y_train,
# i.e. one label for each group of 4 consecutive images.
x_train = []
y_train = []

for song in df['ID']:
    print("Elaboro: "+song)

    # label of the first playlist row whose ID matches this song
    song_label = df.loc[df['ID'] == song, 'Label1'].iloc[0]
    print("Label: "+song_label)

    y_train.extend([song_label] * 4)

    # images are read in segment order: "<song>-0.png" ... "<song>-15.png"
    for i in range(16):
        pic = image.load_img(f"{output_folder}{song}-{i}.png")
        x_train.append(np.array(pic))

x_train = np.array(x_train)
y_train = np.array(y_train, dtype=int)

print(x_train.shape)
print(y_train.shape)


Elaboro: Fabrizio De Andr - La canzone di Marinella
Label: 0
Elaboro: John Coltrane - Naima
Label: 1
Elaboro: Nirvana - Come As You Are
Label: 2
Elaboro: Queen - Radio Ga Ga
Label: 3
Elaboro: Red Hot Chili Peppers - By The Way
Label: 3
Elaboro: Imagine Dragons - On Top of the World
Label: 3
Elaboro: Green Day - Time of Your Life
Label: 0
Elaboro: Oasis - Wonderwall
Label: 1
Elaboro: Eagles - Hotel California
Label: 1
Elaboro: Red Hot Chili Peppers - Snow
Label: 2
Elaboro: Caparezza - Vengo dalla Luna
Label: 2
Elaboro: Sum 41 - The New Sensation
Label: 3
Elaboro: OneRepublic - Lets Hurt Tonight
Label: 3
Elaboro: System Of A Down - Lonely Day
Label: 0
Elaboro: Fabrizio De Andr - Il Testamento di Tito
Label: 0
Elaboro: Green Day - Wake Me Up When September Ends
Label: 0
Elaboro: Nirvana - Smells Like Teen Spirit
Label: 2
Elaboro: Led Zeppelin - Stairway to Heaven
Label: 1
Elaboro: System Of A Down - Toxicity
Label: 2
(304, 300, 300, 3)
(76,)


In [8]:
# Load the saved network and build a model that stops at its 'flatten' layer,
# so predict() returns that layer's activations instead of the final output.
layer_name = 'flatten'
base_net = load_model('../input/effnetb3spectrogram/weights.cnn.hdf5')
feature_extractor = Model(
    inputs=base_net.input,
    outputs=base_net.get_layer(layer_name).output,
)

2022-06-25 14:02:47.625824: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-25 14:02:47.730519: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-25 14:02:47.731376: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-25 14:02:47.733555: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [9]:
# Run every spectrogram image through the feature extractor.
# NOTE: x_train must still be the image array built by the loading cell; a later
# cell rebinds x_train to the reshaped features, so re-running this cell after
# that one feeds the wrong data to predict().
feature_train = feature_extractor.predict(x_train, verbose=1)
print(feature_train.shape)

2022-06-25 14:02:53.703763: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-06-25 14:02:55.393466: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


(304, 1280)


In [10]:
# Group the per-image feature vectors into sequences of 4 consecutive frames.
reshaped_train = feature_train.reshape((feature_train.shape[0] // 4, 4, -1))

# Shuffle sequences and labels with the same permutation so that consecutive
# sequences of one song are not kept next to each other.
# NOTE: y_train is rebound to the shuffled labels while reshaped_train is rebuilt
# in the original order, so re-running this cell without first re-running the cell
# that builds y_train pairs features with the wrong labels.
x_train, y_train = shuffle(reshaped_train, y_train)

# NOTE: the "test" arrays are the training arrays themselves, so any score
# computed on them is a training score, not a held-out one.
x_test, y_test = x_train, y_train

print(reshaped_train.shape)

(76, 4, 1280)


In [11]:
# Load the saved sequence model that is fine-tuned on the playlist features.
LSTM = load_model('../input/lstmspectrogram/weights.cnn.hdf5')

# Checkpoint: overwrite the file only when the monitored 'accuracy' improves.
# No validation data is passed to fit(), so this is the accuracy on the training data.
filepath_LSTM = "./weights.LSTM.hdf5"
checkpoint = ModelCheckpoint(
    filepath_LSTM,
    monitor='accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

LSTM.compile(
    optimizer='RMSprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

LSTM.fit(x_train, y_train, batch_size=32, epochs=50, callbacks=[checkpoint])

Epoch 1/50

Epoch 00001: accuracy improved from -inf to 0.36842, saving model to ./weights.LSTM.hdf5
Epoch 2/50

Epoch 00002: accuracy improved from 0.36842 to 0.46053, saving model to ./weights.LSTM.hdf5
Epoch 3/50

Epoch 00003: accuracy improved from 0.46053 to 0.56579, saving model to ./weights.LSTM.hdf5
Epoch 4/50

Epoch 00004: accuracy did not improve from 0.56579
Epoch 5/50

Epoch 00005: accuracy improved from 0.56579 to 0.72368, saving model to ./weights.LSTM.hdf5
Epoch 6/50

Epoch 00006: accuracy improved from 0.72368 to 0.76316, saving model to ./weights.LSTM.hdf5
Epoch 7/50

Epoch 00007: accuracy did not improve from 0.76316
Epoch 8/50

Epoch 00008: accuracy did not improve from 0.76316
Epoch 9/50

Epoch 00009: accuracy did not improve from 0.76316
Epoch 10/50

Epoch 00010: accuracy did not improve from 0.76316
Epoch 11/50

Epoch 00011: accuracy did not improve from 0.76316
Epoch 12/50

Epoch 00012: accuracy did not improve from 0.76316
Epoch 13/50

Epoch 00013: accuracy impr

<keras.callbacks.History at 0x7f81783491d0>

In [12]:
# Restore the best checkpoint written during training before scoring the model.
LSTM.load_weights(filepath_LSTM)

# score[1] is the 'accuracy' metric requested in compile(); score[0] is the loss.
# NOTE(review): the printed label says "Convolutional Neural Network" although the
# model evaluated here is the one bound to LSTM — confirm the intended wording.
score = LSTM.evaluate(x_test, y_test, verbose=0)
print('Convolutional Neural Network - accuracy:', score[1],"\n")

Convolutional Neural Network - accuracy: 0.9736841917037964 



# Verifica classificazione su playlist

In [13]:
# Empty the temporary directory by deleting it and creating it again, so the
# spectrograms rendered for training do not mix with the ones rendered below.
shutil.rmtree(output_folder)
os.mkdir(output_folder)


In [14]:
# Per-song results, one list per output column: the song ID plus the percentage
# of predictions that fall in each arousal (A) / valence (V) quadrant.
data = {column: [] for column in ('ID', 'A-V-', 'A-V+', 'A+V-', 'A+V+')}

# Folder that contains the playlist audio files.
directory = '../input/my-playlist/Playlist/'

# Audio loading: target sample rate and the maximum number of seconds read per track.
SAMPLE_RATE = 22050
TRACK_DURATION = 480
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE

# Every track is cut into equally sized segments (480 s / 80 segments = 6 s each).
num_segments = 80
samples_per_segment = SAMPLES_PER_TRACK // num_segments

# Mel-spectrogram settings (same values as in the fine-tuning section).
num_mel = 96
n_fft = 2048
hop_length = 512

# Frame count that a segment's spectrogram must have to be kept by the loop below.
num_mel_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

In [15]:
import re
def sorted_alphanumeric(data):
    """Return the items of ``data`` sorted in natural (human) order.

    Runs of ASCII digits are compared as integers and all other text is
    compared case-insensitively, so ``'2.png'`` sorts before ``'10.png'``.

    Args:
        data: Iterable of strings (for example file names).

    Returns:
        A new list containing the same items in natural order.
    """
    def alphanum_key(key):
        # re.split with a capturing group alternates text chunks and the
        # captured digit runs, so odd positions always hold the ASCII digits
        # matched by [0-9]+. Deciding by position instead of str.isdigit()
        # keeps characters such as '²' as text: isdigit() accepts them but
        # int() cannot parse them, which used to raise ValueError.
        return [
            int(chunk) if position % 2 else chunk.lower()
            for position, chunk in enumerate(re.split('([0-9]+)', key))
        ]

    return sorted(data, key=alphanum_key)

In [16]:
# Announce the run and print the legend that maps each class index to its mood quadrant.
print("\n".join([
    "Classificazione mood canzioni in corso...",
    "Legenda:",
    "0: basso Arousal - basso valence ",
    "1: basso Arousal - alto valence ",
    "2: alto Arousal - basso valence ",
    "3: alto Arousal - alto valence ",
    "----------------------------------------------------",
]))

# For every file in the playlist folder: render temporary spectrograms, classify
# them in groups of 4 and record the share of each predicted mood for the song.
for filename in os.listdir(directory):
    labels = []

    filename_no_extension = Path(filename).stem
    data["ID"].append(filename_no_extension)

    file_path = os.path.join(directory, filename)
    signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE, offset=5.0, duration=TRACK_DURATION)

    # one spectrogram image per 6-second segment of the song
    for d in range(num_segments):

        # first and one-past-last sample of the current segment
        start = d * samples_per_segment
        finish = start + samples_per_segment

        # mel spectrogram of the segment, transposed so len(mel) is the frame count
        mel = librosa.feature.melspectrogram(y=signal[start:finish], sr=sample_rate, n_mels=num_mel, hop_length=hop_length)
        mel = mel.T

        # only segments with the expected number of frames are rendered
        if len(mel) != num_mel_vectors_per_segment:
            continue

        img_name = str(d) + '.png'

        fig = plt.figure(figsize=(3, 3), dpi=100)
        S_dB = librosa.power_to_db(mel.T)
        img = librosa.display.specshow(S_dB)

        plt.savefig("{}{}".format(output_folder, img_name))
        plt.close(fig)

    # Classify the rendered images in groups of 4, following the numeric order of
    # the file names; a trailing group with fewer than 4 images is not classified.
    pic_array = []
    for i in sorted_alphanumeric(os.listdir(output_folder)):

        file_path = os.path.join(output_folder, i)
        pic_array.append(np.array(image.load_img(file_path)))

        if len(pic_array) < 4:
            continue

        # image features -> one sequence of 4 frames -> predicted class index
        features_spectrogram = feature_extractor.predict(np.array(pic_array))
        features_spectrogram = np.reshape(features_spectrogram, (1, 4, -1))
        classification = LSTM.predict(features_spectrogram)
        labels.append(np.argmax(classification))

        pic_array = []

    # (class index, percentage of this song's predictions), most frequent first
    counter = collections.Counter(labels)
    most_common = [(label, round(count / len(labels) * 100.0, 1)) for label, count in counter.most_common()]

    # one value per mood column for this song; classes never predicted get 0
    share_by_label = dict(most_common)
    for label_index, column in enumerate(('A-V-', 'A-V+', 'A+V-', 'A+V+')):
        data[column].append(share_by_label.get(label_index, 0))

    print("File: "+filename)
    print("Predizioni:")
    print(labels)
    print("Mood in percentuale: "+ str(most_common))
    print("----------------------------------------------------")

    # empty the temporary directory before the next song
    shutil.rmtree(output_folder)
    os.mkdir(output_folder)

Classificazione mood canzioni in corso...
Legenda:
0: basso Arousal - basso valence 
1: basso Arousal - alto valence 
2: alto Arousal - basso valence 
3: alto Arousal - alto valence 
----------------------------------------------------
File: Fabrizio De Andr - La canzone di Marinella.mp3
Predizioni:
[0, 0, 0, 0, 0, 3, 0]
Mood in percentuale: [(0, 85.7), (3, 14.3)]
----------------------------------------------------
File: John Coltrane - Naima.mp3
Predizioni:
[1, 1, 1, 1, 0, 0, 0, 1, 1, 1]
Mood in percentuale: [(1, 70.0), (0, 30.0)]
----------------------------------------------------
File: Nirvana - Come As You Are.mp3
Predizioni:
[2, 2, 2, 2, 3, 3, 3, 3, 0]
Mood in percentuale: [(2, 44.4), (3, 44.4), (0, 11.1)]
----------------------------------------------------
File: Queen - Radio Ga Ga.mp3
Predizioni:
[3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 1, 3, 0]
Mood in percentuale: [(3, 71.4), (2, 14.3), (1, 7.1), (0, 7.1)]
----------------------------------------------------
File: Red Hot Chili Pe

In [17]:
# Build the results table (one row per song) from the collected lists and save it
# as CSV without the index column.
# NOTE: this rebinds df, which held the playlist annotations in the cells above.
df = pd.DataFrame(data)
df.to_csv('finetuned1.csv',index=False)

df

Unnamed: 0,ID,A-V-,A-V+,A+V-,A+V+
0,Fabrizio De Andr - La canzone di Marinella,85.7,0.0,0.0,14.3
1,John Coltrane - Naima,30.0,70.0,0.0,0.0
2,Nirvana - Come As You Are,11.1,0.0,44.4,44.4
3,Queen - Radio Ga Ga,7.1,7.1,14.3,71.4
4,Red Hot Chili Peppers - By The Way,0.0,0.0,12.5,87.5
5,Imagine Dragons - On Top of the World,0.0,14.3,0.0,85.7
6,Green Day - Time of Your Life,83.3,16.7,0.0,0.0
7,Oasis - Wonderwall,11.1,66.7,0.0,22.2
8,Eagles - Hotel California,50.0,25.0,6.2,18.8
9,Red Hot Chili Peppers - Snow,27.3,18.2,36.4,18.2
