## 1. Import library

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
import os
import cv2
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import librosa
from scipy.io import wavfile

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D,BatchNormalization, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay

In [7]:
dir25 = "Dataset25/MER_Extra25"
data25 = os.listdir(dir25)
file_dir25 = []
count = 0

for mooddir25 in data25:
    if(not r'.' in mooddir25):
        moodDirName25 = os.path.join(dir25,mooddir25)
        mooddir25=np.array(os.listdir(moodDirName25))
        for audio in mooddir25:
            if(audio[-3:]=='mp3'):
                count+=1
                fn=os.path.join(moodDirName25,audio)
                file_dir25.append(fn)
                print(str(count),fn)

1 Dataset25/MER_Extra25/happy/ovalar_gurup_yol.mp3
2 Dataset25/MER_Extra25/happy/cekirge_oguz_yilmaz.mp3
3 Dataset25/MER_Extra25/happy/baran_bayraktar_gol.mp3
4 Dataset25/MER_Extra25/happy/bu_gece_uyumamisan.mp3
5 Dataset25/MER_Extra25/happy/kim_arar_nilufer.mp3
6 Dataset25/MER_Extra25/happy/heycan_serdar_ortac.mp3
7 Dataset25/MER_Extra25/happy/su_gelen_atlimidir.mp3
8 Dataset25/MER_Extra25/happy/yuru_dilber_yuru_fasil.mp3
9 Dataset25/MER_Extra25/happy/arzu_kus_adana_kopru_basi.mp3
10 Dataset25/MER_Extra25/happy/evlere_senlik_sinan_ozen.mp3
11 Dataset25/MER_Extra25/happy/ayse_ozyilmazel_pardon.mp3
12 Dataset25/MER_Extra25/happy/yangin_olur_biz_yangina_gideriz_fasli_beyoglu.mp3
13 Dataset25/MER_Extra25/happy/uskudara_gideriken.mp3
14 Dataset25/MER_Extra25/happy/cay_elinden_oteye_sevval_sam.mp3
15 Dataset25/MER_Extra25/happy/mustafa_sandal_aya_benzer.mp3
16 Dataset25/MER_Extra25/happy/gul_doktum_yollarina_tarkan.mp3
17 Dataset25/MER_Extra25/happy/sevinc_sarivea_yandirgin_kalbimi_aman.mp3

## 2. Data Augmentation

In [None]:
#add noise
def add_noise(data):
    wn = np.random.normal(0, 1, len(data))
    return np.where(data != 0.0, data.astype('float64') + 0.02 * wn, 0.0).astype(np.float32)

In [None]:
def time_shift(data, shift):
    # shift：shifting lenth
    return np.roll(data, int(shift))

In [None]:
for file in file_dir25:
    m = os.path.basename(os.path.dirname(file))
    f_noise = "Dataset25/MER_Extra25_Augmented/Noise/"+m+"/"+ 'noise_'+os.path.basename(file)
    f_shift = "Dataset25/MER_Extra25_Augmented/TimeShift/"+m+"/"+ 'shift_'+os.path.basename(file)
    data,sr = librosa.load(file)
    data_noise = add_noise(data)
    data_shift = time_shift(data, shift=sr)
    wavfile.write(f_noise,sr,data_noise)
    wavfile.write(f_shift,sr,data_shift)

In [None]:
dir_noise = "Dataset25/MER_Extra25_Augmented/Noise/"
data_noise = os.listdir(dir_noise)
file_noise = []
count = 0

for mooddir in data_noise:
    if(not r'.' in mooddir):
        moodDirName = os.path.join(dir_noise,mooddir)
        mooddir=np.array(os.listdir(moodDirName))
        for audio in mooddir:
            if(audio[-3:]=='mp3'):
                count+=1
                fn=os.path.join(moodDirName,audio)
                file_noise.append(fn)

In [None]:
duration_noise = []
for i in range(len(file_noise)):
    y,sr = librosa.load(file_noise[i])
    d = librosa.get_duration(y=y,sr=sr)
    duration_noise.append(d)

In [None]:
for i in range(len(duration_noise)):
    if duration_noise[i]!=25.5:
        print(i,duration_noise[i])

In [None]:
dir_shift = "Dataset25/MER_Extra25_Augmented/TimeShift/"
data_shift = os.listdir(dir_shift)
file_shift = []

for mooddir in data_shift:
    if(not r'.' in mooddir):
        moodDirName = os.path.join(dir_shift,mooddir)
        mooddir=np.array(os.listdir(moodDirName))
        for audio in mooddir:
            if(audio[-3:]=='mp3'):
                fn=os.path.join(moodDirName,audio)
                file_shift.append(fn)

In [None]:
duration_shift = []
for i in range(len(file_shift)):
    y,sr = librosa.load(file_shift[i])
    d = librosa.get_duration(y=y,sr=sr)
    duration_shift.append(d)

In [None]:
for i in range(len(duration_shift)):
    if duration_shift[i]!=25.5:
        print(i,duration_shift[i])

## 4. 1D Feature Extraction

In [None]:
import re
def key_scale_finder(file):
    pitches = ['C','C#','D','D#','E','F','F#','G','G#','A','A#','B']
    y,sr = librosa.load(file)
    key = librosa.feature.chroma_stft(y=y,sr=sr).sum(axis=1).argmax()
    if re.search("\#$",pitches[key]):
        scale = "minor"
    else:
        scale = "major"
    return scale,pitches[key]

def feature_1d(file):
    try:
        y,sr = librosa.load(file)
    except:
        print('No such file')
        quit()
    
    f = [] 
    
    # tempo
    tempo = librosa.beat.tempo(y=y,sr=sr)[0]
    f.append(tempo)  
    
    # RMS
    #S, phase = librosa.magphase(librosa.stft(y))
    rms = librosa.feature.rms(y=y)
    rms_mean = np.mean(rms)
    rms_var = np.var(rms)
    f.append(rms_mean)
    f.append(rms_var)
    
    # chroma features
    chroma = librosa.feature.chroma_stft(y=y,sr=sr)
    chroma_mean = np.mean(chroma)
    chroma_var = np.var(chroma)
    f.append(chroma_mean)
    f.append(chroma_var)
    
    # spectral centroid
    centroid = librosa.feature.spectral_centroid(y=y)
    centroid_mean = np.mean(centroid)
    centroid_var = np.var(centroid)
    f.append(centroid_mean)
    f.append(centroid_var)
    
    # spectral rolloff
    rolloff = librosa.feature.spectral_rolloff(y=y+0.01, sr=sr)
    rolloff_mean = np.mean(rolloff)
    rolloff_var = np.var(rolloff)
    f.append(rolloff_mean)
    f.append(rolloff_var)
    
    # zero crossing rate
    zcr = librosa.feature.zero_crossing_rate(y=y)
    zcr_mean = np.mean(zcr)
    zcr_var = np.var(zcr)
    f.append(zcr_mean)
    f.append(zcr_var)
    
    # tonnetz
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    tonnetz_mean = np.mean(tonnetz)
    tonnetz_var  = np.var(tonnetz)
    f.append(tonnetz_mean)
    f.append(tonnetz_var)
    
    #mel
    s = librosa.feature.melspectrogram(y=y, sr=sr)
    mel = librosa.amplitude_to_db(s, ref=np.max)
    mel_mean = np.mean(mel)
    mel_var = np.var(mel)
    f.append(mel_mean)
    f.append(mel_var)
    
    #mfcc
    mfcc = librosa.feature.mfcc(y=y,sr=sr,n_mfcc=20)
    m = len(mfcc)
    mfcc_mean = np.zeros((m,))
    mfcc_var = np.zeros((m,))
    for i in range(len(mfcc)):
        mfcc_mean[i] = np.mean(mfcc[i])
        mfcc_var[i]= np.var(mfcc[i])
        f.append(mfcc_mean[i])
        f.append(mfcc_var[i])
    
    return np.array(f)

In [None]:
#Extract Key and Scale
noise_key = []
noise_scale = []
for i in range(len(file_noise)):
    noise_key.append(key_scale_finder(file_noise[i])[1])
    noise_scale.append(key_scale_finder(file_noise[i])[0])

In [None]:
#Feature Vectors
noise_features = np.zeros((len(file_noise),55))
for i in range(0,len(file_noise)):
    noise_features[i] = feature_1d(file_noise[i])

In [None]:
# Label the mood
mood_noise = []
for i in range(len(file_noise)):
    m = os.path.basename(os.path.dirname(file_noise[i]))
    mood_noise.append(m)

In [None]:
shift_key = []
shift_scale = []
for i in range(len(file_shift)):
    shift_key.append(key_scale_finder(file_shift[i])[1])
    shift_scale.append(key_scale_finder(file_shift[i])[0])

In [None]:
#Feature Vectors
shift_features = np.zeros((len(file_shift),55))
for i in range(0,len(file_shift)):
    shift_features[i] = feature_1d(file_shift[i])

In [None]:
# Label the mood
mood_shift = []
for i in range(len(file_shift)):
    m = os.path.basename(os.path.dirname(file_shift[i]))
    mood_shift.append(m)

In [None]:
# Store 15 features to data frame
col_name = ['tempo','rms_mean','rms_var','chroma_mean','chroma_var',
            'centroid_mean','centroid_var','rolloff_mean','roll_off_var',
            'zcr_mean','zcr_var','tonnetz_mean', 'tonnetz_var', 'mel_mean', 'mel_var']
# Store 20 mfccs mean and var to data frame
mfccs_col_name = []
for i in range(0,20):
    mfccs_col_name.append("mfcc_mean_"+str(i+1))
    mfccs_col_name.append("mfcc_var_"+str(i+1))

In [None]:
# Create a data frame to store all the data
features_noise = pd.DataFrame()

In [None]:
# Store file name
features_noise['file'] = file_noise
# Store key and scale
features_noise['scale'] = noise_scale
features_noise['key'] = noise_key
# Store 13 features and 20 mfccs
for i in range(len(col_name)):
    features_noise[col_name[i]] = noise_features[:,i]
for i in range(len(mfccs_col_name)):
    features_noise[mfccs_col_name[i]] = noise_features[:,i+15]
# Store mood
features_noise['mood'] = mood_noise

In [None]:
features_noise

Unnamed: 0,file,scale,key,tempo,rms_mean,rms_var,chroma_mean,chroma_var,centroid_mean,centroid_var,...,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20,mood
0,Dataset25/MER_Extra25_Augmented/Noise/happy/no...,major,G,129.199219,0.123657,0.002290,0.450769,0.071640,3527.177697,145534.210190,...,25.420286,-6.139956,31.976683,-5.387671,33.080315,-5.970808,30.715906,-0.587695,26.289206,happy
1,Dataset25/MER_Extra25_Augmented/Noise/happy/no...,major,B,95.703125,0.110031,0.000791,0.469054,0.077547,3589.934187,131813.935802,...,34.345291,0.804220,42.412483,-5.472694,28.169796,-7.094440,36.845860,-2.276625,32.522129,happy
2,Dataset25/MER_Extra25_Augmented/Noise/happy/no...,major,F,143.554688,0.090611,0.000647,0.337658,0.084095,3285.720215,153058.154063,...,48.989479,-3.415442,35.409660,0.739723,35.402435,1.209225,41.586258,-3.015752,53.627033,happy
3,Dataset25/MER_Extra25_Augmented/Noise/happy/no...,major,E,107.666016,0.098161,0.001865,0.508561,0.082164,3682.237990,371151.829382,...,35.784252,1.985919,30.619648,1.720177,38.473392,0.405217,42.687397,4.041178,40.066433,happy
4,Dataset25/MER_Extra25_Augmented/Noise/happy/no...,minor,F#,112.347147,0.145648,0.002942,0.303056,0.091024,3075.189874,359886.875332,...,29.795027,-7.255356,32.535435,-5.044879,35.918560,-2.459831,29.973581,-4.086148,44.970478,happy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,Dataset25/MER_Extra25_Augmented/Noise/relaxed/...,major,B,117.453835,0.053091,0.000537,0.283036,0.099551,4435.002225,332571.236805,...,21.118633,-4.209631,24.833616,-2.247054,33.619629,-0.776797,55.768936,0.623074,59.044998,relaxed
396,Dataset25/MER_Extra25_Augmented/Noise/relaxed/...,minor,G#,103.359375,0.128090,0.002453,0.278261,0.087915,3067.593518,459000.617770,...,54.005333,-2.019766,67.229233,-3.219717,62.610306,-5.167824,40.666725,-7.431190,48.216042,relaxed
397,Dataset25/MER_Extra25_Augmented/Noise/relaxed/...,minor,G#,129.199219,0.082541,0.002091,0.327736,0.106215,3991.360311,605786.122159,...,23.781773,-3.567219,24.313950,-4.167338,27.034754,-3.870866,24.221720,-2.850541,30.034101,relaxed
398,Dataset25/MER_Extra25_Augmented/Noise/relaxed/...,minor,F#,107.666016,0.121641,0.003615,0.321115,0.092604,3412.727204,481960.468863,...,61.010330,-0.693847,44.039516,0.162756,48.789814,0.010579,76.329330,-0.412611,80.167099,relaxed


In [None]:
# Create the Excel file
pd.ExcelWriter('Features1D/Features/Features_noise.xlsx')

<pandas.io.excel._xlsxwriter.XlsxWriter at 0x7f7f50ceb2e0>

In [None]:
# Store the Feature into excel file
features_noise.to_excel('Features1D/Features/Features_noise.xlsx')

In [None]:
# Create a data frame to store all the data
features_shift = pd.DataFrame()

In [None]:
# Store file name
features_shift['file'] = file_shift
# Store key and scale
features_shift['scale'] = shift_scale
features_shift['key'] = shift_key
# Store 13 features and 20 mfccs
for i in range(len(col_name)):
    features_shift[col_name[i]] = shift_features[:,i]
for i in range(len(mfccs_col_name)):
    features_shift[mfccs_col_name[i]] = shift_features[:,i+15]
# Store mood
features_shift['mood'] = mood_shift

In [None]:
features_shift

Unnamed: 0,file,scale,key,tempo,rms_mean,rms_var,chroma_mean,chroma_var,centroid_mean,centroid_var,...,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20,mood
0,Dataset25/MER_Extra25_Augmented/TimeShift/happ...,major,B,107.666016,0.068638,0.000536,0.356860,0.079414,3002.499593,243224.839492,...,51.223255,-8.141541,30.072334,-7.611277,34.131454,-5.402782,42.941715,-1.860485,42.510563,happy
1,Dataset25/MER_Extra25_Augmented/TimeShift/happ...,minor,C#,107.666016,0.203764,0.004079,0.356433,0.087369,2256.236238,362717.916961,...,39.745594,-5.030432,51.442932,-3.397173,50.442814,-5.992489,50.791504,-2.764585,67.640572,happy
2,Dataset25/MER_Extra25_Augmented/TimeShift/happ...,major,B,117.453835,0.105277,0.001507,0.339377,0.085241,2078.712055,139735.448517,...,54.267632,-11.609016,60.767479,-3.560351,65.094589,-6.228119,56.747627,-3.658868,49.049080,happy
3,Dataset25/MER_Extra25_Augmented/TimeShift/happ...,major,G,103.359375,0.193651,0.004254,0.376314,0.082671,2182.611156,290151.630771,...,40.466801,-5.594726,29.870472,-0.020278,46.112438,-2.546770,61.008266,-0.625277,68.597359,happy
4,Dataset25/MER_Extra25_Augmented/TimeShift/happ...,major,A,129.199219,0.093911,0.000538,0.288401,0.088281,2194.235689,327044.046804,...,67.925270,-10.275117,52.130547,8.992598,52.320457,-18.666664,74.009750,0.182337,80.654190,happy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,Dataset25/MER_Extra25_Augmented/TimeShift/rela...,major,B,117.453835,0.050528,0.000970,0.193335,0.079589,843.611374,20587.797596,...,75.373077,-7.174260,46.161484,-6.057247,91.641174,-4.171055,117.979599,1.829784,193.483566,relaxed
396,Dataset25/MER_Extra25_Augmented/TimeShift/rela...,minor,C#,112.347147,0.121430,0.002034,0.297599,0.091206,2095.007226,791069.356065,...,129.997116,4.307540,172.319016,6.716820,168.036362,0.992625,124.061119,3.944699,183.621979,relaxed
397,Dataset25/MER_Extra25_Augmented/TimeShift/rela...,minor,F#,143.554688,0.098090,0.002621,0.255303,0.093122,1623.337974,800516.177769,...,78.315529,12.965606,149.925980,7.460831,130.234375,-6.891764,84.928642,-10.039824,117.866913,relaxed
398,Dataset25/MER_Extra25_Augmented/TimeShift/rela...,major,A,112.347147,0.149233,0.005923,0.270471,0.085332,818.623609,171610.214221,...,49.399548,-5.608318,46.657879,-10.345454,46.497845,-16.767996,62.470875,-9.490905,67.166382,relaxed


In [None]:
# Create the Excel file
pd.ExcelWriter('Features1D/Features/Features_shift.xlsx')

<pandas.io.excel._xlsxwriter.XlsxWriter at 0x7f7d6764beb0>

In [None]:
# Store the Feature into excel file
features_shift.to_excel('Features1D/Features/Features_shift.xlsx')

## 4. 2D Feature Extraction

In [None]:
n_noise = len(file_noise)
spec_noise = np.empty([n_noise, 1025, 1099])
mfccs_noise = np.empty([n_noise, 20, 1099])
mel_noise = np.empty([n_noise, 128, 1099])

n_shift = len(file_shift)
spec_shift = np.empty([n_shift, 1025, 1099])
mfccs_shift = np.empty([n_shift, 20, 1099])
mel_shift = np.empty([n_shift, 128, 1099])

In [None]:
def feature2d(file):
    try:
        y,sr = librosa.load(file)
    except:
        print('No such file')
        quit()
            
    # Spectrogram
    stft = librosa.stft(y)
    stft_db = librosa.amplitude_to_db(abs(stft))
    spec = stft_db
    # MFCC
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    # Mel
    s = librosa.feature.melspectrogram(y=y, sr=sr)
    mel = librosa.amplitude_to_db(s, ref=np.max)

    return spec, mfccs, mel

In [None]:
def getMood(mood_list, file_list):
    for i in range(len(file_list)):
        m = os.path.basename(os.path.dirname(file_list[i]))
        mood_list.append(m)
    return mood_list

In [None]:
for i in range(0,n_noise):
    spec_noise[i], mfccs_noise[i], mel_noise[i] = feature2d(file_noise[i])
for i in range(0,n_shift):
    spec_shift[i], mfccs_shift[i], mel_shift[i] = feature2d(file_shift[i])

In [None]:
mood_noise_2d = []
mood_noise_2d = getMood(mood_noise_2d,file_noise)

In [None]:
mood_shift_2d = []
mood_shift_2d = getMood(mood_shift_2d,file_shift)

In [None]:
le = LabelEncoder()
moodtrans_noise =le.fit(np.array(mood_noise_2d))
moodtrans_noise=le.transform(np.array(mood_noise_2d))

moodtrans_shift =le.fit(np.array(mood_shift_2d))
moodtrans_shift=le.transform(np.array(mood_shift_2d))

In [None]:
np.savez_compressed("Features2D/npz/Features_noise.npz", 
                    spec = spec_noise, 
                    mfcc = mfccs_noise,
                    mel = mel_noise,
                    target = moodtrans_noise)

In [None]:
np.savez_compressed("Features2D/npz/Features_shift.npz", 
                    spec = spec_shift, 
                    mfcc = mfccs_shift,
                    mel = mel_shift,
                    target = moodtrans_shift)