In [1]:
import pandas as pd
import numpy as np
import os
import IPython.display as ipd

from pydub import AudioSegment
import librosa
import math

from sklearn.externals import joblib

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))

In [2]:
path_to_file = 'C:/Users/USER/Downloads/Tugas Akhir/dataset/tes/'
path_to_fitur = "C:/Users/USER/Downloads/Tugas Akhir/dataset/tes/fitur/"
path_to_model = "C:/Users/USER/Downloads/Tugas Akhir/dataset/model/"

In [3]:
# #Get Feature

# #MFCC
def mfcc(y,sr,file):
    vector = list()
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) #13 is default dimension 512 frame
    data = pd.DataFrame(mfcc)
    mean = data.mean(axis = 1).values #Get Mean
    std = data.std(axis = 1).values #Get std
    vector.append(np.insert(mean,13,std))
    save = pd.DataFrame(vector)

    save.to_csv(path_to_fitur+file[:-4]+"/mfcc.csv", index=False, header=False)

# #TIMBRE // CENTROID // FLUX // ROLLOFF // ZERO CROSSING 
def timbre(y,sr,file):
    vector = list()
    cent = librosa.feature.spectral_centroid(y=y, sr=sr) #1 is default dimension 512 frame
    flux = librosa.onset.onset_strength(y=y, sr=sr) #1 is default dimension 512 frame
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr) #1 is default dimension 512 frame
    zc = librosa.feature.zero_crossing_rate(y) #1 is default dimension 512 frame
    
    a = pd.DataFrame(cent) 
    b = pd.DataFrame(flux).T
    c = pd.DataFrame(rolloff)
    d = pd.DataFrame(zc)
    
    frame = [a,b,c,d]
    data = pd.concat(frame)
    mean = data.mean(axis = 1).values #Get Mean
    std = data.std(axis = 1).values #Get std
    vector.append(np.insert(mean,4,std))
    save = pd.DataFrame(vector)
    
    save.to_csv(path_to_fitur+file[:-4]+"/timbre.csv", index=False, header=False)

# #SCF and SFM
def flatness(y,sr,file):
    vector = list()
    flatness = librosa.feature.spectral_flatness(y=y)
    data = pd.DataFrame(flatness)
    mean = data.mean(axis = 1).values #Get Mean
    std = data.std(axis = 1).values #Get std
    vector.append(np.insert(mean,1,std))
    save = pd.DataFrame(vector)
    
    save.to_csv(path_to_fitur+file[:-4]+"/flatness.csv", index=False, header=False)
    
def crest(y,sr,file):
    peak = y.max()
    rms = librosa.feature.rmse(y=y)
    n = rms.size
    square = rms**2
    rms_v2 = math.sqrt((1/n)*(square.sum()))
    crest = [peak/rms_v2]
    
    crest_to_csv = pd.DataFrame(crest)
    
    crest_to_csv.to_csv(path_to_fitur+file[:-4]+"/crest.csv", index=False, header=False)

# #chroma
def chroma(y,sr,file):
    vector = list()
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=24) #24 is default dimension 512 frame
    data = pd.DataFrame(chroma)
    mean = data.mean(axis = 1).values #Get Mean
    std = data.std(axis = 1).values #Get std
    vector.append(np.insert(mean,24,std))
    save = pd.DataFrame(vector)
    
    save.to_csv(path_to_fitur+file[:-4]+"/chroma.csv", index=False, header=False)

def fitur(file):
    
    y, sr = librosa.load(path_to_file+'wav/'+file, sr=22050) #load 45 and with same freq
    
    mfccs = mfcc(y,sr,file) #mfcc
    timbres = timbre(y,sr,file) #timbre
    flatnesss = flatness(y,sr,file)
    crests = crest(y,sr,file)
    chromas = chroma(y,sr,file) #chroma

def merge(case1,case2):
    case_a = pd.DataFrame(case1)
    case_b = pd.DataFrame(case2)
    
    merged = case_a.merge(case_b, how='outer', left_index=True, right_index=True)
    
    return merged

def flat(song,fitur):
    vector = list()

    data = pd.read_csv(path_to_fitur+str(song)+"/"+fitur+".csv",header=None)
    vector.append(data.loc[0].values)

    return vector

def case1(file):
    fitur = 'mfcc'
    result = flat(file,fitur)
    df = pd.DataFrame(result)
    
    return df

def case2(file):
    case_1 = case1(file)
    fitur = 'timbre'
    case_2 = flat(file,fitur)
    result = merge(case_1,case_2)
    
    return result

def case3(file):
    case_2 = case2(file)
    fitur1 = 'crest'
    fitur2 = 'flatness'
    crest = flat(file,fitur1)
    flatness = flat(file,fitur2)
    results = merge(case_2,crest)
    result = merge(results,flatness)
    
    return result

def case4(file):
    case_3 = case3(file)
    fitur = 'chroma'
    chroma = flat(file,fitur)
    result = merge(case_3,chroma)
    
    return result

In [25]:
for filename in os.listdir(path_to_file):
    if os.path.isfile(os.path.join(path_to_file, filename)):

        #Convert Wav
        src = path_to_file+filename
        dst = path_to_file+'wav/'+filename[:-4]+".wav"   

        sound = AudioSegment.from_mp3(src)
        sound.export(dst, format="wav")
        print("convert successfully")
        #make dir
        try:  
            os.mkdir(path_to_fitur+filename[:-4])
        except OSError:  
            print ("Creation of the directory %s failed" % path_to_fitur)
        
        # #Feature
        fiture = fitur(filename[:-4]+'.wav')
        print("feature successfully")
        
        # #Flatten
        results = case4(filename[:-4])
        print("flatten successfully")

convert successfully
Creation of the directory C:/Users/USER/Downloads/Tugas Akhir/dataset/tes/fitur/ failed
feature successfully
flatten successfully


In [26]:
#Load Model
scaler.fit(results.T)
normalized = scaler.transform(results.T)

arousal = path_to_model+'arousal/case4norm_svr.sav'
valence = path_to_model+'valence/case4norm_svr.sav'

arousal_model = joblib.load(arousal)
a = arousal_model.predict(normalized.T)

valence_model = joblib.load(valence)
v = valence_model.predict(normalized.T)

In [18]:
print(a,v) #peterpan case 1

[6.36951413] [5.62759665]


In [21]:
print(a,v) #peterpan case 2

[3.91831923] [2.28111924]


In [24]:
print(a,v) #peterpan case 3

[4.34996339] [4.0260475]


In [27]:
print(a,v) #peterpan case 4

[4.27011922] [2.5927828]


In [6]:
print(a,v) #adele case 1

[5.50171702] [5.58375219]


In [9]:
print(a,v) #adele case 2

[3.41536407] [1.6805381]


In [12]:
print(a,v) #adele case 3

[3.94417015] [3.56805811]


In [15]:
print(a,v) #adele case 4

[4.02508202] [2.48251571]
