In [1]:
import pandas as pd
import numpy as np
from sklearn.externals import joblib

from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process.kernels import RationalQuadratic, Matern
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

from sklearn.model_selection import GridSearchCV

In [2]:
# #PATH
# path_info = "F:/178/Tugas Akhir/dataset/annotations/"
# path_to_fitur = "F:/178/Tugas Akhir/dataset/coba fitur/"
# path_to_file = "F:/178/Tugas Akhir/dataset/clips_45sec_wav/"
# path_to_experiment ="F:/178/Tugas Akhir/dataset/experiment/"
# path_to_model = "F:/178/Tugas Akhir/dataset/model/"
# path_to_predict = "F:/178/Tugas Akhir/dataset/predict/"

path_info = "C:/Users/USER/Downloads/Tugas Akhir/dataset/annotations/"
path_to_fitur = "C:/Users/USER/Downloads/Tugas Akhir/dataset/coba fitur/"
path_to_file = "C:/Users/USER/Downloads/Tugas Akhir/dataset/clips_45sec_wav/"
path_to_experiment ="C:/Users/USER/Downloads/Tugas Akhir/dataset/experiment/"
path_to_model = "C:/Users/USER/Downloads/Tugas Akhir/dataset/model/"
path_to_predict = "C:/Users/USER/Downloads/Tugas Akhir/dataset/predict/"

In [3]:
def load(fitur):
    data = pd.read_csv(path_to_experiment + fitur+".csv",header=None)
    ground = pd.read_csv(path_info + "static_annotations.csv")

    arousal_mean = ground['mean_arousal']
    valence_mean = ground['mean_valence']
    
    return data,arousal_mean,valence_mean

def split(data,value):
    train_data, test_data, train_values, test_values = train_test_split(data,value,test_size=0.3,random_state=1000)
    
    return train_data, test_data, train_values, test_values
    
def predict(data,values,test,true,clf):
    trained = clf.fit(data,values)
    a = clf.predict(test)
    df = pd.DataFrame(data=[a,true])
    
    return a,trained,df

def score(a,true):
    r2 = r2_score(true, a)
    
    return r2

def gridsearch(data,true):
    model = GPR()
#     1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1)
    Cs = [1,1e1,1e2,1e3,1e4,1e5]
    gammas = [1,1e-1,1e-2,1e-3,1e-4,1e-5]
    parameters = {'C':Cs,'gamma':gammas}

    clf = GridSearchCV(model, parameters, cv=10, scoring='r2')
    clf.fit(data,true)

    return clf.best_params_ , clf.best_score_

def regresion(clf,data,values):
    scores = cross_val_score(clf,data,values,scoring='r2',cv=10)
    
    return scores

def save_model(model,case,types):
    path = path_to_model+types+case+'norm_svr.sav' # save the model
    joblib.dump(model, path)

def save_predict(df1,df2,case):
    path = path_to_predict+case+"_norm_svr.xlsx" # save the model
    arousal = df1.T
    valence = df2.T
    merged = arousal.merge(valence, how='outer', left_index=True, right_index=True)
    merged.to_excel(path,header=None,index=None)

# CASE 1

In [4]:
datafitur = "case1" #CASE 1

data,arousal_mean,valence_mean = load(datafitur) #Load
print(data.shape)

train_data_a, test_data_a, train_values_a, test_values_a = split(data,arousal_mean) #split arousal
train_data_v, test_data_v, train_values_v, test_values_v = split(data,valence_mean) #split valence
print(train_data_a.shape)

(744, 26)
(520, 26)


GRIDSEARCHCV

In [None]:
param_a, score_a = gridsearch(data,arousal_mean) #Gridsearch Arousal
param_v, score_v = gridsearch(data,valence_mean) #Grinsearch Valence

In [6]:
print("parameter Arousal:\t",param_a)
print("parameter Valence:\t",param_v)

parameter Arousal:	 {'C': 10.0, 'gamma': 0.1}
parameter Valence:	 {'C': 100.0, 'gamma': 0.01}


INITIAL MODEL

In [6]:
kernel = RationalQuadratic()

svr_case1_a = GPR(1.0 * kernel) #init Arousal
svr_case1_v = GPR(1.0 * kernel) #init Valence

TRAIN

In [7]:
# #Predict Arousal
a_a,train_a,df1_a = predict(train_data_a, train_values_a, test_data_a, test_values_a, svr_case1_a)
# #Predict Valence
a_v,train_v,df1_v = predict(train_data_v, train_values_v, test_data_v, test_values_v, svr_case1_v)

r2_a = score(a_a,test_values_a) #Score Arousal
r2_v = score(a_v,test_values_v) #Score Valence

AROUSAL

In [8]:
df1_a = df1_a.round(2)

display(df1_a.head())
print("R2 Score Arousal:\t%0.2f" % r2_a)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,4.63,5.98,6.54,3.87,3.65,5.09,3.3,6.1,3.61,5.11,...,6.32,4.81,5.14,4.95,4.44,4.9,5.4,6.41,4.65,3.22
1,6.2,5.9,6.4,3.1,3.5,3.6,3.0,5.8,3.6,6.2,...,7.0,5.0,3.7,5.8,4.7,4.9,4.8,6.3,3.3,2.2


R2 Score Arousal:	0.63


VALENCE

In [9]:
df1_v = df1_v.round(2)

display(df1_v.head())
print("R2 Score Valence: %0.2f" % r2_v)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,5.24,5.71,5.37,4.18,4.42,5.46,4.39,5.66,4.08,5.31,...,5.12,5.28,4.88,5.29,5.1,5.31,5.0,5.42,5.03,4.22
1,2.7,6.4,6.0,2.7,5.0,3.2,4.6,4.3,4.1,6.7,...,6.6,4.8,2.5,5.6,3.8,4.4,6.5,6.5,4.0,3.5


R2 Score Valence: 0.23


REGRESSION

In [10]:
scores_a = regresion(svr_case1_a,data,arousal_mean) #Arousal Regression
scores_v = regresion(svr_case1_v,data,valence_mean) #Arousal Regression

AROUSAL

In [11]:
print("R2 Score Valence:\t",scores_a)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_a.mean(), scores_a.std() ))

R2 Score Valence:	 [0.37866231 0.46483577 0.50682721 0.37743884 0.66800838 0.54614637
 0.53831074 0.51049998 0.63653439 0.59302153]
R2 Score Valence:	0.52 (+/- 0.09)


VALENCE

In [12]:
print("R2 Score Valence:\t",scores_v)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_v.mean(), scores_v.std() ))

R2 Score Valence:	 [-0.13293561 -0.04865083 -0.01720641 -0.28684624  0.3831295   0.09474223
  0.14761786  0.22003463  0.29359797  0.06731212]
R2 Score Valence:	0.07 (+/- 0.19)


In [14]:
# save_predict(df1_a.round(2),df1_v.round(2),datafitur)
# save_model(train_a,datafitur,'arousal/') #save model Arousal
# save_model(train_v,datafitur,'valence/') #save model Valence

# CASE 2

In [13]:
datafitur = "case2" #CASE 2

data,arousal_mean,valence_mean = load(datafitur) #Load
print(data.shape)

train_data_a, test_data_a, train_values_a, test_values_a = split(data,arousal_mean) #split arousal
train_data_v, test_data_v, train_values_v, test_values_v = split(data,valence_mean) #split valence

(744, 34)


GRIDSEARCH

In [16]:
param_a, score_a = gridsearch(data,arousal_mean) #Gridsearch Arousal
param_v, score_v = gridsearch(data,valence_mean) #Grinsearch Valence



In [17]:
print("parameter Arousal:\t",param_a)
print("parameter Valence:\t",param_v)

parameter Arousal:	 {'C': 10000.0, 'gamma': 0.001}
parameter Valence:	 {'C': 100.0, 'gamma': 0.01}


INITIAL MODEL

In [14]:
kernel = RationalQuadratic()

svr_case2_a = GPR(1.0 * kernel) #init Arousal
svr_case2_v = GPR(1.0 * kernel) #init Valence


TRAIN

In [15]:
# #Predict Arousal
a_a,train_a,df2_a = predict(train_data_a, train_values_a, test_data_a, test_values_a, svr_case2_a)
# #Predict Valence
a_v,train_v,df2_v = predict(train_data_v, train_values_v, test_data_v, test_values_v, svr_case2_v)

r2_a = score(a_a,test_values_a) #Score Arousal
r2_v = score(a_v,test_values_v) #Score Valence

AROUSAL

In [16]:
df2_a = df2_a.round(2)

display(df2_a.head())
print("R2 Score Arousal:\t%0.2f" % r2_a)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,5.09,5.8,6.57,3.56,3.35,4.43,3.25,6.09,3.32,5.25,...,6.34,5.19,4.9,4.83,5.16,4.71,5.5,6.38,4.08,2.88
1,6.2,5.9,6.4,3.1,3.5,3.6,3.0,5.8,3.6,6.2,...,7.0,5.0,3.7,5.8,4.7,4.9,4.8,6.3,3.3,2.2


R2 Score Arousal:	0.65


VALENCE

In [17]:
df2_v = df2_v.round(2)

display(df2_v.head())
print("R2 Score Valence:\t%0.2f" % r2_v)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,5.33,5.57,5.43,4.47,4.15,5.02,4.49,5.66,3.94,5.42,...,5.22,5.81,4.9,5.05,5.39,5.05,5.06,5.39,4.62,3.89
1,2.7,6.4,6.0,2.7,5.0,3.2,4.6,4.3,4.1,6.7,...,6.6,4.8,2.5,5.6,3.8,4.4,6.5,6.5,4.0,3.5


R2 Score Valence:	0.26


REGRESSION

In [18]:
scores_a = regresion(svr_case2_a,data,arousal_mean) #Arousal Regression
scores_v = regresion(svr_case2_v,data,valence_mean) #Arousal Regression

AROUSAL

In [19]:
print("R2 Score Valence:\t",scores_a)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_a.mean(), scores_a.std() ))

R2 Score Valence:	 [0.41512758 0.48072126 0.51838905 0.40513468 0.69083098 0.53093083
 0.49098828 0.57910239 0.63595575 0.56560724]
R2 Score Valence:	0.53 (+/- 0.09)


VALENCE

In [20]:
print("R2 Score Valence:\t",scores_v)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_v.mean(), scores_v.std() ))

R2 Score Valence:	 [-0.07410652 -0.0061473   0.00963546 -0.22060325  0.37855586  0.09138256
  0.15624368  0.22226302  0.32870911  0.06076326]
R2 Score Valence:	0.09 (+/- 0.17)


In [25]:
# save_predict(df2_a.round(2),df2_v.round(2),datafitur)
# save_model(train_a,datafitur,'arousal/') #save model Arousal
# save_model(train_v,datafitur,'valence/') #save model Valence

# CASE 3

In [21]:
datafitur =  "case3" #CASE 3
data,arousal_mean,valence_mean = load(datafitur) #Load
print(data.shape)

train_data_a, test_data_a, train_values_a, test_values_a = split(data,arousal_mean) #split arousal
train_data_v, test_data_v, train_values_v, test_values_v = split(data,valence_mean) #split valence

(744, 37)


GRIDSEARCH

In [27]:
param_a, score_a = gridsearch(data,arousal_mean) #Gridsearch Arousal
param_v, score_v = gridsearch(data,valence_mean) #Grinsearch Valence



In [28]:
print("parameter Arousal:\t",param_a)
print("parameter Valence:\t",param_v)

parameter Arousal:	 {'C': 10000.0, 'gamma': 0.001}
parameter Valence:	 {'C': 10.0, 'gamma': 0.01}


INITAL MODEL

In [22]:
kernel = RationalQuadratic()

svr_case3_a = GPR(1.0 * kernel) #init Arousal
svr_case3_v = GPR(1.0 * kernel) #init Valence

TRAIN

In [23]:
# #Predict Arousal
a_a,train_a,df3_a = predict(train_data_a, train_values_a, test_data_a, test_values_a, svr_case3_a)
# #Predict Valence
a_v,train_v,df3_v = predict(train_data_v, train_values_v, test_data_v, test_values_v, svr_case3_v)

r2_a = score(a_a,test_values_a) #Score Arousal
r2_v = score(a_v,test_values_v) #Score Valence

AROUSAL

In [24]:
df3_a = df3_a.round(2)

display(df3_a.head())
print("R2 Score Arousal:\t%0.2f" % r2_a)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,5.09,5.81,6.61,3.56,3.35,4.43,3.23,5.87,3.32,5.27,...,6.43,5.15,4.9,4.83,5.13,4.73,5.51,6.32,4.09,2.88
1,6.2,5.9,6.4,3.1,3.5,3.6,3.0,5.8,3.6,6.2,...,7.0,5.0,3.7,5.8,4.7,4.9,4.8,6.3,3.3,2.2


R2 Score Arousal:	0.64


VALENCE

In [25]:
df3_v = df3_v.round(2)

display(df3_v.head())
print("R2 Score Valence:\t%0.2f" % r2_v)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,5.31,5.57,5.53,4.47,4.16,5.02,4.49,5.77,3.95,5.46,...,5.38,5.79,5.02,5.05,5.38,5.05,5.18,5.09,4.64,3.89
1,2.7,6.4,6.0,2.7,5.0,3.2,4.6,4.3,4.1,6.7,...,6.6,4.8,2.5,5.6,3.8,4.4,6.5,6.5,4.0,3.5


R2 Score Valence:	0.26


REGRESSION

In [26]:
scores_a = regresion(svr_case3_a,data,arousal_mean) #Arousal Regression
scores_v = regresion(svr_case3_v,data,valence_mean) #Arousal Regression

AROUSAL

In [27]:
print("R2 Score Valence:\t",scores_a)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_a.mean(), scores_a.std() ))

R2 Score Valence:	 [0.41682792 0.47059352 0.52368899 0.39383833 0.69197452 0.53997056
 0.4831702  0.57153879 0.61814765 0.57603766]
R2 Score Valence:	0.53 (+/- 0.09)


VALENCE

In [28]:
print("R2 Score Valence:\t",scores_v)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_v.mean(), scores_v.std() ))

R2 Score Valence:	 [-0.04710819  0.00364301  0.02475799 -0.18380585  0.36444722  0.09709142
  0.12674123  0.12677103  0.26965581  0.11130809]
R2 Score Valence:	0.09 (+/- 0.15)


In [36]:
# save_predict(df3_a.round(2),df3_v.round(2),datafitur)
# save_model(train_a,datafitur,'arousal/') #save model Arousal
# save_model(train_v,datafitur,'valence/') #save model Valence

# CASE 4

In [29]:
datafitur =  "case4" #CASE 3
data,arousal_mean,valence_mean = load(datafitur) #Load
print(data.shape)

train_data_a, test_data_a, train_values_a, test_values_a = split(data,arousal_mean) #split arousal
train_data_v, test_data_v, train_values_v, test_values_v = split(data,valence_mean) #split valence

(744, 85)


GRIDSEAERCH

In [38]:
param_a, score_a = gridsearch(data,arousal_mean) #Gridsearch Arousal
param_v, score_v = gridsearch(data,valence_mean) #Grinsearch Valence



In [39]:
print("parameter Arousal:\t",param_a)
print("parameter Valence:\t",param_v)

parameter Arousal:	 {'C': 10000.0, 'gamma': 0.0001}
parameter Valence:	 {'C': 100.0, 'gamma': 0.001}


INITIAL MODEL

In [30]:
kernel = RationalQuadratic()

svr_case4_a = GPR(1.0 * kernel) #init Arousal
svr_case4_v = GPR(1.0 * kernel) #init Valence

TRAIN

In [31]:
# #Predict Arousal
a_a,train_a,df4_a = predict(train_data_a, train_values_a, test_data_a, test_values_a, svr_case4_a)
# #Predict Valence
a_v,train_v,df4_v = predict(train_data_v, train_values_v, test_data_v, test_values_v, svr_case4_v)

r2_a = score(a_a,test_values_a) #Score Arousal
r2_v = score(a_v,test_values_v) #Score Valence

AROUSAL

In [32]:
df4_a = df4_a.round(2)

display(df4_a.head())
print("R2 Score Arousal:\t%0.2f" % r2_a)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,4.65,5.33,5.14,3.78,3.42,4.51,4.38,5.42,3.52,4.58,...,5.57,5.44,4.16,3.88,4.07,5.12,5.39,5.59,4.23,3.46
1,6.2,5.9,6.4,3.1,3.5,3.6,3.0,5.8,3.6,6.2,...,7.0,5.0,3.7,5.8,4.7,4.9,4.8,6.3,3.3,2.2


R2 Score Arousal:	0.44


VALENCE

In [33]:
df4_v = df4_v.round(2)

display(df4_v.head())
print("R2 Score Valence:\t%0.2f" % r2_v)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,214,215,216,217,218,219,220,221,222,223
0,4.54,5.71,4.45,5.12,4.35,4.9,4.84,4.93,4.2,4.8,...,4.68,5.65,4.51,4.26,4.29,5.2,4.89,4.38,4.64,3.9
1,2.7,6.4,6.0,2.7,5.0,3.2,4.6,4.3,4.1,6.7,...,6.6,4.8,2.5,5.6,3.8,4.4,6.5,6.5,4.0,3.5


R2 Score Valence:	0.12


REGRESSION

In [34]:
scores_a = regresion(svr_case4_a,data,arousal_mean) #Arousal Regression
scores_v = regresion(svr_case4_v,data,valence_mean) #Arousal Regression

AROUSAL

In [35]:
print("R2 Score Valence:\t",scores_a)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_a.mean(), scores_a.std() ))

R2 Score Valence:	 [0.18926983 0.45401008 0.31438706 0.3742225  0.553324   0.3342589
 0.30788942 0.21280236 0.44663738 0.44138559]
R2 Score Valence:	0.36 (+/- 0.11)


VALENCE

In [36]:
print("R2 Score Valence:\t",scores_v)
print("R2 Score Valence:\t%0.2f (+/- %0.2f)" % (scores_v.mean(), scores_v.std() ))

R2 Score Valence:	 [-0.06674634 -0.00821007  0.06468341 -0.07466524  0.24284139  0.06619059
 -0.08377337 -0.04361331  0.02288482  0.13155112]
R2 Score Valence:	0.03 (+/- 0.10)


In [47]:
# save_predict(df4_a.round(2),df4_v.round(2),datafitur)
# save_model(train_a,datafitur,'arousal/') #save model Arousal
# save_model(train_v,datafitur,'valence/') #save model Valence

In [None]:
# # Load from file
# with open(arousal_model, 'rb') as file_arousal:
#     arousal_model = pickle.load(file_arousal)
# # # Load from file
# # with open(valence_model, 'rb') as file_valence:
# #     valence_model = pickle.load(file_valence)

# a = arousal_model.predict(test_data_a)