In [71]:
!pip install ipdb
#from data_preprocess import FiducialDataProcess
import ipdb
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.decomposition import NMF
import warnings

class NMF_Data(object):
	def __init__(self, dat_x, dat_y):
		self.dat_x = dat_x
		self.dat_y = dat_y
		self.nmf_features = []

	def create_nmf(self, test_size=500):
		x_train, x_test, y_train, y_test = train_test_split(self.dat_x, self.dat_y, random_state=1, test_size = test_size)
		print(x_train.shape, x_test.shape)
		
		nmf = NMF(n_components=300, random_state=0)
		nmf.fit(x_train)

		x_train_nmf = nmf.transform(x_train)
		x_test_nmf = nmf.transform(x_test)

		self.nmf_features.append(x_train_nmf)
		self.nmf_features.append(y_train)
		self.nmf_features.append(x_test_nmf)
		self.nmf_features.append(y_test)

	def get_nmf_features(self):
		return self.nmf_features

	def save_nmf(self, filename):
		self.create_nmf()
		np.save(filename, self.nmf_features)


if __name__ == '__main__':
    my_data = np.genfromtxt('dense_data_type_and_emot.csv', delimiter=',')
    np.random.seed(0)
    np.random.shuffle(my_data)	
    features_init = my_data[:,1:]
    features = features_init / features_init.max(axis=0)
    labels = my_data[:,0:2] 
    nmf_total = NMF_Data(features, labels)
    filename = 'nmf_features_type_emot.npy'
    nmf_total.save_nmf(filename)


	# dat = pd.read_csv("nmf_faducial.csv", index_col=0)
	# dat_x = dat.loc[:,'feature1':'feature6006']
	# dat_y = dat.loc[:, 'emotion_idx']
	# nmf_total = NMF_Data(dat_x, dat_y)
	# nmf_total.save_nmf()

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
(2000, 3004) (500, 3004)


KeyboardInterrupt: 

In [19]:
nmf_create = NMF_Data.create_nmf(nmf_total)
data_get = NMF_Data.get_nmf_features(nmf_total)

(2000, 3004) (500, 3004)


In [25]:
x_train, y_train, x_test, y_test = data_get[0], data_get[1], data_get[2], data_get[3]

## SVM for 0/1

In [68]:
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

warnings.filterwarnings('ignore')
svm_param_grid = {'C':[0.001,0.01,0.1,1,10,50,100,500],
                   'kernel':['rbf', 'linear', 'poly', 'sigmoid']}

svm_clf = SVC(**svm_param_grid)

grid = GridSearchCV(svm_clf,
                    param_grid=svm_param_grid,
                    cv=5,
                    verbose = 1,
                    scoring='accuracy')

grid.fit(x_train, y_train[:,0])
grid.best_params_, grid.best_score_

Fitting 5 folds for each of 32 candidates, totalling 160 fits


[Parallel(n_jobs=1)]: Done 160 out of 160 | elapsed:   41.7s finished


({'C': 500, 'kernel': 'linear'}, 0.9885)

In [69]:
svm_best = SVC(**grid.best_params_)
svm_best.fit(x_train,y_train[:,0])
score_svm = svm_best.score(x_test,y_test[:,0])
score_svm

0.988

In [70]:
print ('overall accuracy is ' + str(score_svm))

overall accuracy is 0.988


## SVM for multi emotion classification

In [37]:
svm_param_grid = {'C':[0.001,0.01,0.1,1,10,50,100,500],
                   'kernel':['rbf', 'linear', 'poly', 'sigmoid'],
                   'decision_function_shape': ['ovr', 'ovo']}

svm_clf_multi = SVC(**svm_param_grid)

grid = GridSearchCV(svm_clf_multi,
                    param_grid=svm_param_grid,
                    cv=5,
                    verbose = 1,
                    scoring='accuracy')

grid.fit(x_train, y_train[:,1])
grid.best_params_, grid.best_score_

Fitting 5 folds for each of 64 candidates, totalling 320 fits


[Parallel(n_jobs=1)]: Done 320 out of 320 | elapsed:  3.2min finished


({'C': 100, 'decision_function_shape': 'ovr', 'kernel': 'linear'}, 0.4825)

In [38]:
svm_best = SVC(**grid.best_params_)
svm_best.fit(x_train,y_train[:,1])
score_svm = svm_best.score(x_test,y_test[:,1])
score_svm

0.474

In [54]:
y_train_simple, y_test_simple = y_train[:,0], y_test[:,0]
x_train_0 = x_train[np.where(y_train_simple==0)]
y_train_compund_0 = y_train[:,1][np.where(y_train_simple==0)]

svm_param_grid = {'C':[0.001,0.01,0.1,1,10,50,100,500],
                   'kernel':['rbf', 'linear', 'poly', 'sigmoid'],
                   'decision_function_shape': ['ovr', 'ovo']}

svm_clf_compund_0 = SVC(**svm_param_grid)

grid = GridSearchCV(svm_clf_compund_0,
                    param_grid=svm_param_grid,
                    cv=5,
                    verbose = 1,
                    scoring='accuracy')

grid.fit(x_train_0, y_train_compund_0)
grid.best_params_, grid.best_score_

Fitting 5 folds for each of 64 candidates, totalling 320 fits


[Parallel(n_jobs=1)]: Done 320 out of 320 | elapsed:   17.0s finished


({'C': 500, 'decision_function_shape': 'ovr', 'kernel': 'rbf'},
 0.7370078740157481)

In [55]:
x_test_0 = x_test[np.where(y_test_simple==0)]
y_test_compund_0 = y_test[:,1][np.where(y_test_simple==0)]

svm_best_compound_0 = SVC(**grid.best_params_)
svm_best_compound_0.fit(x_train_0, y_train_compund_0)
score_svm_compound_0 = svm_best_compound_0.score(x_test_0, y_test_compund_0)
score_svm_compound_0

0.7543859649122807

In [61]:
y_train_simple, y_test_simple = y_train[:,0], y_test[:,0]
x_train_1 = x_train[np.where(y_train_simple==1)]
y_train_compund_1 = y_train[:,1][np.where(y_train_simple==1)]

svm_param_grid = {'C':[0.001,0.01,0.1,1,10,50,100,500],
                   'kernel':['rbf', 'linear', 'poly', 'sigmoid'],
                   'decision_function_shape': ['ovr', 'ovo']}

svm_clf_compund_1 = SVC(**svm_param_grid)

grid = GridSearchCV(svm_clf_compund_1,
                    param_grid=svm_param_grid,
                    cv=5,
                    verbose = 1,
                    scoring='accuracy')

grid.fit(x_train_1, y_train_compund_1)
grid.best_params_, grid.best_score_

Fitting 5 folds for each of 64 candidates, totalling 320 fits


[Parallel(n_jobs=1)]: Done 320 out of 320 | elapsed:  1.5min finished


({'C': 50, 'decision_function_shape': 'ovr', 'kernel': 'linear'},
 0.49157509157509155)

In [62]:
x_test_1 = x_test[np.where(y_test_simple==1)]
y_test_compund_1 = y_test[:,1][np.where(y_test_simple==1)]

svm_best_compound_1 = SVC(**grid.best_params_)
svm_best_compound_1.fit(x_train_1, y_train_compund_1)
score_svm_compound_1 = svm_best_compound_1.score(x_test_1, y_test_compund_1)
score_svm_compound_1

0.46200607902735563

In [65]:
overall_acc = (len(y_test_compund_0)*score_svm_compound_0+len(y_test_compund_1)*score_svm_compound_1)/len(y_test)
print ('overall accuracy is ' + str(overall_acc))

overall accuracy is 0.562
