In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

In [2]:
df_angry = pd.read_csv('df_angry.csv')
df_disgusted = pd.read_csv('df_disgust.csv')
df_happy = pd.read_csv('df_happy.csv')
df_neutral = pd.read_csv('df_neutral.csv')
df_sad = pd.read_csv('df_sad.csv')
df_surprise = pd.read_csv('df_surprise.csv')
df_fear = pd.read_csv('df_fear.csv')

In [3]:
print(len(df_angry), len(df_disgusted), len(df_happy), len(df_neutral), len(df_sad), len(df_surprise), len(df_fear))

4204 503 8085 5564 4673 3745 4267


In [4]:
data = pd.concat([df_angry, df_disgusted, df_happy, df_neutral, df_sad, df_surprise, df_fear], axis=0)
data["emotion"] = ["angry"]*len(df_angry) + ["disgusted"]*len(df_disgusted) + ["happy"]*len(df_happy) + ["neutral"]*len(df_neutral) + ["sad"]*len(df_sad) + ["surprised"]*len(df_surprise) + ["fear"]*len(df_fear)
data["emotion"]

0       angry
1       angry
2       angry
3       angry
4       angry
        ...  
4262     fear
4263     fear
4264     fear
4265     fear
4266     fear
Name: emotion, Length: 31041, dtype: object

In [5]:
train_data, test_data, train_labels, test_labels = train_test_split(data.iloc[:,:-1], data.iloc[:,-1], test_size=0.2, random_state=42)

In [6]:
print("Train data size: ", len(train_data))
print("Test data size: ", len(test_data))

Train data size:  24832
Test data size:  6209


In [7]:
clf_linear = svm.SVC(kernel="linear", C=1, verbose=True)
clf_linear.fit(train_data, train_labels)

[LibSVM]

In [10]:
clf_poly = svm.SVC(kernel='poly', degree=10, C=1.0, verbose=True)
clf_poly.fit(train_data, train_labels)

[LibSVM]

In [11]:
clf_rbf = svm.SVC(kernel='rbf', gamma=0.7, C=1.0, verbose=True)
clf_rbf.fit(train_data, train_labels)

[LibSVM]

In [12]:
clf_sigmoid = svm.SVC(kernel='sigmoid', C=1.0, verbose=True)
clf_sigmoid.fit(train_data, train_labels)

[LibSVM]

In [8]:
scores_linear = cross_val_score(clf_linear, train_data, train_labels, cv=5)
print("Linear Kernel: %0.2f accuracy with a standard deviation of %0.2f" % (scores_linear.mean(), scores_linear.std()))

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]Linear Kernel: 0.52 accuracy with a standard deviation of 0.00


In [14]:
scores_poly = cross_val_score(clf_poly, train_data, train_labels, cv=5)
print("Polynomial Kernel: %0.2f accuracy with a standard deviation of %0.2f" % (scores_poly.mean(), scores_poly.std()))

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]Polynomial Kernel: 0.43 accuracy with a standard deviation of 0.01


In [15]:
scores_rbf = cross_val_score(clf_rbf, train_data, train_labels, cv=5)
print("RBF Kernel: %0.2f accuracy with a standard deviation of %0.2f" % (scores_rbf.mean(), scores_rbf.std()))

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]RBF Kernel: 0.47 accuracy with a standard deviation of 0.00


In [16]:
scores_sigmoid = cross_val_score(clf_sigmoid, train_data, train_labels, cv=5)
print("Sigmoid Kernel: %0.2f accuracy with a standard deviation of %0.2f" % (scores_sigmoid.mean(), scores_sigmoid.std()))

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]Sigmoid Kernel: 0.24 accuracy with a standard deviation of 0.01


In [9]:
coefficients = clf_linear.coef_
num_features = coefficients.shape[1]
print("Number of features: ", num_features)

Number of features:  956


In [10]:
import pickle
pickle.dump(clf_linear, open("emotion_model.pkl", "wb"))