In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
df_angry = pd.read_csv('df_angry.csv')
df_disgusted = pd.read_csv('df_disgusted.csv')
df_happy = pd.read_csv('df_happy.csv')
df_neutral = pd.read_csv('df_neutral.csv')
df_sad = pd.read_csv('df_sad.csv')
df_shocked = pd.read_csv('df_shocked.csv')

In [3]:
print(len(df_angry), len(df_disgusted), len(df_happy), len(df_neutral), len(df_sad), len(df_shocked))

6595 2943 6456 9386 6888 6163


In [4]:
data = pd.concat([df_angry, df_disgusted, df_happy, df_neutral, df_sad, df_shocked], axis=0)
data["emotion"] = ["angry"]*len(df_angry) + ["disgusted"]*len(df_disgusted) + ["happy"]*len(df_happy) + ["neutral"]*len(df_neutral) + ["sad"]*len(df_sad) + ["shocked"]*len(df_shocked)
data["emotion"]

0         angry
1         angry
2         angry
3         angry
4         angry
         ...   
6158    shocked
6159    shocked
6160    shocked
6161    shocked
6162    shocked
Name: emotion, Length: 38431, dtype: object

In [5]:
train_data, test_data, train_labels, test_labels = train_test_split(data.iloc[:,:-1], data.iloc[:,-1], test_size=0.2, random_state=42)

In [6]:
print("Train data size: ", len(train_data))
print("Test data size: ", len(test_data))

Train data size:  30744
Test data size:  7687


In [7]:
clf_linear = svm.SVC(kernel="linear", C=1, verbose=True)
clf_linear.fit(train_data, train_labels)

[LibSVM]

In [8]:
print(classification_report(test_labels, clf_linear.predict(test_data)))

              precision    recall  f1-score   support

       angry       0.42      0.44      0.43      1350
   disgusted       0.50      0.04      0.07       601
       happy       0.69      0.72      0.70      1255
     neutral       0.45      0.68      0.54      1866
         sad       0.43      0.29      0.35      1412
     shocked       0.63      0.61      0.62      1203

    accuracy                           0.51      7687
   macro avg       0.52      0.46      0.45      7687
weighted avg       0.51      0.51      0.49      7687



In [9]:
from sklearn.feature_selection import RFE
rfe = RFE(clf_linear, n_features_to_select=50, step=50, verbose=1)
rfe.fit(train_data, train_labels)

Fitting estimator with 956 features.
[LibSVM]Fitting estimator with 906 features.
[LibSVM]Fitting estimator with 856 features.
[LibSVM]Fitting estimator with 806 features.
[LibSVM]Fitting estimator with 756 features.
[LibSVM]Fitting estimator with 706 features.
[LibSVM]Fitting estimator with 656 features.
[LibSVM]Fitting estimator with 606 features.
[LibSVM]Fitting estimator with 556 features.
[LibSVM]Fitting estimator with 506 features.
[LibSVM]Fitting estimator with 456 features.
[LibSVM]Fitting estimator with 406 features.
[LibSVM]Fitting estimator with 356 features.
[LibSVM]Fitting estimator with 306 features.
[LibSVM]Fitting estimator with 256 features.
[LibSVM]Fitting estimator with 206 features.
[LibSVM]Fitting estimator with 156 features.
[LibSVM]Fitting estimator with 106 features.
[LibSVM]Fitting estimator with 56 features.
[LibSVM][LibSVM]

In [12]:
importances = abs(rfe.estimator_.coef_[0])
indices = importances.argsort()[::-1]
print(indices)

[38 14 23 21 35 15 16 33 12 36 26 46 47  6 42 24 49  9  4  8 48  0 32 40
 25 31 20 27 10  7 22 41 30 19 29 39  1 28 34  2 11  5 17 43  3 44 37 45
 13 18]


In [14]:
print(classification_report(test_labels, rfe.predict(test_data)))

              precision    recall  f1-score   support

       angry       0.42      0.38      0.40      1350
   disgusted       0.00      0.00      0.00       601
       happy       0.66      0.70      0.68      1255
     neutral       0.41      0.73      0.52      1866
         sad       0.39      0.19      0.26      1412
     shocked       0.60      0.57      0.58      1203

    accuracy                           0.48      7687
   macro avg       0.41      0.43      0.41      7687
weighted avg       0.45      0.48      0.45      7687



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
import time
start = time.time()
clf_linear.predict(test_data)
end = time.time()
print("Time taken for linear model: ", end-start)

start = time.time()
rfe.predict(test_data)
end = time.time()
print("Time taken for rfe model: ", end-start)

Time taken for linear model:  74.38189673423767
Time taken for rfe model:  5.275679588317871


In [29]:
import pickle
pickle.dump(rfe, open("emotion_model.pkl", "wb"))