In [55]:
import os
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from PIL import Image
from sklearn.decomposition import PCA
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [57]:
emotion_to_number = {
    "angry": 0,
    "disgust": 1,
    "fear": 2,
    "happy": 3,
    "sad": 4,
    "surprise": 5,
    "neutral": 6
}
folder = "./db"
data = []
for label in os.listdir(folder):
    label_folder = os.path.join(folder, label)
    if os.path.isdir(label_folder): 
        for filename in os.listdir(label_folder):
            img_path = os.path.join(label_folder, filename)
            img = Image.open(img_path).convert('L') 
            img = img.resize((48, 48))
            img = np.array(img)
            img = img.flatten()
            data.append(np.append(img, emotion_to_number[label.lower()])) 
# shuffle to get them mixed up
data = shuffle(data, random_state=42)

In [58]:
df = pd.DataFrame(data, columns=[*range(2304), "emotion"])
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2295,2296,2297,2298,2299,2300,2301,2302,2303,emotion
0,253,254,255,253,255,245,208,172,169,151,...,253,254,255,255,254,255,255,253,255,2
1,196,223,136,163,127,140,128,106,96,102,...,219,205,200,197,196,191,190,162,156,3
2,233,216,179,157,144,145,155,171,183,177,...,222,221,221,221,221,221,221,221,221,2
3,6,6,9,9,83,114,35,18,28,20,...,153,78,17,8,9,9,11,6,8,4
4,185,50,3,19,9,9,7,12,26,8,...,111,204,224,226,225,225,224,225,226,3


In [61]:
x = df.drop(['emotion'], axis=1)
y = df['emotion']
x.shape

(35887, 2304)

In [62]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.20)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(28709, 2304) (7178, 2304) (28709,) (7178,)


In [63]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print(X_train_scaled.shape)
print(X_test_scaled.shape)
print(X_train_scaled)

(28709, 2304)
(7178, 2304)
[[ 0.13290277  0.43354239  0.42639928 ...  0.0037627   0.08315867
   0.1848582 ]
 [ 1.43233363  0.19849304 -0.11400979 ... -1.17533395 -1.26578681
  -1.22774702]
 [ 1.62664105  1.67064426  1.46951447 ...  1.83071466  1.81751714
   1.76290368]
 ...
 [-0.58360584 -1.00149578 -0.98117783 ...  1.84367176  1.81751714
   1.77562985]
 [ 0.74011345  0.79230193  0.84113183 ... -0.09989415 -0.58489052
  -0.97322356]
 [-1.44584501 -1.44685245 -1.42104568 ... -0.38495048 -0.54634922
  -1.07503294]]


In [65]:
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(X_train_pca.shape)
print(X_test_pca.shape)

(28709, 274)
(7178, 274)


In [66]:
svc_rbf_model = SVC(kernel='rbf', max_iter=100000, C=2)
svc_rbf_model.fit(X_train_pca, y_train)

In [67]:
prediction = svc_rbf_model.predict(X_test_pca)

In [68]:
accuracy = accuracy_score(y_test, prediction)
accuracy

0.4743661186960156

In [33]:
predictions_df = pd.DataFrame(prediction, columns=['result'])
predictions_df.to_csv('predictions_with_id.csv', index=False)

In [69]:
from joblib import dump
dump(svc_rbf_model, 'model.joblib')
dump(pca, 'pca.joblib')
dump(scaler, 'scaler.joblib')

['scaler.joblib']