In [None]:
#importing all the libs
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from keras.models import Model, Sequential
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Load the icml_face_data CSV and strip the leading space that two of its
# headers carry, so later cells can refer to `pixels` and `Usage` directly.
data = pd.read_csv("icml_face_data.csv").rename(
    columns={' pixels': 'pixels', ' Usage': 'Usage'}
)

In [None]:
# Show how many rows the loaded dataset contains.
print("size of data")
data.shape[0]

In [None]:
# Lookup table that decodes the integer class ids stored in the `emotion`
# column into readable names (also used for plot titles further down).
emotion_map = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}

# Number of rows per emotion class. The two columns produced by reset_index()
# are renamed positionally: first the class id, then its count.
emotion_counts = data['emotion'].value_counts(sort=False).reset_index()
emotion_counts.columns = ['emotion', 'number']
# Replace the numeric class id with its name for display.
emotion_counts['emotion'] = emotion_counts['emotion'].map(emotion_map)
emotion_counts

In [None]:
# Bar chart of how many images each emotion class has.
plt.figure(figsize=(6,4))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# as positional arguments.
sns.barplot(x=emotion_counts.emotion, y=emotion_counts.number)
plt.title('Class distribution')
plt.ylabel('Number', fontsize=12)
plt.xlabel('Emotions', fontsize=12)
plt.show()

In [None]:
# Show one sample image per emotion class, rendered in grayscale.
fig = plt.figure(1, (14, 14))

k = 0
for label in sorted(data.emotion.unique()):
    # `k` doubles as the row offset inside the class, so class 0 shows its
    # first image, class 1 its second, and so on.
    px = data[data.emotion==label].pixels.iloc[k]
    # Each image is stored as one space-separated string of 48*48 values.
    px = np.array(px.split(' ')).reshape(48, 48).astype('float32')

    k += 1
    # Panels fill the first row of a 7x7 grid, left to right.
    ax = plt.subplot(7, 7, k)
    ax.imshow(px , cmap='gray')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(emotion_map[label])

# Lay the figure out once, after every panel exists.
plt.tight_layout()

In [None]:
# Split every image's space-separated pixel string into one column per pixel.
# The values are still strings here; they are made numeric in a later step.
pixels_df = data.pixels.str.split(' ', expand=True)

In [None]:
# Parse the pixel strings as numbers (anything unparsable or missing becomes 0)
# and divide by 255 -- presumably to map 8-bit intensities onto [0, 1].
# NOTE: this cell overwrites `pixels_df`, so running it twice divides by 255 twice.
print("converting to numeric")
pixels_df = pixels_df.apply(pd.to_numeric, errors='coerce').fillna(0)
pixels_df = pixels_df / 255
print("conversion to numeric is done")

# Put the label and usage columns back in front of the pixel columns.
final_df = pd.concat([data[['emotion', 'Usage']], pixels_df], axis=1)
print(final_df.head())

In [None]:
# Build the train / validation / test partitions.
# The emotion column is run through a LabelEncoder and written back in place.
lab_enc = preprocessing.LabelEncoder()
final_df['emotion'] = lab_enc.fit_transform(final_df['emotion'])

# Features are every pixel column; the target is the encoded emotion.
X = final_df.drop(columns=['emotion', 'Usage'])
Y = final_df['emotion']

# First hold out 15% as the test set, then carve 18% of what is left off as
# the validation set. Both splits use the same fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.15, random_state=1
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.18, random_state=1
)

y_test

In [None]:
# Training and predicting with a support vector machine (polynomial kernel).
#
# Every (degree, C, gamma) combination is fitted on the training split and
# scored with micro-averaged F1, i.e. true positives, false positives and
# false negatives are summed over all labels before F1 is computed.
# The combination with the highest validation score is kept.

# Per-model summary scores; the result cells append to these and the final
# comparison chart reads them.
f1_valid = []
f1_test = []

degree= [2,5,10,20]
C= [0.1,1,10,100,1000]
gamma= [0.0001,0.001,0.01,1,1.5]

# Scores of every combination, in search order (used by the plot cell).
valid = []
test_data = []

# Reset the "best so far" holders so nothing stale from an earlier run of
# any search cell can be reported.
best_f1_score=0
best_param = None
f1_score_on_test_data = None

for d in degree:
  for c in C:
    for g in gamma:
      model= SVC(C=c,gamma=g, degree=d, kernel= 'poly')
      print("fit the model")
      model.fit(x_train,y_train)
      # Validation score drives model selection.
      y_pred= model.predict(x_val)
      f1_score_val= metrics.f1_score(y_val, y_pred, average='micro')
      valid.append(f1_score_val)
      # Test score is recorded for reporting only.
      y_pred_test= model.predict(x_test)
      f1_score_test= metrics.f1_score(y_test, y_pred_test, average='micro')
      test_data.append(f1_score_test)
      if(f1_score_val>best_f1_score):
        best_f1_score= f1_score_val
        f1_score_on_test_data= f1_score_test
        best_param= {'C':c, 'degree':d, 'gamma':g}
###print best f score and best params
print('best parameters :',best_param)
print('best f1 score :',best_f1_score)

In [None]:
# Report and record the scores of the best polynomial-kernel SVM.
# `best_f1_score` / `f1_score_on_test_data` belong to the selected model,
# whereas `f1_score_val` / `f1_score_test` only hold the last grid point tried.
print("Best parameters :", best_param)
print("f1 score on validation data set :", best_f1_score)
print("f1 score on test data set :", f1_score_on_test_data)

# One entry per model family; re-running this cell appends a second entry.
f1_valid.append(best_f1_score)
f1_test.append(f1_score_on_test_data)

In [None]:
# Validation vs. test F1 for every combination tried by the polynomial-kernel
# search, in the order the combinations were evaluated.
plt.plot(valid, label='Validation')
plt.plot(test_data, label="Test")
plt.xlabel("Hyper-parameter combination (search order)")
plt.ylabel("Micro F1 score")
plt.title("SVM (poly kernel) grid search")
plt.legend()
plt.show()

In [None]:
# Training and predicting with a support vector machine (RBF kernel).
# Every (C, gamma) combination is fitted on the training split and scored with
# micro-averaged F1; the one with the highest validation score is kept.
C= [0.1,1,10,100,1000]
gamma= [0.0001,0.001,0.01,1,1.5]

# Reset the "best so far" holders so results from the previous search cell
# cannot be reported by mistake.
best_f1_score=0
best_param = None
f1_score_on_test_data = None

# Scores of every combination, in search order (used by the plot cell).
valid = []
test_data = []

for c in C:
  for g in gamma:
    model= SVC(C=c,gamma=g, kernel= 'rbf')
    print("fit the model")
    model.fit(x_train,y_train)
    # Validation score drives model selection.
    y_pred= model.predict(x_val)
    f1_score_val= metrics.f1_score(y_val, y_pred, average='micro')
    valid.append(f1_score_val)
    # Test score is recorded for reporting only.
    y_pred_test= model.predict(x_test)
    f1_score_test= metrics.f1_score(y_test, y_pred_test, average='micro')
    test_data.append(f1_score_test)
    if(f1_score_val>best_f1_score):
      best_f1_score= f1_score_val
      f1_score_on_test_data= f1_score_test
      best_param= {'C':c, 'gamma':g}
      # Progress log: printed each time a new best combination is found.
      print('best parameters :',best_param)
      print('best f1 score :',best_f1_score)

In [None]:
# Report and record the scores of the best RBF-kernel SVM.
# `best_f1_score` / `f1_score_on_test_data` belong to the selected model,
# whereas `f1_score_val` / `f1_score_test` only hold the last grid point tried.
print("Best parameters :", best_param)
print("f1 score on validation data set :", best_f1_score)
print("f1 score on test data set :", f1_score_on_test_data)

# One entry per model family; re-running this cell appends a second entry.
f1_valid.append(best_f1_score)
f1_test.append(f1_score_on_test_data)

In [None]:
# Validation vs. test F1 for every combination tried by the RBF-kernel
# search, in the order the combinations were evaluated.
plt.plot(valid, label='Validation')
plt.plot(test_data, label="Test")
plt.xlabel("Hyper-parameter combination (search order)")
plt.ylabel("Micro F1 score")
plt.title("SVM (rbf kernel) grid search")
plt.legend()
plt.show()

In [None]:
# Training and predicting with decision trees.
# Each candidate max_depth is fitted on the training split and scored with
# micro-averaged F1; the depth with the highest validation score is kept.
max_depths=[4,10,20,50]

# Reset the "best so far" holders so results from the previous search cell
# cannot be reported by mistake.
best_f1_score=0
best_param = None
f1_score_on_test_data = None
best_test_accuracy = None

for depth in max_depths:
  model = DecisionTreeClassifier(max_depth=depth, random_state=17)
  model.fit(x_train,y_train)
  ###predict on val and test dataset
  y_pred= model.predict(x_val)
  y_pred_test= model.predict(x_test)
  f1_score_val= metrics.f1_score(y_val, y_pred, average='micro')
  f1_score_test= metrics.f1_score(y_test, y_pred_test, average='micro')
  if(f1_score_val>best_f1_score):
      best_f1_score= f1_score_val
      f1_score_on_test_data= f1_score_test
      best_param= {'max_depth':depth}
      # Capture the accuracy now, while y_pred_test still belongs to the
      # selected tree; after the loop it holds the last depth's predictions.
      best_test_accuracy = accuracy_score(y_test.astype('int'), y_pred_test)
##print results
print('best parameters :',best_param)
print('best f1 score :',best_f1_score)
print("accuracy score :", best_test_accuracy)

In [None]:
# Report and record the scores of the best decision tree.
# `best_f1_score` / `f1_score_on_test_data` belong to the selected depth,
# whereas `f1_score_val` / `f1_score_test` only hold the last depth tried.
print("Best parameters :", best_param)
print("f1 score on validation data set :", best_f1_score)
print("f1 score on test data set :", f1_score_on_test_data)

# One entry per model family; re-running this cell appends a second entry.
f1_valid.append(best_f1_score)
f1_test.append(f1_score_on_test_data)

In [None]:
# Sanity check: list the distinct encoded emotion labels present in the training split.
y_train.unique()

In [None]:
# Training and predicting with k-nearest neighbours.
# 100 candidate values of k are drawn at random (25 from [2, 50), 75 from
# [50, 1000)); each is fitted on the training split and scored with
# micro-averaged F1, and the k with the highest validation score is kept.
import random

# Dedicated seeded generator so the same candidate k values are drawn on
# every run.
rng = random.Random(1)
all_k= rng.sample(range(2,50),25)+rng.sample(range(50,1000),75)

# Reset the "best so far" holders. Without this the decision tree's best
# score would still be in `best_f1_score`, and a weaker KNN could never
# replace it -- the cell would then report the tree's parameters as KNN's.
best_f1_score=0
best_param = None
f1_score_on_test_data = None

for k in all_k:
  model= KNeighborsClassifier(n_jobs=-1,n_neighbors=k)
  model.fit(x_train,y_train)
  ###predict on val and test dataset
  y_pred= model.predict(x_val)
  y_pred_test= model.predict(x_test)
  f1_score_val= metrics.f1_score(y_val, y_pred, average='micro')
  f1_score_test= metrics.f1_score(y_test, y_pred_test, average='micro')
  if(f1_score_val>best_f1_score):
      best_f1_score= f1_score_val
      f1_score_on_test_data= f1_score_test
      best_param= {'k':k}
##print best results
print('best parameters :',best_param)
print('best f1 score :',best_f1_score)

In [None]:
# Report and record the scores of the best KNN model.
# `best_f1_score` / `f1_score_on_test_data` belong to the selected k,
# whereas `f1_score_val` / `f1_score_test` only hold the last k tried.
print("Best parameters :", best_param)
print("f1 score on validation data set :", best_f1_score)
print("f1 score on test data set :", f1_score_on_test_data)

# One entry per model family; re-running this cell appends a second entry.
f1_valid.append(best_f1_score)
f1_test.append(f1_score_on_test_data)

In [None]:
# Grouped bar chart comparing validation and test F1 across the model families.
# The names are listed in the order the result cells append to
# f1_valid / f1_test.
model_names = ('SVM(poly)', "SVM(rbf)", "DT", "KNN")
n_group = len(model_names)

# The result cells append on every run, so a re-run cell leaves extra entries.
# Fail with a clear message instead of a matplotlib shape-mismatch error.
if len(f1_valid) != n_group or len(f1_test) != n_group:
    raise ValueError(
        "expected %d scores per list, got %d validation and %d test scores; "
        "re-run the notebook from the top" % (n_group, len(f1_valid), len(f1_test))
    )

fig, ax = plt.subplots()
index = np.arange(n_group)
bar_width = 0.35
opacity = 0.8

r1 = plt.bar(index, f1_valid, bar_width, alpha=opacity, color='b', label='Validation')
r2 = plt.bar(index + bar_width, f1_test, bar_width, alpha=opacity, color='g', label='Test')
plt.xlabel("Model Name")
plt.ylabel("F1_score")
plt.title("F1_Score for each Model")
# Bars are centred on `index` and `index + bar_width`, so the midpoint of each
# pair is half a bar width to the right of `index`.
plt.xticks(index + bar_width / 2, model_names)
plt.legend()
plt.tight_layout()

plt.show()