In [1]:
import sys
import os
import pandas as pd
import seaborn as sns
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn import svm

In [2]:
# Load the labels of the dataset 
path = str(sys.path[0])[:-2]
smile_labels = pd.read_csv(path + '/Datasets/dataset_AMLS_19-20/celeba/labels.csv', delimiter='\t')
smile_labels = smile_labels.iloc[:,1:4]
smile_dict = {1: 1, -1: 0}
smile_labels["smiling"].replace(smile_dict, inplace=True)
smile_labels = smile_labels.drop('gender', axis=1)
smile_labels.head()


Unnamed: 0,img_name,smiling
0,0.jpg,1
1,1.jpg,1
2,2.jpg,0
3,3.jpg,0
4,4.jpg,0


In [3]:
ROWS = 64
COLS = 64
CHANNELS = 3
print(smile_labels.shape)
# Create dataframes for test and train data split into .75 and .25 for train and test respectively
X_train = pd.DataFrame(np.zeros((int(.75*smile_labels.shape[0]), ROWS * COLS * CHANNELS)))
X_test = pd.DataFrame(np.zeros((int(.25*smile_labels.shape[0]), ROWS * COLS * CHANNELS)))
y_train = list()
y_test = list()

(5000, 2)


In [4]:
# Split into male and female data
smile_data = smile_labels[(smile_labels['smiling'] == 1)]
no_smile_data = smile_labels[smile_labels['smiling'] == 0]

In [5]:
# Splitting male and female data into train and test
train_smile_data, test_smile_data = train_test_split(smile_data)
train_no_smile_data, test_no_smile_data = train_test_split(no_smile_data)
train_no_smile_data.head()

Unnamed: 0,img_name,smiling
3605,3605.jpg,0
2475,2475.jpg,0
867,867.jpg,0
3781,3781.jpg,0
3455,3455.jpg,0


In [6]:
# Create final test data set out of male and female data
test_indices = test_no_smile_data.index.tolist() + test_smile_data.index.tolist()
test_data = smile_labels.iloc[test_indices,:]
test_data.head()

Unnamed: 0,img_name,smiling
1269,1269.jpg,0
4278,4278.jpg,0
2638,2638.jpg,0
286,286.jpg,0
2401,2401.jpg,0


In [7]:
# Filter the training data from the labels 
train_data = pd.concat([smile_labels, test_data, test_data]).drop_duplicates(keep=False)
train_data.head()

Unnamed: 0,img_name,smiling
1,1.jpg,1
2,2.jpg,0
3,3.jpg,0
5,5.jpg,0
6,6.jpg,0


In [8]:
# Read the image names 
img_path = path + 'Datasets/dataset_AMLS_19-20/celeba/img/' # path of your image folder
train_image_name = [img_path + each for each in train_data['img_name'].values.tolist()]
test_image_name = [img_path + each for each in test_data['img_name'].values.tolist()]

In [9]:
def prep_img(img_path, rows, columns):
    # resize the image and flatten to use in LR
    img = cv2.imread(img_path,cv2.IMREAD_COLOR)
    img = cv2.resize(img, (rows, columns))
    return(pd.Series(img.flatten()))

In [10]:
# iterating through files to create training and test set - the same split as for the CNN

counter_train = 0

# create the training set
for img in train_image_name:
    X_train.iloc[counter_train, :] = prep_img(img, ROWS, COLS) / 255
    counter_train += 1
    
y_train = train_data.iloc[:,1].values.tolist()

In [11]:
# create the test dataset

counter_test = 0
for img in test_image_name:
    X_test.iloc[counter_test, :] = prep_img(img, ROWS, COLS) / 255
    counter_test += 1
    
y_test = test_data.iloc[:,1].values.tolist()

In [68]:
#Create a svm Classifier
svm_classifier = svm.SVC(kernel='linear', C=.01, gamma=5)

In [69]:
svm_classifier.fit(X_train, y_train)

SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=5, kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [70]:
y_pred = svm_classifier.predict(X_test)

In [71]:
print("Accuracy:",accuracy_score(y_test, y_pred))

Accuracy: 0.868
