In [24]:
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import graycomatrix, graycoprops
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
import os
import pandas as pd

angles = [0, np.pi/4, np.pi/2, 3*np.pi/4, np.pi]
n_neighbors = [3, 5, 7, 9, 11]
rawImages = []
features = []
labels = []

In [25]:
def image_to_feature_vector(image, size=(128, 128)):
	return cv2.resize(image, size).flatten()

def extract_features(image):
    features = []
    for angle in angles:
        glcm = graycomatrix(image, [1], [angle], levels=256, symmetric=True, normed=True)
        dissimilarity = graycoprops(glcm, 'dissimilarity').ravel()
        correlation = graycoprops(glcm, 'correlation').ravel()
        homogeneity = graycoprops(glcm, 'homogeneity').ravel()
        contrast = graycoprops(glcm, 'contrast').ravel()
        asm = graycoprops(glcm, 'ASM').ravel()
        energy = graycoprops(glcm, 'energy').ravel()
        angle_features = np.concatenate((dissimilarity, correlation, homogeneity, contrast, asm, energy))
        features.extend(angle_features)

    return np.array(features)

In [26]:
parent_folder = "FacialExpression/"
subfolder_names = ["happy", "sad"]
df = pd.DataFrame(columns=['Image Name', 'Category'])

df_list = []
for subfolder in subfolder_names:
    subfolder_path = os.path.join(parent_folder, subfolder)
    image_list = os.listdir(subfolder_path)
    image_names = [os.path.splitext(image)[0] for image in image_list]
    category = [subfolder] * len(image_names)
    image_df = pd.DataFrame(
        {"Image Name": image_names, "Category": category})
    df_list.append(image_df)
df = pd.concat(df_list, ignore_index=True)
print(df['Category'].value_counts())

happy    1536
sad      1462
Name: Category, dtype: int64


In [27]:
for (i, imagePath) in enumerate(df['Image Name']):
    label = df['Category'][i]
    path = os.path.join(parent_folder, label + '/' + imagePath + ".jpg")
    try:
        #read image in grayscale and resize it to be 1:1
        image = cv2.imread(path, 0)
        image = cv2.resize(image, (128, 128))
        pixels = image_to_feature_vector(image)
        feat = extract_features(image)
        rawImages.append(pixels)
        features.append(feat)
        labels.append(label)
    except:
        print("File corrupted: {}".format(imagePath))

    # show an update every 200 images until the last image
    if i > 0 and ((i + 1)% 200 == 0 or i == len(imagePath)-2):
	    print("[INFO] processed {}/{}".format(i+1, len(df)))

[INFO] processed 9/2998
[INFO] processed 200/2998
[INFO] processed 400/2998
[INFO] processed 600/2998
[INFO] processed 800/2998
File corrupted: happy-0974
[INFO] processed 1000/2998
[INFO] processed 1200/2998
[INFO] processed 1400/2998
[INFO] processed 1600/2998
[INFO] processed 1800/2998
[INFO] processed 2000/2998
[INFO] processed 2200/2998
[INFO] processed 2400/2998
File corrupted: sad-0967
[INFO] processed 2600/2998
[INFO] processed 2800/2998


In [28]:
rawImages = np.array(rawImages)
features = np.array(features)
labels = np.array(labels)

print("[INFO] pixels matrix: {:.2f}MB".format(
	rawImages.nbytes / (1024 * 1000.0)))
print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / (1024 * 1000.0)))

[INFO] pixels matrix: 47.94MB
[INFO] features matrix: 0.70MB


In [29]:
#divide 1537 happy and 1463 sad images into equal amount for training and testing using sklearn by 80 20
(trainRI, testRI, trainRL, testRL) = train_test_split(
	rawImages, labels, test_size=0.15, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.15, random_state=42)

In [30]:
bestModel = None
bestAcc = 0.0
k1 = 0
for (i, k) in enumerate(n_neighbors):
    print("[INFO] evaluating raw pixel accuracy for k={}...".format(k))
    model = KNeighborsClassifier(n_neighbors=k, weights="distance", n_jobs=-1)
    model.fit(trainRI, trainRL)
    pred_raw = model.predict(testRI)
    acc = accuracy_score(testRL, pred_raw)

    print("[INFO] k-NN classifier: k={}".format(k))
    print("[INFO] raw pixel accuracy: {:.2f}%".format(acc*100))
    report = classification_report(testRL, pred_raw, target_names=["happy", "sad"])
    print(report)

    if acc > bestAcc:
        bestAcc = acc
        bestModel = model
        k1 = k

[INFO] evaluating raw pixel accuracy for k=3...


[INFO] k-NN classifier: k=3
[INFO] raw pixel accuracy: 61.78%
              precision    recall  f1-score   support

       happy       0.62      0.64      0.63       230
         sad       0.61      0.60      0.60       220

    accuracy                           0.62       450
   macro avg       0.62      0.62      0.62       450
weighted avg       0.62      0.62      0.62       450

[INFO] evaluating raw pixel accuracy for k=5...
[INFO] k-NN classifier: k=5
[INFO] raw pixel accuracy: 64.00%
              precision    recall  f1-score   support

       happy       0.65      0.65      0.65       230
         sad       0.63      0.63      0.63       220

    accuracy                           0.64       450
   macro avg       0.64      0.64      0.64       450
weighted avg       0.64      0.64      0.64       450

[INFO] evaluating raw pixel accuracy for k=7...
[INFO] k-NN classifier: k=7
[INFO] raw pixel accuracy: 67.33%
              precision    recall  f1-score   support

       ha

In [31]:
bestModel2 = None
bestAcc2 = 0.0
k2 = 0
for k in n_neighbors:
    print("[INFO] evaluating feature accuracy for k={}...".format(k))
    model = KNeighborsClassifier(n_neighbors=k, weights="distance", n_jobs=-1)
    model.fit(trainFeat, trainLabels)
    pred_feat = model.predict(testFeat)
    acc = accuracy_score(testLabels, pred_feat)

    print("[INFO] k-NN classifier: k={}".format(k))
    print("[INFO] feature accuracy: {:.2f}%".format(acc*100))
    report = classification_report(testLabels, pred_feat, target_names=["happy", "sad"])
    print(report)

    if acc > bestAcc2:
        bestAcc2 = acc
        bestModel2 = model
        k2 = k

[INFO] evaluating feature accuracy for k=3...
[INFO] k-NN classifier: k=3
[INFO] feature accuracy: 55.11%
              precision    recall  f1-score   support

       happy       0.56      0.60      0.58       230
         sad       0.55      0.50      0.52       220

    accuracy                           0.55       450
   macro avg       0.55      0.55      0.55       450
weighted avg       0.55      0.55      0.55       450

[INFO] evaluating feature accuracy for k=5...
[INFO] k-NN classifier: k=5
[INFO] feature accuracy: 53.11%
              precision    recall  f1-score   support

       happy       0.54      0.60      0.57       230
         sad       0.52      0.46      0.49       220

    accuracy                           0.53       450
   macro avg       0.53      0.53      0.53       450
weighted avg       0.53      0.53      0.53       450

[INFO] evaluating feature accuracy for k=7...
[INFO] k-NN classifier: k=7
[INFO] feature accuracy: 52.67%
              precision    r

In [33]:
img = "sad-0031.jpg"
img2 = "happy-0078.jpg"
img_gray = cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2GRAY)
img_gray2 = cv2.cvtColor(cv2.imread(img2), cv2.COLOR_BGR2GRAY)
feat = image_to_feature_vector(img_gray)
feat2 = image_to_feature_vector(img_gray2)

pred_img = bestModel.predict([feat])
pred_img2 = bestModel.predict([feat2])

acc = accuracy_score(testRL, pred_raw)

print("[INFO] k-NN classifier: k={}".format(k1))
print("[INFO] feature accuracy image model: {:.2f}%".format(acc*100))

print("Image 1 is {}".format(pred_img))
print("Image 2 is {}".format(pred_img2))

[INFO] k-NN classifier: k=7
[INFO] feature accuracy image model: 66.67%
Image 1 is ['happy']
Image 2 is ['happy']


In [34]:
img = "sad-0031.jpg"
img2 = "happy-0078.jpg"
img_gray = cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2GRAY)
img_gray2 = cv2.cvtColor(cv2.imread(img2), cv2.COLOR_BGR2GRAY)
feat = extract_features(img_gray)
feat2 = extract_features(img_gray2)

pred_img = bestModel2.predict([feat])
pred_img2 = bestModel2.predict([feat2])

acc = accuracy_score(testLabels, pred_feat)

print("[INFO] k-NN classifier: k={}".format(k2))
print("[INFO] feature accuracy image model: {:.2f}%".format(acc*100))

print("Image 1 is {}".format(pred_img))
print("Image 2 is {}".format(pred_img2))

[INFO] k-NN classifier: k=3
[INFO] feature accuracy image model: 53.56%
Image 1 is ['sad']
Image 2 is ['sad']


In [35]:
import pickle

filename = "knn_model.sav"
pickle.dump(bestModel2, open(filename, 'wb'))