In [11]:
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import graycomatrix, graycoprops
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
import os
import pandas as pd

angles = [0, np.pi/4, np.pi/2, 3*np.pi/4, np.pi]
n_neighbors = [3, 5, 7, 9, 11]
features = []
labels = []

In [12]:
def preprocess_image(image_path, target_size):
    # Read the image
    image = cv2.imread(image_path)

    # Convert image to grayscale
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Resize image while maintaining aspect ratio
    height, width = grayscale_image.shape[:2]
    if height > width:
        new_height = target_size
        new_width = int(width * (target_size / height))
    else:
        new_width = target_size
        new_height = int(height * (target_size / width))
    resized_image = cv2.resize(grayscale_image, (new_width, new_height))

    return resized_image

def extract_features(image):
    features = []
    for angle in angles:
        glcm = graycomatrix(image, [1], [angle], levels=256, symmetric=True, normed=True)
        dissimilarity = graycoprops(glcm, 'dissimilarity').ravel()
        correlation = graycoprops(glcm, 'correlation').ravel()
        homogeneity = graycoprops(glcm, 'homogeneity').ravel()
        contrast = graycoprops(glcm, 'contrast').ravel()
        asm = graycoprops(glcm, 'ASM').ravel()
        energy = graycoprops(glcm, 'energy').ravel()
        angle_features = np.concatenate((dissimilarity, correlation, homogeneity, contrast, asm, energy))
        features.extend(angle_features)

    return np.array(features)

In [13]:
parent_folder = "FacialExpression/"
subfolder_names = ["happy", "sad"]
df = pd.DataFrame(columns=['Image Name', 'Category'])

df_list = []
for subfolder in subfolder_names:
    subfolder_path = os.path.join(parent_folder, subfolder)
    image_list = os.listdir(subfolder_path)
    image_names = [os.path.splitext(image)[0] for image in image_list]
    category = [subfolder] * len(image_names)
    image_df = pd.DataFrame(
        {"Image Name": image_names, "Category": category})
    df_list.append(image_df)
df = pd.concat(df_list, ignore_index=True)
print(df['Category'].value_counts())

happy    1537
sad      1463
Name: Category, dtype: int64


In [14]:
for (i, imagePath) in enumerate(df['Image Name']):
    label = df['Category'][i]
    path = os.path.join(parent_folder, label + '/' + imagePath + ".jpg")
    try:
        image = preprocess_image(path, 128)
        feat = extract_features(image)
        features.append(feat)
        labels.append(label)
    except:
        print("File corrupted: {}".format(imagePath))

    # show an update every 200 images until the last image
    if i > 0 and ((i + 1)% 200 == 0 or i == len(imagePath)-1):
	    print("[INFO] processed {}/{}".format(i+1, len(df)))

[INFO] processed 10/3000
[INFO] processed 200/3000
[INFO] processed 400/3000
[INFO] processed 600/3000
[INFO] processed 800/3000
File corrupted: happy-0974
[INFO] processed 1000/3000
[INFO] processed 1200/3000
[INFO] processed 1400/3000
[INFO] processed 1600/3000
[INFO] processed 1800/3000
[INFO] processed 2000/3000
[INFO] processed 2200/3000
[INFO] processed 2400/3000
File corrupted: sad-0967
[INFO] processed 2600/3000
[INFO] processed 2800/3000
[INFO] processed 3000/3000


In [15]:
features = np.array(features)
labels = np.array(labels)

print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / (1024 * 1000.0)))

[INFO] features matrix: 0.70MB


In [16]:
#divide 1537 happy and 1463 sad images into equal amount for training and testing using sklearn by 80 20
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.15, random_state=42)

In [17]:
# bestModel = None
# bestAcc = 0.0
# k1 = 0
# for (i, k) in enumerate(n_neighbors):
#     print("[INFO] evaluating raw pixel accuracy for k={}...".format(k))
#     model = KNeighborsClassifier(n_neighbors=k, weights="distance", n_jobs=-1)
#     model.fit(trainRI, trainRL)
#     pred_raw = model.predict(testRI)
#     acc = accuracy_score(testRL, pred_raw)

#     print("[INFO] k-NN classifier: k={}".format(k))
#     print("[INFO] raw pixel accuracy: {:.2f}%".format(acc*100))
#     report = classification_report(testRL, pred_raw, target_names=["happy", "sad"])
#     print(report)

#     if acc > bestAcc:
#         bestAcc = acc
#         bestModel = model
#         k1 = k

In [18]:
# img = "sad-0031.jpg"
# img2 = "happy-0078.jpg"
# img_gray = cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2GRAY)
# img_gray2 = cv2.cvtColor(cv2.imread(img2), cv2.COLOR_BGR2GRAY)
# feat = image_to_feature_vector(img_gray)
# feat2 = image_to_feature_vector(img_gray2)

# pred_img = bestModel.predict([feat])
# pred_img2 = bestModel.predict([feat2])

# acc = accuracy_score(testRL, pred_raw)

# print("[INFO] k-NN classifier: k={}".format(k1))
# print("[INFO] feature accuracy image model: {:.2f}%".format(acc*100))

# print("Image 1 is {}".format(pred_img))
# print("Image 2 is {}".format(pred_img2))

In [21]:
bestModel2 = None
bestAcc2 = 0.0
k2 = 0
for k in n_neighbors:
    print("[INFO] evaluating feature accuracy for k={}...".format(k))
    model = KNeighborsClassifier(n_neighbors=k, weights="uniform", n_jobs=-1)
    model.fit(trainFeat, trainLabels)
    pred_feat = model.predict(testFeat)
    acc = accuracy_score(testLabels, pred_feat)

    print("[INFO] k-NN classifier: k={}".format(k))
    print("[INFO] feature accuracy: {:.2f}%".format(acc*100))
    report = classification_report(testLabels, pred_feat, target_names=["happy", "sad"])
    print(report)

    if acc > bestAcc2:
        bestAcc2 = acc
        bestModel2 = model
        k2 = k

[INFO] evaluating feature accuracy for k=3...
[INFO] k-NN classifier: k=3
[INFO] feature accuracy: 51.11%
              precision    recall  f1-score   support

       happy       0.53      0.52      0.52       234
         sad       0.49      0.50      0.50       216

    accuracy                           0.51       450
   macro avg       0.51      0.51      0.51       450
weighted avg       0.51      0.51      0.51       450

[INFO] evaluating feature accuracy for k=5...
[INFO] k-NN classifier: k=5
[INFO] feature accuracy: 52.22%
              precision    recall  f1-score   support

       happy       0.54      0.56      0.55       234
         sad       0.50      0.49      0.49       216

    accuracy                           0.52       450
   macro avg       0.52      0.52      0.52       450
weighted avg       0.52      0.52      0.52       450

[INFO] evaluating feature accuracy for k=7...
[INFO] k-NN classifier: k=7
[INFO] feature accuracy: 53.11%
              precision    r

In [22]:
img = "sad-0031.jpg"
img2 = "happy-0078.jpg"
img_gray = cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2GRAY)
img_gray2 = cv2.cvtColor(cv2.imread(img2), cv2.COLOR_BGR2GRAY)
feat = extract_features(img_gray)
feat2 = extract_features(img_gray2)

pred_img = bestModel2.predict([feat])
pred_img2 = bestModel2.predict([feat2])

acc = accuracy_score(testLabels, pred_feat)

print("[INFO] k-NN classifier: k={}".format(k2))
print("[INFO] feature accuracy image model: {:.2f}%".format(acc*100))

print("Image 1 is {}".format(pred_img))
print("Image 2 is {}".format(pred_img2))

error: OpenCV(4.7.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [None]:
import pickle

filename = "knn_model.sav"
pickle.dump(bestModel2, open(filename, 'wb'))