In [1]:
!pip install numpy==1.21.0
!pip install catboost
!pip install xgboost




In [2]:
from face import load_face_dataset

from sklearn.metrics import f1_score, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
import math
import imutils
import time
import cv2
import os
from tqdm import tqdm

**Note** : If you save the model, model file will take about `~1.2GB` of your memory. So be careful if you want to save it.

In [3]:
def train_with_lbph(input_data , test_size = 0.20 , output = True , save_model=False):
  # extract faces and label from dataset
  (faces , labels) = load_face_dataset(input_data , minSamples=15)

  # encode the labels
  le = LabelEncoder()
  labels = le.fit_transform(labels)

  # do the training process based on the number of 
  # random_states

  random_state = 42
  # split the data into training and test set
  X_train , X_test , y_train , y_test = train_test_split(faces , labels , test_size = test_size,
                                                        stratify = labels , random_state = random_state)
  
  # define and train the LBP model
  recognizer = cv2.face.LBPHFaceRecognizer_create()

  recognizer.train(X_train , y_train)

  # initialize our predictions and confidence lists
  predictions = []
  confidence = []

  # loop over the test data
  for i in range(0 , len(X_test)):
    # classify the face and update the predictions
    # and confidence scores
    (prediction , conf) = recognizer.predict(X_test[i])
    predictions.append(prediction)
    confidence.append(confidence)

  # collect and append the calculated f1-score based on this
  # random state
  # Calculate Accuracy
  f1_score_ = f1_score(y_test,predictions,average="macro")
  accuracy_score_ = accuracy_score(y_test, predictions)

  if output:
    # generate a sample of testing data
    idxs = np.random.choice(range(0 , len(X_test)) , size = 10 , replace = False)

    for i in idxs:
      # grab the actual and predicted name
      predName = le.inverse_transform([predictions[i]])[0]
      actualName = le.classes_[y_test[i]]

      # grab the face image, convert back it from 1 channel
      # into 3 channels and then resize it
      face = np.dstack([X_test[i]] * 3)
      face = imutils.resize(face , width = 250)

      # draw the actual and predicted name on the image
      cv2.putText(face , "actual : {}".format(actualName) , (5 , 25),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 0 , 255) , 2)
      cv2.putText(face , "prediction : {}".format(predName) , (5 , 60),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 255 , 0) , 2)
      
      # display the result image and the save it into
      # the desired directory 
      plt.imshow(face , cmap="gray")
      plt.savefig("output/lbp-faces/face-{}.png".format(i))
    # save the model 
  
  print("Saving the model")
  if save_model:
    # save the model 
    recognizer.save("face-gender-model.yml")

  return f1_score_, accuracy_score_

In [4]:
from sklearn.ensemble import RandomForestClassifier
from skimage import feature

def train_with_rf(input_data, test_size = 0.20 , output = True , save_model=False):
  # extract faces and label from dataset
  (faces , labels) = load_face_dataset(input_data , minSamples=15)

  # encode the labels
  le = LabelEncoder()
  labels = le.fit_transform(labels)

  # do the training process based on the number of 
  # random_states

  random_state = 42
    
  X_train , X_test , y_train , y_test = train_test_split(faces , labels , test_size = test_size,
                                                          stratify = labels , random_state = random_state)
        
  # Replace LBPH recognizer with LBP feature extraction
  lbp_features = []
  for face in X_train:
      # Calculate LBP features
      radius = 3
      n_points = 8 * radius
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      # Convert LBP image to feature vector (histogram)
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), 
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)  # Normalize
      lbp_features.append(hist)
      
  # Train Random Forest on LBP features
  rf_classifier = RandomForestClassifier(n_estimators=100, random_state=random_state)
  rf_classifier.fit(lbp_features, y_train)

  # Extract features from test set
  test_features = []
  for face in X_test:
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3),
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)
      test_features.append(hist)
      
  # Make predictions using Random Forest
  predictions = rf_classifier.predict(test_features)
  
  # Calculate metrics
  accuracy_score_ = accuracy_score(y_test, predictions)
  f1_score_ = f1_score(y_test, predictions, average="macro")

  if output:
    # generate a sample of testing data
    idxs = np.random.choice(range(0 , len(X_test)) , size = 10 , replace = False)

    for i in idxs:
      # grab the actual and predicted name
      predName = le.inverse_transform([predictions[i]])[0]
      actualName = le.classes_[y_test[i]]

      # grab the face image, convert back it from 1 channel
      # into 3 channels and then resize it
      face = np.dstack([X_test[i]] * 3)
      face = imutils.resize(face , width = 250)

      # draw the actual and predicted name on the image
      cv2.putText(face , "actual : {}".format(actualName) , (5 , 25),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 0 , 255) , 2)
      cv2.putText(face , "prediction : {}".format(predName) , (5 , 60),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 255 , 0) , 2)
      
      # display the result image and the save it into
      # the desired directory 
      plt.imshow(face , cmap="gray")
      plt.savefig("output/lbp-faces/face-{}.png".format(i))
    # save the model 
  
    print("Saving the model")
    if save_model:
      # save the model 
      import pickle 
      pickle.dump(rf_classifier, open('./gender-prediction-weights-rf.pkl','wb'))

  return f1_score_, accuracy_score_

In [5]:
# Train with SVM

from sklearn.svm import SVC
from skimage import feature

def train_with_svm(input_data, test_size = 0.20 , output = True , save_model=False):
  # extract faces and label from dataset
  (faces , labels) = load_face_dataset(input_data , minSamples=15)

  # encode the labels
  le = LabelEncoder()
  labels = le.fit_transform(labels)

  # do the training process based on the number of 
  # random_states

  random_state = 42
    
  X_train , X_test , y_train , y_test = train_test_split(faces , labels , test_size = test_size,
                                                          stratify = labels , random_state = random_state)
        
  # Replace LBPH recognizer with LBP feature extraction
  lbp_features = []
  for face in X_train:
      # Calculate LBP features
      radius = 3
      n_points = 8 * radius
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      # Convert LBP image to feature vector (histogram)
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), 
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)  # Normalize
      lbp_features.append(hist)
      
  # Train Random Forest on LBP features
  svm_classifier = SVC(kernel='linear', random_state=random_state)
  svm_classifier.fit(lbp_features, y_train)

  # Extract features from test set
  test_features = []
  for face in X_test:
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3),
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)
      test_features.append(hist)
      
  # Make predictions using Random Forest
  predictions = svm_classifier.predict(test_features)
  
  # Calculate metrics
  accuracy_score_ = accuracy_score(y_test, predictions)
  f1_score_ = f1_score(y_test, predictions, average="macro")

  if output:
    # generate a sample of testing data
    idxs = np.random.choice(range(0 , len(X_test)) , size = 10 , replace = False)

    for i in idxs:
      # grab the actual and predicted name
      predName = le.inverse_transform([predictions[i]])[0]
      actualName = le.classes_[y_test[i]]

      # grab the face image, convert back it from 1 channel
      # into 3 channels and then resize it
      face = np.dstack([X_test[i]] * 3)
      face = imutils.resize(face , width = 250)

      # draw the actual and predicted name on the image
      cv2.putText(face , "actual : {}".format(actualName) , (5 , 25),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 0 , 255) , 2)
      cv2.putText(face , "prediction : {}".format(predName) , (5 , 60),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 255 , 0) , 2)
      
      # display the result image and the save it into
      # the desired directory 
      plt.imshow(face , cmap="gray")
      plt.savefig("output/lbp-faces/face-{}.png".format(i))
    # save the model 
  
    print("Saving the model")
    if save_model:
      # save the model 
      import pickle 
      pickle.dump(svm_classifier, open('./gender-prediction-weights-svm.pkl','wb'))

  return f1_score_, accuracy_score_

In [6]:
# Train with XGBoost

import xgboost as xgb
from skimage import feature

def train_with_xgb(input_data, test_size = 0.20 , output = True , save_model=False):
  # extract faces and label from dataset
  (faces , labels) = load_face_dataset(input_data , minSamples=15)

  # encode the labels
  le = LabelEncoder()
  labels = le.fit_transform(labels)

  # do the training process based on the number of 
  # random_states

  random_state = 42
    
  X_train , X_test , y_train , y_test = train_test_split(faces , labels , test_size = test_size,
                                                          stratify = labels , random_state = random_state)
        
  # Replace LBPH recognizer with LBP feature extraction
  lbp_features = []
  for face in X_train:
      # Calculate LBP features
      radius = 3
      n_points = 8 * radius
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      # Convert LBP image to feature vector (histogram)
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), 
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)  # Normalize
      lbp_features.append(hist)
      
  # Train Random Forest on LBP features
  xgb_classifier = xgb.XGBClassifier(objective="binary:logistic", random_state=random_state)
  xgb_classifier.fit(lbp_features, y_train)

  # Extract features from test set
  test_features = []
  for face in X_test:
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3),
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)
      test_features.append(hist)
      
  # Make predictions using Random Forest
  predictions = xgb_classifier.predict(test_features)
  
  # Calculate metrics
  accuracy_score_ = accuracy_score(y_test, predictions)
  f1_score_ = f1_score(y_test, predictions, average="macro")

  if output:
    # generate a sample of testing data
    idxs = np.random.choice(range(0 , len(X_test)) , size = 10 , replace = False)

    for i in idxs:
      # grab the actual and predicted name
      predName = le.inverse_transform([predictions[i]])[0]
      actualName = le.classes_[y_test[i]]

      # grab the face image, convert back it from 1 channel
      # into 3 channels and then resize it
      face = np.dstack([X_test[i]] * 3)
      face = imutils.resize(face , width = 250)

      # draw the actual and predicted name on the image
      cv2.putText(face , "actual : {}".format(actualName) , (5 , 25),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 0 , 255) , 2)
      cv2.putText(face , "prediction : {}".format(predName) , (5 , 60),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 255 , 0) , 2)
      
      # display the result image and the save it into
      # the desired directory 
      plt.imshow(face , cmap="gray")
      plt.savefig("output/lbp-faces/face-{}.png".format(i))
    # save the model 
  
    print("Saving the model")
    if save_model:
      # save the model 
      import pickle 
      pickle.dump(xgb_classifier, open('./gender-prediction-weights-xgb.pkl','wb'))

  return f1_score_, accuracy_score_

In [7]:
# Train with catboost

from catboost import CatBoostClassifier

def train_with_catboost(input_data, test_size = 0.20 , output = True , save_model=False):
  # extract faces and label from dataset
  (faces , labels) = load_face_dataset(input_data , minSamples=15)

  # encode the labels
  le = LabelEncoder()
  labels = le.fit_transform(labels)

  # do the training process based on the number of 
  # random_states

  random_state = 42
    
  X_train , X_test , y_train , y_test = train_test_split(faces , labels , test_size = test_size,
                                                          stratify = labels , random_state = random_state)
        
  # Replace LBPH recognizer with LBP feature extraction
  lbp_features = []
  for face in X_train:
      # Calculate LBP features
      radius = 3
      n_points = 8 * radius
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      # Convert LBP image to feature vector (histogram)
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), 
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)  # Normalize
      lbp_features.append(hist)
      
  # Train Random Forest on LBP features
  catboost_classifier = CatBoostClassifier(random_state=random_state)
  catboost_classifier.fit(lbp_features, y_train)

  # Extract features from test set
  test_features = []
  for face in X_test:
      lbp = feature.local_binary_pattern(face, n_points, radius, method='uniform')
      hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3),
                            range=(0, n_points + 2))
      hist = hist.astype("float")
      hist /= (hist.sum() + 1e-7)
      test_features.append(hist)
      
  # Make predictions using Random Forest
  predictions = catboost_classifier.predict(test_features)
  
  # Calculate metrics
  accuracy_score_ = accuracy_score(y_test, predictions)
  f1_score_ = f1_score(y_test, predictions, average="macro")

  if output:
    # generate a sample of testing data
    idxs = np.random.choice(range(0 , len(X_test)) , size = 10 , replace = False)

    for i in idxs:
      # grab the actual and predicted name
      predName = le.inverse_transform([predictions[i]])[0]
      actualName = le.classes_[y_test[i]]

      # grab the face image, convert back it from 1 channel
      # into 3 channels and then resize it
      face = np.dstack([X_test[i]] * 3)
      face = imutils.resize(face , width = 250)

      # draw the actual and predicted name on the image
      cv2.putText(face , "actual : {}".format(actualName) , (5 , 25),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 0 , 255) , 2)
      cv2.putText(face , "prediction : {}".format(predName) , (5 , 60),
                  cv2.FONT_HERSHEY_SIMPLEX , 0.8 , (0 , 255 , 0) , 2)
      
      # display the result image and the save it into
      # the desired directory 
      plt.imshow(face , cmap="gray")
      plt.savefig("output/lbp-faces/face-{}.png".format(i))
    # save the model 
  
    print("Saving the model")
    if save_model:
      # save the model 
      import pickle 
      pickle.dump(catboost_classifier, open('./gender-prediction-weights-catboost.pkl','wb'))

  return f1_score_, accuracy_score_

In [8]:
f1_score_, accuracy_score_ = train_with_lbph("gender", output=False, save_model=True)
print("F1-scores is : " , f1_score_)
print("Accuracy score is : " , accuracy_score_)


Number of images:  446


100%|██████████| 446/446 [00:00<00:00, 749.75it/s]


Saving the model
F1-scores is :  0.7441601779755285
Accuracy score is :  0.7444444444444445


In [9]:
f1_score_, accuracy_score_ = train_with_rf("gender", output=False, save_model=False)
print("F1-scores is : " , f1_score_)
print("Accuracy score is : " , accuracy_score_)


Number of images:  446


100%|██████████| 446/446 [00:00<00:00, 2020.54it/s]


F1-scores is :  0.6864111498257841
Accuracy score is :  0.6888888888888889


In [10]:
f1_score_, accuracy_score_ = train_with_svm("gender", output=False, save_model=False)
print("F1-scores is : " , f1_score_)
print("Accuracy score is : " , accuracy_score_)

Number of images:  446


100%|██████████| 446/446 [00:00<00:00, 2285.73it/s]


F1-scores is :  0.338235294117647
Accuracy score is :  0.5111111111111111


In [11]:
f1_score_, accuracy_score_ = train_with_xgb("gender", output=False, save_model=False)
print("F1-scores is : " , f1_score_)
print("Accuracy score is : " , accuracy_score_)

Number of images:  446


100%|██████████| 446/446 [00:00<00:00, 2350.15it/s]


F1-scores is :  0.696969696969697
Accuracy score is :  0.7


In [12]:
f1_score_, accuracy_score_ = train_with_catboost("gender", output=False, save_model=False)
print("F1-scores is : " , f1_score_)
print("Accuracy score is : " , accuracy_score_)

Number of images:  446


100%|██████████| 446/446 [00:00<00:00, 2206.16it/s]


Learning rate set to 0.006628
0:	learn: 0.6913661	total: 124ms	remaining: 2m 3s
1:	learn: 0.6893542	total: 127ms	remaining: 1m 3s
2:	learn: 0.6873965	total: 131ms	remaining: 43.5s
3:	learn: 0.6856996	total: 134ms	remaining: 33.4s
4:	learn: 0.6838096	total: 137ms	remaining: 27.3s
5:	learn: 0.6816398	total: 140ms	remaining: 23.2s
6:	learn: 0.6797172	total: 144ms	remaining: 20.5s
7:	learn: 0.6774550	total: 147ms	remaining: 18.3s
8:	learn: 0.6757242	total: 150ms	remaining: 16.6s
9:	learn: 0.6738136	total: 154ms	remaining: 15.2s
10:	learn: 0.6721767	total: 157ms	remaining: 14.2s
11:	learn: 0.6703657	total: 161ms	remaining: 13.2s
12:	learn: 0.6687428	total: 164ms	remaining: 12.4s
13:	learn: 0.6667133	total: 167ms	remaining: 11.8s
14:	learn: 0.6649905	total: 171ms	remaining: 11.2s
15:	learn: 0.6629569	total: 175ms	remaining: 10.7s
16:	learn: 0.6613656	total: 178ms	remaining: 10.3s
17:	learn: 0.6595228	total: 182ms	remaining: 9.91s
18:	learn: 0.6578452	total: 185ms	remaining: 9.54s
19:	learn: 