In [1]:
import dlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score
from sklearn.linear_model import LogisticRegressionCV

In [21]:
predictor_path = "./shape_predictor_68_face_landmarks.dat"
face_rec_model_path = "./dlib_face_recognition_resnet_model_v1.dat"
 
detector = dlib.get_frontal_face_detector() #a detector to find the faces
sp = dlib.shape_predictor(predictor_path ) #shape predictor to find face landmarks
facerec = dlib.face_recognition_model_v1(face_rec_model_path) #face recognition model
img_data=np.zeros((5000,128))

In [None]:
img_data_train=np.zeros((5000,128))
for i in range (5000):
    img = dlib.load_rgb_image('../datasets/celeba/img/' + str(i) + '.jpg')
    dets = detector(img, 1)  #Extract the face area in the picture
    for k, d in enumerate(dets):
        shape = sp(img, d)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        img_data_train[i,:]=face_descriptor

In [None]:
img_data_test=np.zeros((1000,128))
for i in range (1000):
    img = dlib.load_rgb_image('../datasets/celeba_test/img/' + str(i) + '.jpg')
    dets = detector(img, 1)  #Extract the face area in the picture
    for k, d in enumerate(dets):
        shape = sp(img, d)
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        img_data_test[i,:]=face_descriptor

In [2]:
img_data_train = pd.read_csv('./face_feature_train.csv')
img_data_train=np.array(img_data_train)
img_data_train=img_data_train[:,1:]

In [3]:
img_data_test = pd.read_csv('./face_feature_test.csv')
img_data_test=np.array(img_data_test)
img_data_test=img_data_test[:,1:]

In [4]:
label_train=pd.read_table('../datasets/celeba/labels.csv')

In [5]:
label_test=pd.read_table('../datasets/celeba_test/labels.csv')

In [6]:
x_train = img_data_train
y_train= label_train['gender']
x_test = img_data_test
y_test= label_test['gender']

In [7]:
x_train = StandardScaler().fit_transform(x_train)
x_test = StandardScaler().fit_transform(x_test)

In [8]:
logreg = LogisticRegression(solver='lbfgs',max_iter=1000)
logreg.fit(x_train, y_train)
y_pred= logreg.predict(x_test)
print('Accuracy on train set:'+str(logreg.score(x_train,y_train)))
print('Accuracy on test set: '+str(accuracy_score(y_test,y_pred)))
print(classification_report(y_test,y_pred))#text report showing the main classification metrics

Accuracy on train set:0.9746
Accuracy on test set: 0.968
              precision    recall  f1-score   support

          -1       0.97      0.96      0.97       500
           1       0.96      0.97      0.97       500

    accuracy                           0.97      1000
   macro avg       0.97      0.97      0.97      1000
weighted avg       0.97      0.97      0.97      1000



In [10]:
import pickle


pickle.dump(logreg,open("LogisticRegression_gender_dlib.dat","wb")) 


loaded_model = pickle.load(open("LogisticRegression_gender_dlib.dat","rb"))


y_pred=loaded_model.predict(x_test)

In [12]:
logregcv = LogisticRegressionCV(solver='lbfgs',cv=50,max_iter=5000)
logregcv.fit(x_train, y_train)
y_pred_cv= logregcv.predict(x_test)
print('Accuracy on train set:'+str(logreg.score(x_train,y_train)))
print('Accuracy on test set: '+str(accuracy_score(y_test,y_pred)))
print(classification_report(y_test,y_pred))#text report showing the main classification metrics

Accuracy on train set:0.9746
Accuracy on test set: 0.968
              precision    recall  f1-score   support

          -1       0.97      0.96      0.97       500
           1       0.96      0.97      0.97       500

    accuracy                           0.97      1000
   macro avg       0.97      0.97      0.97      1000
weighted avg       0.97      0.97      0.97      1000

