In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier

# Importing Data

In [2]:
data = pickle.load(open('face_gender_classifier.pickle', mode='rb'))

In [3]:
X = np.array(data['data'])
y = np.array(data['label'])

In [4]:
X.shape, y.shape

((46989, 1, 128), (46989,))

In [5]:
X = X.reshape(-1,128)
X.shape

(46989, 128)

# Splitting data into training and testing set

In [6]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0)

In [7]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((37591, 128), (9398, 128), (37591,), (9398,))

In [8]:
print(X_test)

[[-0.0381585   0.13729748 -0.16422193 ...  0.05034842 -0.01801231
   0.03217039]
 [ 0.06161663  0.09636125  0.04278971 ...  0.00075633  0.15842965
   0.02209096]
 [ 0.03930619 -0.0567132  -0.06335641 ... -0.01752661  0.08501597
   0.02548062]
 ...
 [ 0.1164023  -0.01490939 -0.02490834 ...  0.03187592 -0.00281047
  -0.00752004]
 [ 0.06713895  0.04530767  0.10807777 ...  0.03463005  0.19246559
   0.11469107]
 [ 0.01660476  0.08911455  0.27995455 ...  0.09014833  0.13532545
   0.01790298]]


In [9]:
print(y_test)

['Male' 'Female' 'Female' ... 'Female' 'Male' 'Male']


# Training Model

## Logistic Regression

In [10]:
logistic_classifier = LogisticRegression()
logistic_classifier.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [11]:
def get_report(model, X_train, y_train, X_test, y_test):
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    
    accuracy_train = accuracy_score(y_train, y_pred_train)
    accuracy_test = accuracy_score(y_test, y_pred_test)

    # F1 Score
    f1_score_train = f1_score(y_train, y_pred_train,average='macro')
    f1_score_test = f1_score(y_test,y_pred_test,average='macro')

    print('Accuracy Train =', accuracy_train)
    print('Accuracy Test =', accuracy_test)
    print('F1 Score Train =', f1_score_train)
    print('F1 Score Test =', f1_score_test)

In [12]:
get_report(logistic_classifier, X_train, y_train, X_test, y_test)

Accuracy Train = 0.8869144210050278
Accuracy Test = 0.8910406469461588
F1 Score Train = 0.886912053420713
F1 Score Test = 0.8910390481061448


## Support Vector Machine

In [13]:
model_svc = SVC(probability=True)
model_svc.fit(X_train, y_train)

In [14]:
get_report(model_svc, X_train, y_train, X_test, y_test)

Accuracy Train = 0.9187305472054481
Accuracy Test = 0.9130666099170036
F1 Score Train = 0.9187257766379429
F1 Score Test = 0.9130665301907877


## Random Forest

In [15]:
model_rf = RandomForestClassifier()
model_rf.fit(X_train, y_train)

In [16]:
get_report(model_rf, X_train, y_train, X_test, y_test)

Accuracy Train = 1.0
Accuracy Test = 0.8924239199829751
F1 Score Train = 1.0
F1 Score Test = 0.8924227494788552


# Voting Classifier

In [17]:
model_voting = VotingClassifier(estimators = [
    ('logisitc', LogisticRegression()),
    ('svm',SVC(probability=True)),
    ('rf',RandomForestClassifier())
],voting='soft',weights=[2,3,1])

In [18]:
model_voting.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
get_report(model_voting, X_train, y_train, X_test, y_test)

Accuracy Train = 0.924503205554521
Accuracy Test = 0.9082783570972547
F1 Score Train = 0.9244953675883001
F1 Score Test = 0.9082777589250077


# Saving Model

In [20]:
pickle.dump(model_svc,open('./models/face_gender.pkl',mode='wb'))