In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score
import time
%matplotlib inline

In [None]:
#Loading Labels
traindf = pd.read_excel('/content/drive/My Drive/Paper/UTKFace/traindf.xlsx')
valdf = pd.read_excel('/content/drive/My Drive/Paper/UTKFace/valdf.xlsx')
testdf = pd.read_excel('/content/drive/My Drive/Paper/UTKFace/testdf.xlsx')
traindf.head()

Unnamed: 0.1,Unnamed: 0,age,gender,race,file
0,21938,50,0,3,50_0_3_20170119204521040.jpg.chip.jpg
1,10250,49,0,3,49_0_3_20170104214426925.jpg.chip.jpg
2,17693,24,0,2,24_0_2_20170116172525947.jpg.chip.jpg
3,649,55,0,3,55_0_3_20170119171117830.jpg.chip.jpg
4,7397,80,1,0,80_1_0_20170110131358567.jpg.chip.jpg


In [None]:
X_train = np.load('/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/X_train.npy')
X_val = np.load('/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/X_val.npy')
X_test = np.load('/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/X_test.npy')

In [None]:
def scale(X):
  mean = X.mean(axis=0)
  std = X.std(axis=0)
  X-=mean
  X/=std
  return X

In [None]:
X_train_scaled = scale(X_train)
X_val_scaled = scale(X_val)
X_test_scaled = scale(X_test)

In [None]:
y_train = traindf['gender'].values
y_val = valdf['gender'].values
y_test = testdf['gender'].values

Gender ML Models


In [None]:
#@title Installing hypopt
!pip install hypopt -q


In [None]:
#@title Logistic Regression --- Baseline

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

model = LogisticRegression(max_iter=1000,class_weight='balanced',random_state=0)
model.fit(X_train_scaled,y_train)

y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/LR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9738410421391277
Test : 0.9211303247574862

 ROC_AUC_SCORE : 

Train : 0.9739300186502483
Test : 0.9206005738893189
<bound method BaseEstimator.get_params of LogisticRegression(C=1.0, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)>
0.9211303247574862


In [None]:
#@title Linear SVC --- Baseline

from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, roc_auc_score

model = LinearSVC(class_weight='balanced',random_state=0)
model.fit(X_train_scaled,y_train)

y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/Linear SVC.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)



ACCURACY : 

Train : 0.9612889615526607
Test : 0.9143821172501054

 ROC_AUC_SCORE : 

Train : 0.9608809873372284
Test : 0.9135299467306413
<bound method BaseEstimator.get_params of LinearSVC(C=1.0, class_weight='balanced', dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
          verbose=0)>
0.9143821172501054


In [None]:
#@title SVC --- Baseline

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score

model = SVC(class_weight='balanced',random_state=0)
model.fit(X_train_scaled,y_train)

y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/SVC.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9713095300880755
Test : 0.9464361029101644

 ROC_AUC_SCORE : 

Train : 0.9714215992838662
Test : 0.946432720529016
<bound method BaseEstimator.get_params of SVC(C=1.0, break_ties=False, cache_size=200, class_weight='balanced', coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)>
0.9464361029101644


In [None]:
#@title Decision Tree --- Baseline

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

model = DecisionTreeClassifier(class_weight='balanced',random_state=0)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/DTree.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.998681504140077
Test : 0.5925769717418811

 ROC_AUC_SCORE : 

Train : 0.9987373737373738
Test : 0.5948667125664084
<bound method BaseEstimator.get_params of DecisionTreeClassifier(ccp_alpha=0.0, class_weight='balanced', criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')>
0.5925769717418811


In [None]:
#@title BaggingClassifier - LR --- Baseline

from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

lr = LogisticRegression(max_iter=1000,class_weight='balanced',random_state=0)
model = BaggingClassifier(base_estimator=lr, random_state=0)
model.fit(X_train_scaled,y_train)

y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/Bagging LR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9839670903433363
Test : 0.929143821172501

 ROC_AUC_SCORE : 

Train : 0.9840151375804174
Test : 0.9287139234443623
<bound method BaseEstimator.get_params of BaggingClassifier(base_estimator=LogisticRegression(C=1.0,
                                                    class_weight='balanced',
                                                    dual=False,
                                                    fit_intercept=True,
                                                    intercept_scaling=1,
                                                    l1_ratio=None,
                                                    max_iter=1000,
                                                    multi_class='auto',
                                                    n_jobs=None, penalty='l2',
                                                    random_state=0,
                                                    solver='lbfgs', tol=0.0001,
                                          

In [None]:
#@title Random Forest - LR --- Baseline
import time 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

tic = time.time()
model = RandomForestClassifier(random_state=0,class_weight='balanced')
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/RF.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.998681504140077
Test : 0.929143821172501

 ROC_AUC_SCORE : 

Train : 0.9987093147566605
Test : 0.9281262784574835
Elapsed Time: 134
<bound method BaseEstimator.get_params of RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)>
0.929143821172501


In [None]:
#@title Ada Boost

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

tic = time.time()
dt = DecisionTreeClassifier(class_weight='balanced',random_state=0)
model = AdaBoostClassifier(base_estimator=dt, random_state=0)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/AdaBoost dt.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

In [None]:
#@title Gradient Boosted Trees

from sklearn.ensemble import GradientBoostingClassifier

tic = time.time()
model = GradientBoostingClassifier(random_state=0)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/GBoost Clf.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9514793523548336
Test : 0.9337832138338253

 ROC_AUC_SCORE : 

Train : 0.9516039603629274
Test : 0.9343808310134006
Elapsed Time: 1475
<bound method BaseEstimator.get_params of GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=0, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)>
0.9337832138338253


In [None]:
#@title LDA

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

tic= time.time()
model = LinearDiscriminantAnalysis()
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/LDA.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9530088075523443
Test : 0.9439055250948967

 ROC_AUC_SCORE : 

Train : 0.9530873128147159
Test : 0.9438979001580834
Elapsed Time: 30
<bound method BaseEstimator.get_params of LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
                           solver='svd', store_covariance=False, tol=0.0001)>
0.9439055250948967


In [None]:
#@title QDA

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

tic= time.time()
model = QuadraticDiscriminantAnalysis()
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/QDA.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9880280575918992
Test : 0.9380008435259384

 ROC_AUC_SCORE : 

Train : 0.9882126752571516
Test : 0.9383981275186057
Elapsed Time: 35
<bound method BaseEstimator.get_params of QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
                              store_covariance=False, tol=0.0001)>
0.9380008435259384


In [None]:
#@title XGBOOST

from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

tic = time.time()
model = XGBClassifier(objective='binary:logistic',random_state=0,booster='gbtree',)
model.fit(X_train, y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/XGB.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

ACCURACY : 

Train : 0.9502663361637045
Test : 0.9380008435259384

 ROC_AUC_SCORE : 

Train : 0.9504049322270269
Test : 0.9378104825317272
Elapsed Time: 252
<bound method XGBModel.get_params of XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)>
0.9380008435259384
