In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score
import time
from sklearn.metrics import mean_absolute_error,mean_squared_error
%matplotlib inline

In [None]:
#Loading Labels
traindf = pd.read_excel('/content/drive/My Drive/Paper/UTKFace/traindf.xlsx')
valdf = pd.read_excel('/content/drive/My Drive/Paper/UTKFace/valdf.xlsx')
testdf = pd.read_excel('/content/drive/My Drive/Paper/UTKFace/testdf.xlsx')
traindf.head()

In [None]:
X_train = np.load('/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/X_train.npy')
X_val = np.load('/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/X_val.npy')
X_test = np.load('/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/X_test.npy')

In [None]:
def scale(X):
  mean = X.mean(axis=0)
  std = X.std(axis=0)
  X-=mean
  X/=std
  return X

In [None]:
X_train_scaled = scale(X_train)
X_val_scaled = scale(X_val)
X_test_scaled = scale(X_test)

In [None]:
y_train = traindf['age'].values
y_val = valdf['age'].values
y_test = testdf['age'].values

Gender ML Models


In [None]:
#@title Installing hypopt
!pip install hypopt -q


In [None]:
#@title Linear Regression --- Baseline

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

model = LinearRegression(normalize=True)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))


print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/LR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 4.933252584568955
Test : 5.610466284170678
Loss : 

Train : 44.177113083619126
Test : 54.97710102989039
<bound method BaseEstimator.get_params of LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)>
0.8507123674419581


In [None]:
#@title Linear SVR --- Baseline

from sklearn.svm import LinearSVR

tic = time.time()
model = LinearSVR(random_state=0)
model.fit(X_train_scaled,y_train)

toc = time.time()
y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/Linear SVR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 4.847638457889041
Test : 5.577168755191247
Loss : 

Train : 49.14231269738753
Test : 54.9335013638218

 Time : 89
<bound method BaseEstimator.get_params of LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=0, tol=0.0001, verbose=0)>
0.8508307602055954




In [None]:
#@title SVR --- Baseline

from sklearn.svm import SVR

tic = time.time()
model = SVR()
model.fit(X_train_scaled,y_train)

toc = time.time()
y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/SVR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 4.853982449574005
Test : 5.49109775442805
Loss : 

Train : 51.28689394196117
Test : 56.001306304658556

 Time : 963
<bound method BaseEstimator.get_params of SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)>
0.8479311880443674


In [None]:
#@title Decision Tree --- Baseline

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score, roc_auc_score

tic = time.time()
model = DecisionTreeRegressor(random_state=0)
model.fit(X_train,y_train)

toc = time.time()

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/DTree.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 0.04751859079162491
Test : 9.862083509067904
Loss : 

Train : 0.35429741750610905
Test : 211.28679881906368

 Time : 64
<bound method BaseEstimator.get_params of DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')>
0.4262610178496678


In [None]:
#@title BaggingRegressor - LR --- Baseline

from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, roc_auc_score

tic = time.time()
lr = LinearRegression()
model = BaggingRegressor(base_estimator=lr, random_state=0)
model.fit(X_train_scaled,y_train)

toc= time.time()

y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/BaggingReg LR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 4.967048920022852
Test : 5.654437052404947
Loss : 

Train : 44.711666909758726
Test : 55.61890628100071

 Time : 75
<bound method BaseEstimator.get_params of BaggingRegressor(base_estimator=LinearRegression(copy_X=True,
                                                 fit_intercept=True,
                                                 n_jobs=None, normalize=False),
                 bootstrap=True, bootstrap_features=False, max_features=1.0,
                 max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False,
                 random_state=0, verbose=0, warm_start=False)>
0.8489695766307532


In [None]:
#@title Random Forest --- Baseline
import time 
from sklearn.ensemble import RandomForestRegressor

tic = time.time()
model = RandomForestRegressor(random_state=0)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

toc = time.time()

print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/BaggingReg LR.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 1.9074806342258086
Test : 5.749584821219364
Loss : 

Train : 7.725207149817541
Test : 57.492947152930235

 Time : 3734
<bound method BaseEstimator.get_params of RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)>
0.8438807101782413


In [None]:
#@title Ada Boost

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

tic = time.time()
dt = DecisionTreeClassifier(class_weight='balanced',random_state=0)
model = AdaBoostClassifier(base_estimator=dt, random_state=0)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print('ACCURACY : \n')
print('Train : ' + str(accuracy_score(y_train,y_pred_train)))
print('Test : ' + str(accuracy_score(y_test,y_pred_test)))

print('\n ROC_AUC_SCORE : \n')
print('Train : ' + str(roc_auc_score(y_train,y_pred_train)))
print('Test : ' + str(roc_auc_score(y_test,y_pred_test)))

toc = time.time()
print('Elapsed Time: '+ str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Gender/Untuned/AdaBoost dt.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

In [None]:
#@title Gradient Boosted Trees

from sklearn.ensemble import GradientBoostingRegressor

tic = time.time()
model = GradientBoostingRegressor(random_state=0)
model.fit(X_train,y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

toc = time.time()
print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/GradientBoostingReg.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 4.973619950912951
Test : 6.166144678274488
Loss : 

Train : 46.70570489439083
Test : 67.45696351094071

 Time : 1492
<bound method BaseEstimator.get_params of GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=0, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)>
0.8168239104381414


In [None]:
#@title XGBOOST

from xgboost import XGBRegressor

tic = time.time()
model = XGBRegressor(objective='reg:squarederror',random_state=0,booster='gbtree',)
model.fit(X_train, y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

toc = time.time()
print('MAE : \n')
print('Train : ' + str(mean_absolute_error(y_train,y_pred_train)))
print('Test : ' + str(mean_absolute_error(y_test,y_pred_test)))

print('Loss : \n')
print('Train : ' + str(mean_squared_error(y_train,y_pred_train)))
print('Test : ' + str(mean_squared_error(y_test,y_pred_test)))

print('\n Time : '+str(int(toc-tic)))

print(model.get_params)
filename = "/content/drive/My Drive/Paper/UTKFace/ML/SeNet50/Age/Untuned/xgboost reg.sav"
pickle.dump(model, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test_scaled, y_test)
print(result)

MAE : 

Train : 4.998431925585385
Test : 5.892288032461952
Loss : 

Train : 47.3688861122858
Test : 60.51738902801346

 Time : 243
<bound method XGBModel.get_params of XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:squarederror',
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)>
0.8356679859915822
