In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline
from PIL import Image
import tensorflow as tf
from tensorflow.keras.utils import load_img
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
BASE_DIR = './input/utkface-new/UTKFace/'

# Parallel lists: position k of every list describes the same image file.
image_paths, age_labels, gender_labels, race_labels = [], [], [], []

# The labels are read from the file name, which is split on '_'; only names
# that yield exactly four fields are kept, the first three being parsed as
# integer age, gender and race codes.
for filename in tqdm(os.listdir(BASE_DIR)):
    fields = filename.split('_')
    if len(fields) != 4:
        continue
    # Convert all three labels before touching any list, so a malformed name
    # cannot leave the lists with different lengths.
    age, gender, race = (int(field) for field in fields[:3])
    image_paths.append(os.path.join(BASE_DIR, filename))
    age_labels.append(age)
    gender_labels.append(gender)
    race_labels.append(race)

  0%|          | 0/16383 [00:00<?, ?it/s]

In [3]:
# Gather the parallel label lists into one table, one row per image file.
df = pd.DataFrame()
for column, column_values in (('image', image_paths),
                              ('age', age_labels),
                              ('gender', gender_labels),
                              ('race', race_labels)):
    df[column] = column_values
df.head()

Unnamed: 0,image,age,gender,race
0,./input/utkface-new/UTKFace/1_0_2_201612191419...,1,0,2
1,./input/utkface-new/UTKFace/30_0_0_20170117144...,30,0,0
2,./input/utkface-new/UTKFace/38_1_1_20170113001...,38,1,1
3,./input/utkface-new/UTKFace/30_0_1_20170104170...,30,0,1
4,./input/utkface-new/UTKFace/29_1_1_20170112211...,29,1,1


In [4]:
def extract_features(images, max_images=5000, size=(128, 128)):
    """Load images as grayscale, resize them and flatten each into one row.

    Parameters
    ----------
    images : iterable of str
        Paths of the image files to load.
    max_images : int or None, default 5000
        Maximum number of images to process, taken from the start of
        ``images``. ``None`` processes every image.
    size : tuple of int, default (128, 128)
        ``(width, height)`` every image is resized to.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_loaded, size[0] * size[1])`` with one flattened
        grayscale image per row (raw, unscaled pixel values).
    """
    paths = list(images)
    if max_images is not None:
        # Truncate up front so the progress bar reflects the real workload.
        paths = paths[:max_images]

    features = []
    for path in tqdm(paths):
        # `color_mode` is the supported way to request grayscale; the old
        # `grayscale=True` flag is no longer accepted by recent Keras.
        img = load_img(path, color_mode='grayscale')
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize(size, Image.LANCZOS)
        features.append(np.array(img))

    # Flatten every 2-D grayscale image into a single row. An explicit width
    # (rather than -1) keeps the reshape valid when no image was loaded.
    # RGB images would need a different layout.
    pixels_per_image = size[0] * size[1]
    return np.array(features).reshape(len(features), pixels_per_image)

In [5]:
# Build the feature matrix (one flattened image per row) from the image paths
# stored in df['image'].
X = extract_features(df['image'])

  0%|          | 0/16382 [00:00<?, ?it/s]

In [6]:
# Divide by 255 only while X still holds the raw integer pixel values, so
# re-running this cell cannot scale the data a second time.
if np.issubdtype(X.dtype, np.integer):
    X = X / 255.0
# extract_features may load fewer images than df has rows; keep only the
# labels that belong to the rows actually present in X.
y_gender = np.array(df['gender'])[:len(X)]

In [7]:
# Standardise every feature, then classify with a support-vector machine.
# The step names are what the 'classifier__*' grid-search keys refer to.
pipe = Pipeline(steps=[
    ('preprocessing', StandardScaler()),
    ('classifier', SVC()),
])
# Five shuffled, class-stratified folds with a fixed seed for repeatability.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)

In [None]:
# Candidate hyper-parameters for the pipeline's 'classifier' step.
param_grid = {
    'classifier__gamma': [0.001, 0.1, 10],
    'classifier__C': [0.001, 0.1, 10],
}
# Exhaustive search over the grid using the stratified folds; training scores
# are recorded alongside the test scores.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=kfold,
    return_train_score=True,
)

In [10]:
# Take exactly as many labels as X has rows instead of a hard-coded 5000, so
# features and labels stay aligned whatever number of images was loaded.
grid.fit(X, y_gender[:len(X)])

KeyboardInterrupt: 

In [None]:
# Show the parameter combination selected by the grid search
# (only available once grid.fit has completed).
grid.best_params_

In [None]:
def heatmap(values, xlabel, ylabel, xticklabels, yticklabels, cmap=None,
            vmin=None, vmax=None, ax=None, fmt="%0.2f"):
    """Draw ``values`` as an annotated colour grid.

    Parameters
    ----------
    values : 2-D array-like
        Grid of numbers to plot; rows run along y, columns along x.
    xlabel, ylabel : str
        Axis labels.
    xticklabels, yticklabels : sequence
        Labels for the columns and rows; one tick is placed at the centre of
        each cell.
    cmap, vmin, vmax : optional
        Passed through to ``ax.pcolor``.
    ax : matplotlib Axes, optional
        Axes to draw on; the current axes are used when omitted.
    fmt : str, default "%0.2f"
        %-style format applied to each cell value for its text annotation.

    Returns
    -------
    The collection object returned by ``ax.pcolor``.
    """
    if ax is None:
        ax = plt.gca()
    img = ax.pcolor(values, cmap=cmap, vmin=vmin, vmax=vmax)
    # Force the face colours to be computed now; they are needed below to
    # choose a readable text colour before the figure is ever rendered.
    img.update_scalarmappable()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    # Offset by 0.5 so each tick sits in the middle of its cell.
    ax.set_xticks(np.arange(len(xticklabels)) + .5)
    ax.set_yticks(np.arange(len(yticklabels)) + .5)
    ax.set_xticklabels(xticklabels)
    ax.set_yticklabels(yticklabels)
    ax.set_aspect(1)

    # Newer matplotlib releases return the cell values as a 2-D array here;
    # flatten it so that paths, colours and values pair up one cell at a time.
    cell_values = np.ravel(img.get_array())
    for p, color, value in zip(img.get_paths(), img.get_facecolors(),
                               cell_values):
        # Drop the two closing vertices; the mean of the corners is the
        # centre of the cell.
        x, y = p.vertices[:-2, :].mean(0)
        # Dark text on light cells, light text on dark cells.
        c = 'k' if np.mean(color[:3]) > 0.5 else 'w'
        ax.text(x, y, fmt % value, color=c, ha="center", va="center")
    return img

In [None]:
# Put the grid search's per-candidate cross-validation results into a table.
results = pd.DataFrame(grid.cv_results_)

In [None]:
# The grid search yields one mean test score per (C, gamma) combination.
# Candidates are enumerated with the parameter names sorted, so
# 'classifier__C' is the outer loop and 'classifier__gamma' varies fastest:
# rows are C values, columns are gamma values. Deriving the shape from
# param_grid keeps this correct when the grid changes (a fixed 5x5 shape
# cannot hold the 9 scores of the 3x3 grid and raises ValueError).
n_C = len(param_grid['classifier__C'])
n_gamma = len(param_grid['classifier__gamma'])
scores = np.array(results.mean_test_score).reshape(n_C, n_gamma)
# plot the mean cross-validation scores
heatmap(scores, xlabel='classifier__gamma', xticklabels=param_grid['classifier__gamma'], ylabel='classifier__C', yticklabels=param_grid['classifier__C'], cmap="viridis")
plt.show()