# Preparing

## Imports and Functions

In [1]:
import os
import random
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import regex as re
import seaborn as sns
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, Sequential, layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense, Dropout, LayerNormalization
from tensorflow.keras.utils import to_categorical

### Google Import

In [2]:
# Colab-only: mount Google Drive at /content/drive so the dataset archive
# stored there can be read by the following cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Extract the dataset archive from Drive into /content/
# (-u: only extract new or updated files, -q: quiet).
!unzip -uq "/content/drive/My Drive/Felix/code/AgeRecognition/raw_data/UTKFace.zip" -d "/content/"
#folder_path = '/content/drive/My Drive/Felix/code/AgeRecognition/raw_data/Faces'
# Directory the loader functions read from.
# NOTE(review): presumably the archive unpacks to a top-level Faces/ directory — confirm.
folder_path = '/content/Faces'

In [5]:
def load_images_from_folder(folder_path,percent=100,gender=None):
    """Load the images in a folder as RGB arrays, optionally rescaled.

    File names starting with '.' are ignored. The remaining names are sorted
    by the integer formed from all digits in the name, which is the same
    ordering `load_image_data_into_dataframe` uses for the labels.

    Args:
        folder_path: Directory containing the image files.
        percent: Output size as a percentage of the original width and
            height (100 keeps the original size). Must be positive.
        gender: If given, only files whose second '_'-separated name field
            equals this string are loaded.

    Returns:
        A list of image arrays converted from BGR to RGB, in sorted order.

    Raises:
        ValueError: If ``percent`` is not positive.

    Note:
        Files OpenCV cannot decode are skipped with a warning, so the result
        may be shorter than the list of matching files. Labels built
        separately from the file names will then no longer line up.
    """
    if percent <= 0:
        raise ValueError(f"percent must be positive, got {percent!r}")

    filenames = [f for f in os.listdir(folder_path) if not f.startswith('.')]
    filenames.sort(key=lambda f: int(re.sub(r'\D', '', f)))

    images = []
    for filename in filenames:
        if gender is not None and filename.split('_')[1] != gender:
            continue

        img = cv2.imread(os.path.join(folder_path, filename), cv2.IMREAD_UNCHANGED)
        # cv2.imread signals failure by returning None, so check before
        # touching img.shape.
        if img is None:
            warnings.warn(f"Skipping unreadable image file: {filename}")
            continue

        # Scale both dimensions by the same percentage; never collapse to 0 px.
        width = max(1, int(img.shape[1] * percent / 100))
        height = max(1, int(img.shape[0] * percent / 100))
        resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)

        # NOTE(review): assumes 3-channel BGR input — grayscale or BGRA files
        # would fail in this conversion; confirm the dataset is colour-only.
        images.append(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
    return images

def load_image_data_into_dataframe(folderpath, gender = None):
    """Build a label DataFrame from the file names in a folder.

    Each file name is split on '_' and the first three fields are read as
    age, gender code and race code. File names starting with '.' are
    ignored, and the rest are sorted by the integer formed from all digits
    in the name (the same ordering `load_images_from_folder` uses).

    Args:
        folderpath: Directory whose file names are parsed.
        gender: If given, keep only files whose gender field equals this
            string (the raw code, e.g. '0' or '1').

    Returns:
        DataFrame with columns 'age' (int), 'gender' ('male'/'female') and
        'race' ('White'/'Black'/'Asian'/'Indian'/'Others'). Codes missing
        from the mappings become NaN. The frame is empty, but keeps these
        columns, when no file matches.
    """
    # Read the directory passed in, not the notebook-level `folder_path`.
    filenames = [f for f in os.listdir(folderpath) if not f.startswith('.')]
    filenames.sort(key=lambda f: int(re.sub(r'\D', '', f)))

    # Collect plain records and build the frame once; DataFrame.append was
    # quadratic in a loop and no longer exists in pandas 2.x.
    records = []
    for filename in filenames:
        parsed_name = filename.split('_')
        if gender is None or parsed_name[1] == gender:
            records.append({'age': int(parsed_name[0]),
                            'gender': parsed_name[1],
                            'race': parsed_name[2]})

    # Explicit columns keep the mapping below valid for an empty result.
    df = pd.DataFrame(records, columns=['age', 'gender', 'race'])
    df['gender'] = df['gender'].map({'0': 'male', '1': 'female'})
    df['race'] = df['race'].map({'0': 'White', '1': 'Black', '2': 'Asian', '3': 'Indian', '4': 'Others'})
    return df

def show_all_images_from_list(img_list,img_df):
    """Plot every image in a 5-column grid, titled with its label row.

    Args:
        img_list: Sequence of images accepted by ``imshow``.
        img_df: DataFrame (or Series) whose i-th row, formatted as a string,
            becomes the title of the i-th image.
    """
    n_cols = 5
    # Ceiling division, so a length that is a multiple of 5 does not add an
    # empty row; keep at least one row so an empty list gives a valid figure.
    n_rows = max(1, (len(img_list) + n_cols - 1) // n_cols)
    f = plt.figure(figsize=(25, n_rows * 5))
    for i, img in enumerate(img_list):
        ax = f.add_subplot(n_rows, n_cols, i + 1)
        ax.set_title(f'{img_df.iloc[i]}',
                     fontweight="bold",
                     fontsize=15)
        ax.imshow(img)
    # The layout only needs computing once, after all subplots exist.
    f.tight_layout()

In [6]:
# Load every image at its original resolution, with no gender filter.
img_pic = load_images_from_folder(folder_path, percent=100, gender=None)

In [7]:
# Parse age / gender / race labels from the file names, with no gender filter.
img_df = load_image_data_into_dataframe(folder_path, gender=None)

## Targets into Age Bins of 5 Years (Random-Guess Baseline: 1/16 ≈ 6%)

In [8]:
# Bin edges every 5 years, from 0 up to and including 80.
bins = list(range(0, 81, 5))
bins

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80]

In [9]:
# One integer class per 5-year interval: 17 bin edges give 16 groups (0..15).
labels = list(range(16))
img_df['age_group'] = pd.cut(
    img_df['age'],
    bins=bins,
    labels=labels,
)

In [10]:
# Display the label frame, now including the age_group column.
img_df

Unnamed: 0,age,gender,race,age_group
0,1.0,male,White,0
1,1.0,male,White,0
2,1.0,male,White,0
3,1.0,male,White,0
4,1.0,male,White,0
...,...,...,...,...
22000,80.0,female,Black,15
22001,80.0,female,Asian,15
22002,80.0,female,Asian,15
22003,80.0,female,Indian,15


In [11]:
# Class balance: number of samples per age group, rarest group first.
age_group_counts = img_df['age_group'].value_counts()
age_group_counts.sort_values()

15     301
14     359
13     463
2      583
12     674
1      816
11     944
9      971
3      971
8     1062
10    1176
7     1825
0     2308
6     2341
4     2545
5     4666
Name: age_group, dtype: int64

## CNN Preparations (Image Size: 100×100)

In [12]:
# Reload the images at 50% of their original width and height.
img_pic = load_images_from_folder(folder_path, percent=50)

In [13]:
# Features are the resized images; targets are the binned age groups.
X = img_pic
y = img_df['age_group']

# Hold out 10% for testing (fixed seed), then turn each split into an ndarray.
X_train, X_test, y_train, y_test = (
    np.array(part)
    for part in train_test_split(X, y, test_size=0.1, random_state=42)
)
X_train.shape

(19804, 100, 100, 3)

In [14]:
# Rescale pixel values by 1/255 and centre them around zero.
# This cell rebinds X_train / X_test, so re-running it rescales the
# already-scaled data again; re-run the split cell first.
# NOTE(review): the ImageNet VGG16 weights loaded later are normally paired
# with `vgg16.preprocess_input` rather than this scaling — worth comparing.
X_train = X_train/255 - 0.5
X_test = X_test/255 - 0.5

# Pin the one-hot width to the number of age groups. Otherwise
# to_categorical infers it from the largest label present in each split, and
# train/test could disagree with each other or with the 16-way softmax head.
y_train_cat = to_categorical(y_train, num_classes=len(labels))
y_test_cat = to_categorical(y_test, num_classes=len(labels))

In [15]:
# Sanity-check the array shapes of the train and test splits.
for split in (X_train, X_test):
    print(split.shape)

(19804, 100, 100, 3)
(2201, 100, 100, 3)


# CNN Transfer Learning

In [16]:
from tensorflow.keras.applications.vgg16 import VGG16

def load_model(input_shape=None):
    """Create the VGG16 convolutional base with ImageNet weights.

    Args:
        input_shape: Shape of one input image. When omitted, the shape of
            the first sample of the notebook-level ``X_train`` is used, as
            before.

    Returns:
        The VGG16 model without its top classification layers.
    """
    if input_shape is None:
        # Backward-compatible default: infer the shape from the training data.
        input_shape = X_train[0].shape
    model = VGG16(include_top=False,
                  weights='imagenet',
                  input_shape=input_shape)
    return model

# Instantiate the VGG16 base and print its layer summary.
model = load_model()

model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 100, 100, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0     

In [17]:
def set_nontrainable_layers(model):
    """Freeze ``model`` in place by clearing its ``trainable`` flag.

    Returns the same object that was passed in.
    """
    frozen = model
    frozen.trainable = False
    return frozen
    
# Freeze the base model, then print its summary again.
model = set_nontrainable_layers(model)

model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 100, 100, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0     

In [18]:
def add_last_layers(model):
    """Stack a small classification head on top of a frozen base model.

    The base is frozen via `set_nontrainable_layers`, then followed by a
    Flatten layer, a 100-unit ReLU Dense layer and a 16-unit softmax Dense
    layer.

    Args:
        model: The base model to freeze and extend.

    Returns:
        A new Sequential model wrapping the base and the head.
    """
    frozen_base = set_nontrainable_layers(model)
    return models.Sequential([
        frozen_base,
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dense(16, activation='softmax'),
    ])

# Wrap the base model with the classification head and print the summary.
model = add_last_layers(model)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 3, 3, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0         
_________________________________________________________________
dense (Dense)                (None, 100)               460900    
_________________________________________________________________
dense_1 (Dense)              (None, 16)                1616      
Total params: 15,177,204
Trainable params: 462,516
Non-trainable params: 14,714,688
_________________________________________________________________


In [19]:
def compile_model(model):
    """Compile ``model`` for multi-class classification and return it.

    Uses Adam with a learning rate of 0.001, categorical cross-entropy loss
    and accuracy as the reported metric.
    """
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [20]:
def build_model():
    """Build the full model: load the base, add the head, then compile."""
    return compile_model(add_last_layers(load_model()))

# Rebuild the model from scratch via build_model(), replacing the `model`
# bound by the earlier cells.
model = build_model()

In [21]:
# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Train on 70% of the training split; the remaining 30% is used for validation.
history = model.fit(
    X_train,
    y_train_cat,
    batch_size=32,
    epochs=50,
    validation_split=0.3,
    callbacks=[es],
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Restoring model weights from the end of the best epoch.
Epoch 00009: early stopping


In [22]:
# evaluate() returns [loss, accuracy] for this compile setup; report accuracy.
test_loss, test_accuracy = model.evaluate(X_test, y_test_cat, verbose=0)
print(test_accuracy)

0.3412085473537445
