# 1. Exploratory data analysis

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.resnet import ResNet50

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import copy

# Show up to 100 rows and 100 columns before pandas truncates a printed frame.
# Full option names are spelled out: abbreviated names are only resolved by
# pattern matching and can break if pandas adds a similarly named option.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

In [None]:
# Read the labels table and print its summary (columns, dtypes, non-null counts).
LABELS_CSV = '/datasets/faces/labels.csv'
labels = pd.read_csv(LABELS_CSV)
labels.info()

In [None]:
# No augmentation for the exploratory pass: images are only multiplied by 1/255.
PIXEL_SCALE = 1.0 / 255
train_datagen = ImageDataGenerator(rescale=PIXEL_SCALE)

In [None]:
# Stream batches of 32 images from `final_files/`, resized to 224x224.
# `file_name` locates each image; `real_age` is returned unchanged as the
# target (class_mode='raw').
flow_options = dict(
    dataframe=labels,
    directory='/datasets/faces/final_files/',
    x_col='file_name',
    y_col='real_age',
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw',
    seed=12345,
)
train_gen_flow = train_datagen.flow_from_dataframe(**flow_options)

In [None]:
# Pull a single batch from the generator: images in `features`, ages in `target`.
first_batch = next(train_gen_flow)
features, target = first_batch

In [None]:
# Display up to the first 16 images of the sampled batch on a 4x4 grid,
# with axis ticks hidden.
fig = plt.figure(figsize=(10, 10))
for i, image in enumerate(features[:16]):
    grid_ax = fig.add_subplot(4, 4, i + 1)
    grid_ax.imshow(image)
    grid_ax.set_xticks([])
    grid_ax.set_yticks([])
# Adjust the layout once, after every subplot exists, instead of per iteration.
fig.tight_layout()

In [None]:
# Plot the ages of every labelled image. `target` only holds the ages of one
# 32-image batch, which is too small a sample to describe the distribution.
# NOTE(review): `distplot` is deprecated in recent seaborn releases; consider
# `histplot(..., kde=True, stat='density')` once the environment provides it.
ax = sns.distplot(labels['real_age'])
ax.set_title('Age Distribution Graph')
ax.set_xlabel('Age, years')
ax.set_ylabel('Probability Density')

The age distribution is roughly bell-shaped, with a long tail on the right side (toward older ages).
<br>With the previously studied methods, the data would need to be pre-processed first; for a neural network, I am not sure that this is necessary.

# 2. Train the model

```python

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator


def load_train(path):
    """Build the augmented training generator for the age-regression task.

    Reads ``labels.csv`` from ``path`` and streams images from the
    ``final_files/`` sub-directory. Only the training part of a 75/25
    split is returned, so the images served by ``load_test`` (the
    ``'validation'`` subset of the same split) are held out of training.

    Args:
        path: Dataset root as a string ending with a path separator; it is
            concatenated directly with ``'labels.csv'`` and ``'final_files/'``.

    Returns:
        The iterator produced by ``flow_from_dataframe``: batches of 32
        images resized to 224x224 with ``real_age`` as the raw target.
    """
    labels = pd.read_csv(path + 'labels.csv')

    # NOTE(review): vertical flips and rotations up to 90 degrees produce
    # upside-down / sideways images - confirm this augmentation is intended.
    train_datagen = ImageDataGenerator(
        validation_split=0.25,
        rescale=1./255,
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=90,
        width_shift_range=0.2,
        height_shift_range=0.2)

    train_datagen_flow = train_datagen.flow_from_dataframe(
        dataframe=labels,
        # File names in labels.csv are relative to the image folder, not the root.
        directory=path + 'final_files/',
        x_col='file_name',
        y_col='real_age',
        target_size=(224, 224),
        batch_size=32,
        class_mode='raw',
        subset='training',
        seed=12345)

    return train_datagen_flow



def load_test(path):
    """Build the validation generator for the age-regression task.

    Reads ``labels.csv`` from ``path`` and streams images from the
    ``final_files/`` sub-directory. Only the ``'validation'`` part of a
    75/25 split is returned, and no augmentation is applied (rescaling only).

    Args:
        path: Dataset root as a string ending with a path separator; it is
            concatenated directly with ``'labels.csv'`` and ``'final_files/'``.

    Returns:
        The iterator produced by ``flow_from_dataframe``: batches of 32
        images resized to 224x224 with ``real_age`` as the raw target.
    """
    labels = pd.read_csv(path + 'labels.csv')

    test_datagen = ImageDataGenerator(
        validation_split=0.25,
        rescale=1./255)

    test_datagen_flow = test_datagen.flow_from_dataframe(
        dataframe=labels,
        # File names in labels.csv are relative to the image folder, not the root.
        directory=path + 'final_files/',
        x_col='file_name',
        y_col='real_age',
        target_size=(224, 224),
        batch_size=32,
        class_mode='raw',
        # Evaluate only on the held-out quarter instead of the whole dataset.
        subset='validation',
        seed=12345)

    return test_datagen_flow

def create_model(input_shape):
    """Build and compile a ResNet50-based model with a single regression output.

    The network is a ResNet50 backbone initialised with ImageNet weights and
    without its top layers, followed by global average pooling and one
    ReLU-activated unit.

    Args:
        input_shape: Shape of a single input image, forwarded to ``ResNet50``.

    Returns:
        The compiled ``Sequential`` model (mean squared error loss, MAE metric,
        Adam optimizer with learning rate 0.0001).
    """
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and is not accepted by newer Keras releases.
    optimizer = Adam(learning_rate=0.0001)

    backbone = ResNet50(input_shape=input_shape,
                        weights='imagenet',
                        include_top=False)

    model = Sequential()
    model.add(backbone)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(1, activation='relu'))

    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])

    return model


def train_model(model, train_datagen_flow, test_datagen_flow, batch_size=32, epochs=5,
               steps_per_epoch=None, validation_steps=None):
    """Fit ``model`` on the training data and validate on the test data.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        train_datagen_flow: Training data, passed to ``fit`` positionally.
        test_datagen_flow: Data passed to ``fit`` as ``validation_data``.
        batch_size: Accepted for interface compatibility; not used here.
        epochs: Number of epochs forwarded to ``fit``.
        steps_per_epoch: Forwarded to ``fit`` unchanged.
        validation_steps: Forwarded to ``fit`` unchanged.

    Returns:
        The same ``model`` object, after ``fit`` has been called on it.
    """
    fit_options = {
        'validation_data': test_datagen_flow,
        'epochs': epochs,
        'steps_per_epoch': steps_per_epoch,
        'validation_steps': validation_steps,
        'verbose': 2,  # one log line per epoch
    }
    model.fit(train_datagen_flow, **fit_options)

    return model

```


Or a slightly different variation:

```python

import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
 

def load_train(path):
    """Return the training-subset generator (75% of the labelled images).

    Labels come from ``path + 'labels.csv'`` and images from
    ``path + 'final_files/'``. Images are rescaled by 1/255 and randomly
    flipped horizontally; batches hold 16 images resized to 224x224, with
    ``real_age`` as the raw target.
    """
    frame = pd.read_csv(path + 'labels.csv')

    augmenter = ImageDataGenerator(
        rescale=1./255,
        horizontal_flip=True,
        validation_split=0.25,
    )

    flow_options = {
        'dataframe': frame,
        'directory': path + 'final_files/',
        'x_col': 'file_name',
        'y_col': 'real_age',
        'target_size': (224, 224),
        'batch_size': 16,
        'class_mode': 'raw',
        'subset': 'training',
        'seed': 12345,
    }
    return augmenter.flow_from_dataframe(**flow_options)
 

def load_test(path):
    """Return the validation-subset generator (25% of the labelled images).

    Labels come from ``path + 'labels.csv'`` and images from
    ``path + 'final_files/'``. Images are only rescaled by 1/255; batches
    hold 16 images resized to 224x224, with ``real_age`` as the raw target.
    """
    frame = pd.read_csv(path + 'labels.csv')

    rescaler = ImageDataGenerator(rescale=1./255, validation_split=0.25)

    flow_options = {
        'dataframe': frame,
        'directory': path + 'final_files/',
        'x_col': 'file_name',
        'y_col': 'real_age',
        'target_size': (224, 224),
        'batch_size': 16,
        'class_mode': 'raw',
        'subset': 'validation',
        'seed': 12345,
    }
    return rescaler.flow_from_dataframe(**flow_options)


def create_model(input_shape):
    """Build and compile a ResNet50-based model with a single regression output.

    Args:
        input_shape: Shape of a single input image, forwarded to ``ResNet50``.

    Returns:
        The compiled ``Sequential`` model: ImageNet-initialised ResNet50
        without its top layers, global average pooling, and one
        ReLU-activated unit; MSE loss, MAE metric, Adam with learning
        rate 0.0001.
    """
    backbone = ResNet50(weights='imagenet',
                        input_shape=input_shape,
                        include_top=False)

    model = Sequential()
    model.add(backbone)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(1, activation='relu'))

    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and is not accepted by newer Keras releases.
    optimizer = Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model


def train_model(model, train_datagen_flow, val_datagen_flow, batch_size=None, epochs=5,
                steps_per_epoch=None, validation_steps=None):
    """Run ``model.fit`` on the training data with validation, then return ``model``.

    ``batch_size`` is accepted but not used by this function. ``epochs``,
    ``steps_per_epoch`` and ``validation_steps`` are forwarded to ``fit``
    unchanged, and ``verbose`` is fixed at 2.
    """
    run_fit = model.fit
    run_fit(
        train_datagen_flow,
        epochs=epochs,
        verbose=2,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        validation_data=val_datagen_flow,
    )
    return model


```


```

Train for 356 steps, validate for 119 steps
Epoch 1/5
2021-01-31 18:51:47.963670: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-01-31 18:51:48.265085: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
356/356 - 50s - loss: 190.1777 - mae: 10.1562 - val_loss: 523.5768 - val_mae: 17.7575
Epoch 2/5
356/356 - 41s - loss: 82.1405 - mae: 6.9148 - val_loss: 246.2643 - val_mae: 12.0417
Epoch 3/5
356/356 - 41s - loss: 57.3234 - mae: 5.8354 - val_loss: 77.2597 - val_mae: 6.5555
Epoch 4/5
356/356 - 41s - loss: 41.8642 - mae: 4.9483 - val_loss: 76.4013 - val_mae: 6.6034
Epoch 5/5
356/356 - 41s - loss: 33.1005 - mae: 4.3866 - val_loss: 79.6053 - val_mae: 6.4891

```