## Initialization

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

## Load Data

The dataset is stored in the `/datasets/faces/` folder. There you can find:
- The `final_files` folder with 7.6k photos
- The `labels.csv` file with labels, with two columns: `file_name` and `real_age`

Given the fact that the number of image files is rather high, it is advisable to avoid reading them all at once, which would greatly consume computational resources. We recommend you build a generator with the ImageDataGenerator generator. This method was explained in Chapter 3, Lesson 7 of this course.

The label file can be loaded as a regular CSV file.

In [None]:
# Age labels: one row per photo (columns are selected by name below).
labels = pd.read_csv('/datasets/faces/labels.csv')

# Exploratory generator: pixel values are only scaled by 1/255, no augmentation.
train_datagen = ImageDataGenerator(rescale=1./255)

# Images are read from disk batch by batch instead of all at once;
# class_mode='raw' passes the `real_age` column through as the target.
train_gen_flow = train_datagen.flow_from_dataframe(
    dataframe=labels,
    directory='/datasets/faces/final_files/',
    x_col='file_name',
    y_col='real_age',
    class_mode='raw',
    target_size=(224, 224),
    batch_size=32,
    seed=12345,
)

In [None]:
# preview the first five rows of the labels table
labels.head(5)

In [None]:
# column names, dtypes and non-null counts of the labels table
labels.info()

In [None]:
# (number of rows, number of columns) of the labels table
labels.shape

In [None]:
# descriptive statistics of the labels table
labels.describe()

### Conclusion
From the initial look at the data, we can see that the data has 7591 rows and 2 columns. From the description of the data, we can see that the mean age is 31 years and the maximum age is 100 years.

## EDA

In [None]:
# Show up to 16 photos from one generator batch, each titled with its age label.
features, target = next(train_gen_flow)

fig, axes = plt.subplots(4, 4, figsize=(10, 10))

# zip() stops at the shorter sequence, so a batch with fewer than 16 images
# leaves the remaining axes empty instead of raising an IndexError.
for ax, image, age in zip(axes.flat, features, target):
    ax.imshow(image)
    ax.set_title(age)
    ax.set_xticks([])
    ax.set_yticks([])

# Adjust the layout once, after every subplot exists, rather than on each iteration.
fig.tight_layout()


In [None]:
# Distribution of ages (density-normalised histogram with a KDE curve).
fig, ax = plt.subplots(figsize=(12, 6))

if hasattr(sns, 'histplot'):
    # `distplot` is deprecated in recent seaborn releases; `histplot` with
    # stat='density' keeps the y-axis on the same (density) scale.
    sns.histplot(labels['real_age'], kde=True, bins=100, stat='density',
                 color='blue', ax=ax)
else:
    # Older seaborn versions that do not provide `histplot`.
    sns.distplot(labels['real_age'], kde=True, bins=100, color='blue', ax=ax)

ax.set_title('Distribution of age')
ax.set_ylabel('Distribution')
ax.set_xlabel('Real age')
plt.show()

### Findings
We can see that the average age of customers visiting the store is around 30 years. People aged 17–41 visited the store most often, while fewer elderly people did. The high number of children aged 0–9 may be because some parents visited the store with their children.

## Modelling

Define the necessary functions to train your model on the GPU platform and build a single script containing all of them along with the initialization section.

To make this task easier, you can define them in this notebook and run a ready code in the next section to automatically compose the script.

The definitions below will be checked by project reviewers as well, so that they can understand how you built the model.

In [None]:
import pandas as pd

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
def load_train(path):
    """
    It loads the train part of dataset from path

    Parameters
    ----------
    path : str
        Dataset root folder. It must contain the `labels.csv` file
        (columns `file_name` and `real_age`) and the `final_files`
        folder with the images.

    Returns
    -------
    The iterator produced by `flow_from_dataframe` for the `training`
    subset (the generator reserves 25% of the rows for validation).
    """
    # Imported locally so the function stays self-contained when its source
    # is copied into the standalone GPU script.
    import os

    labels = pd.read_csv(os.path.join(path, 'labels.csv'))

    # data generator with augmentation for the training subset
    # NOTE(review): vertical flips and rotations up to 90 degrees yield
    # upside-down / sideways faces - confirm this augmentation is intended.
    train_datagen = ImageDataGenerator(
        validation_split=0.25,
        rescale=1./255,
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rotation_range=90)

    # extract data from the directory
    train_gen_flow = train_datagen.flow_from_dataframe(
        dataframe=labels,
        directory=os.path.join(path, 'final_files'),
        x_col='file_name',
        y_col='real_age',
        target_size=(224, 224),
        batch_size=32,
        class_mode='raw',
        subset='training',
        seed=12345)

    return train_gen_flow

In [None]:
def load_test(path):
    """
    It loads the validation/test part of dataset from path

    Parameters
    ----------
    path : str
        Dataset root folder. It must contain the `labels.csv` file
        (columns `file_name` and `real_age`) and the `final_files`
        folder with the images.

    Returns
    -------
    The iterator produced by `flow_from_dataframe` for the `validation`
    subset (25% of the rows, as set by `validation_split`).
    """
    # Imported locally so the function stays self-contained when its source
    # is copied into the standalone GPU script.
    import os

    labels = pd.read_csv(os.path.join(path, 'labels.csv'))

    # validation data generator: rescaling only, no augmentation
    test_datagen = ImageDataGenerator(validation_split=0.25, rescale=1/255)

    # extract data from the directory
    test_gen_flow = test_datagen.flow_from_dataframe(
        dataframe=labels,
        directory=os.path.join(path, 'final_files'),
        x_col='file_name',
        y_col='real_age',
        target_size=(224, 224),
        batch_size=32,
        class_mode='raw',
        subset='validation',
        seed=12345)

    return test_gen_flow

In [None]:
def create_model(input_shape):
    """
    It defines the model

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input image, forwarded to the ResNet50 backbone.

    Returns
    -------
    A compiled `Sequential` model: ResNet50 backbone (ImageNet weights,
    no classification head) -> global average pooling -> Dense(350) ->
    Dense(185) -> Dense(1), trained with MSE loss and reporting MAE.
    """
    backbone = ResNet50(
        input_shape=input_shape, weights='imagenet', include_top=False
    )

    # define the model
    model = Sequential()

    # add layers to model
    model.add(backbone)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(350, activation='relu'))
    model.add(Dense(185, activation='relu'))
    # single ReLU output unit: the predicted age cannot be negative
    model.add(Dense(1, activation='relu'))

    # add compiler
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer,
                  loss='mse', metrics=['mae']
                  )

    return model


In [None]:
def train_model(
    model,
    train_data,
    test_data,
    batch_size=None,
    epochs=20,
    steps_per_epoch=None,
    validation_steps=None
):
    """
    Trains the model given the parameters

    Parameters
    ----------
    model
        Compiled model exposing a Keras-style `fit` method.
    train_data
        Training data passed to `fit` as its first argument.
    test_data
        Data passed to `fit` as `validation_data`.
    batch_size, steps_per_epoch, validation_steps
        Forwarded to `fit` unchanged (default `None`).
    epochs : int
        Number of training epochs (default 20).

    Returns
    -------
    The same `model` object after `fit` has been called on it.
    """
    model.fit(
        train_data,
        validation_data=test_data,
        # honour the caller's value; this used to be hard-coded to 10
        epochs=epochs,
        verbose=2,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
    )

    return model

### Prepare the Script to Run on the GPU Platform

Given you've defined the necessary functions, you can compose a script for the GPU platform, download it via the "File|Open..." menu, and upload it later for running on the GPU platform.

N.B.: The script should include the initialization section as well. An example of this is shown below.

In [None]:
# Compose a standalone script for the GPU platform: the import header
# followed by the source code of the four functions defined above.

init_str = """
import pandas as pd

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
"""

import inspect

# Functions to export, in the order they are written to the script.
exported_functions = (load_train, load_test, create_model, train_model)

with open('run_model_on_gpu.py', 'w') as script_file:
    # header first, separated from the definitions by blank lines
    script_file.write(init_str)
    script_file.write('\n\n')

    for func in exported_functions:
        script_file.write(inspect.getsource(func))
        script_file.write('\n\n')