In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import warnings
import seaborn as sns
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.callbacks import ModelCheckpoint


In [None]:
# Root folder of the Kaggle dataset. read_images() walks its numerically
# named sub-folders (one per age class) in ascending numeric order.
BASE_DIR = '/kaggle/input/uniform-augmented-resized-gray-set-age-0-100/uniform_augmented_resized_gray_set_age_0_100'

In [None]:
import cv2

def read_images(input_folder, val_every=5, img_size=128):
    """Load the per-age image folders into train/validation ``uint8`` arrays.

    ``input_folder`` must contain one sub-folder per age whose name is the age
    written as a non-negative integer (``0``, ``1``, ...). Entries that are not
    numerically named directories are ignored. Folders are visited in numeric
    order and the files inside each folder in sorted name order, so the split
    is reproducible. Counting across all folders, every ``val_every``-th image
    goes to the validation set and the others go to the training set.

    The output arrays are sized from the files actually present, so they never
    overflow and never contain uninitialised rows.

    Args:
        input_folder: Directory holding the numerically named age folders.
        val_every: Every ``val_every``-th image is used for validation.
        img_size: Side length in pixels; every file must decode to an
            ``img_size x img_size`` grayscale image.

    Returns:
        ``(train_img, train_age, train_counter, val_img, val_age, val_counter)``.
        Image arrays have shape ``(n, img_size, img_size, 1)``, age arrays have
        shape ``(n,)``, all with dtype ``uint8``; each counter equals its ``n``.

    Raises:
        ValueError: If ``val_every`` < 1, an age folder name does not fit in
            ``uint8``, a file cannot be decoded, or an image has the wrong size.
    """
    if val_every < 1:
        raise ValueError(f"val_every must be >= 1, got {val_every}")

    # Only numerically named directories are age folders; filtering first keeps
    # a stray file or hidden folder from crashing the int() sort key.
    age_dirs = sorted(
        (
            name for name in os.listdir(input_folder)
            if name.isdecimal() and os.path.isdir(os.path.join(input_folder, name))
        ),
        key=int,
    )

    max_age = np.iinfo(np.uint8).max
    for name in age_dirs:
        if int(name) > max_age:
            raise ValueError(f"age folder {name!r} does not fit in uint8 labels")

    # Sorted listings make the train/validation split deterministic
    # (os.listdir order is arbitrary).
    files_per_dir = {
        name: sorted(os.listdir(os.path.join(input_folder, name)))
        for name in age_dirs
    }

    img_total = sum(len(files) for files in files_per_dir.values())
    img_val = img_total // val_every
    img_train = img_total - img_val

    train_img = np.empty((img_train, img_size, img_size, 1), dtype=np.uint8)
    train_age = np.empty(img_train, dtype=np.uint8)
    train_counter = 0

    val_img = np.empty((img_val, img_size, img_size, 1), dtype=np.uint8)
    val_age = np.empty(img_val, dtype=np.uint8)
    val_counter = 0

    # Number of images sent to training since the last validation image. When
    # it reaches val_every - 1 the next image goes to validation, so every
    # val_every-th image ends up in the validation set.
    since_val = 0

    for number_dir in age_dirs:
        input_number_dir = os.path.join(input_folder, number_dir)
        print(f"{input_number_dir}")

        # The label is the folder name itself, so a missing age folder does
        # not shift the labels of the folders after it.
        age = int(number_dir)

        for filename in files_per_dir[number_dir]:
            input_path = os.path.join(input_number_dir, filename)

            img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError(f"could not read image: {input_path}")

            # reshape raises ValueError if the image is not img_size x img_size.
            img = img.reshape(img_size, img_size, 1)

            if since_val == val_every - 1:  # goes to validation
                val_img[val_counter] = img
                val_age[val_counter] = age
                val_counter += 1
                since_val = 0
            else:
                train_img[train_counter] = img
                train_age[train_counter] = age
                train_counter += 1
                since_val += 1

    return train_img, train_age, train_counter, val_img, val_age, val_counter

In [None]:
# Read the whole dataset into memory; the two counters are the number of
# images written into the training and validation arrays respectively.
train_img, train_age, train_counter, val_img, val_age, val_counter = read_images(BASE_DIR)
# Display how many images ended up in each split.
train_counter, val_counter

In [None]:
# Distinct age labels present in the validation split; their count is used
# as the number of histogram bins in the next cell.
unique_ages = np.unique(val_age)
unique_ages

In [None]:
# Age distribution of both splits on shared axes. Transparency, a legend and
# axis labels make the two overlaid histograms distinguishable.
plt.hist(train_age, bins=np.size(unique_ages), alpha=0.7, label='train')
plt.hist(val_age, bins=np.size(unique_ages), alpha=0.7, label='validation')
plt.xlabel('age')
plt.ylabel('number of images')
plt.legend()
plt.show()

### initialising tpu

In [None]:
# Resolve the TPU cluster, initialise the TPU system, and build the
# distribution strategy whose scope is used below to create and load the model.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

## Defining model

In [None]:
with tpu_strategy.scope():
    keras_layers = tf.keras.layers

    # Input: 128x128 single-channel images, rescaled from [0, 255] to [0, 1].
    layer_stack = [keras_layers.Rescaling(1./255, input_shape=(128,128, 1))]

    # Feature extractor: five Conv2D(3x3, relu) + MaxPooling2D stages.
    for n_filters in (64, 64, 256, 256, 256):
        layer_stack.append(keras_layers.Conv2D(n_filters, 3, activation='relu'))
        layer_stack.append(keras_layers.MaxPooling2D())

    layer_stack.append(keras_layers.Flatten())

    # Head: three Dropout(0.5) + Dense(256, relu) pairs.
    for _ in range(3):
        layer_stack.append(keras_layers.Dropout(0.5))
        layer_stack.append(keras_layers.Dense(256, activation='relu'))

    # Single-unit output predicting the age.
    # NOTE(review): the output uses ReLU, so a negative pre-activation yields
    # zero gradient for that sample; confirm this is preferred over a linear output.
    layer_stack.append(keras_layers.Dropout(0.5))
    layer_stack.append(keras_layers.Dense(1, activation='relu'))

    model = tf.keras.Sequential(layer_stack)

    # Mean absolute error is the training loss.
    compile_kwargs = dict(
        optimizer='adam',
        loss='mae',
        steps_per_execution=64,  # in tpu guide was 32
    )
    model.compile(**compile_kwargs)

model.summary()

### model training


In [None]:
# Save the model to disk whenever the validation loss reaches a new minimum.
checkpoint_cb = ModelCheckpoint(
    filepath='best_model_new_v25.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

In [None]:
# Batch size passed to fit(): 128 multiplied by 8.
BATCH_SIZE = 128 * 8 # 8 cores
EPOCHS = 200

# Train on the in-memory arrays and validate after every epoch; the
# checkpoint callback saves the model with the lowest val_loss.
# `history` is used by the loss plot below.
history = model.fit(
    x=train_img,
    y=train_age,
    validation_data=(val_img, val_age),
    batch_size=BATCH_SIZE, 
    epochs=EPOCHS, 
    callbacks=[checkpoint_cb])

### plot of losses

In [None]:
# Per-epoch training and validation loss recorded by model.fit().
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = np.arange(np.size(loss))

plt.plot(epochs, loss, label='loss')
plt.plot(epochs, val_loss, label='val_Loss')
plt.xlabel('epoch')
plt.ylabel('MAE loss')
# Without legend() the label= arguments above are never rendered.
plt.legend()
plt.show()

### loading best model

In [None]:
# Replace `model` with a previously saved model, created inside the TPU
# strategy scope. The LoadOptions object requests I/O on '/job:localhost'.
# NOTE(review): the file is '.h5' (HDF5), not TensorFlow's SavedModel format;
# confirm that `options` has any effect for this file type.
# NOTE(review): this loads model_v5.h5 from a Kaggle input dataset, not the
# 'best_model_new_v25.h5' checkpoint written during training above, so the
# evaluation below does not describe the model just trained; confirm intent.
with tpu_strategy.scope():
    load_locally = tf.saved_model.LoadOptions(experimental_io_device='/job:localhost')
    model = tf.keras.models.load_model('/kaggle/input/model-v5/model_v5.h5', options=load_locally)

### validating

In [None]:
# Same batch size as used for training.
BATCH_SIZE = 128 * 8 # 8 cores
# Report the loss of the loaded model on the validation split.
model.evaluate(val_img, val_age, batch_size=BATCH_SIZE)

## scatter plot of predictions

In [None]:
BATCH_SIZE = 128 * 8
# Predict an age for every validation image.
predicted = model.predict(val_img, batch_size=BATCH_SIZE)
# Flatten to a 1-D array with one prediction per validation label; the
# reshape raises if the number of predictions differs from np.size(val_age).
pred_age = predicted.reshape(np.size(val_age))

In [None]:
# Predicted vs. true age for every validation image.
plt.scatter(val_age, pred_age, c='green', s=1)
# Reference diagonal (prediction == truth). Two endpoints are enough; drawing
# it through every unsorted validation point only retraces the same segment.
age_range = [val_age.min(), val_age.max()]
plt.plot(age_range, age_range, label='perfect prediction')
plt.xlabel('true age')
plt.ylabel('predicted age')
plt.legend()
plt.show()

## confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Round the continuous predictions to whole years and tabulate them against
# the true ages for the classes 0..100 (scikit-learn convention: rows are true
# labels, columns are predicted labels).
cm = confusion_matrix(val_age, np.round(pred_age), labels=range(101))
# Persist the matrix so it can be inspected outside the notebook.
df = pd.DataFrame(cm)
df.to_csv("cm.csv")

### heatmap of confusion matrix

In [None]:
# Heatmap of the confusion matrix. confusion_matrix(y_true, y_pred) puts true
# labels on the rows and predictions on the columns, and heatmap draws rows
# along the y-axis, so x is the predicted age and y is the true age.
plt.figure(figsize=(20, 20))
s = sns.heatmap(
    cm, 
    square=True,
    cbar_kws={'shrink': 0.8},
    xticklabels=range(101),
    yticklabels=range(101),
)
s.set_xlabel('predicted age', fontsize=20)
s.set_ylabel('true age', fontsize=20)

## checking random results from validation set

In [None]:
# Draw 16 random validation images (indices may repeat) and show each one
# with its true age and rounded predicted age in a 4x4 grid.
n = 16
indices = np.random.randint(np.size(val_age), size=n)

plt.figure(figsize=(20, 20))

for slot, index in enumerate(indices, start=1):
    sample_img = val_img[index]
    sample_true = val_age[index]
    sample_pred = int(np.round(pred_age[index]))

    plt.subplot(4, 4, slot)
    plt.imshow(sample_img, cmap='gray')
    plt.title(f"True age: {sample_true}, Predicted: {sample_pred}")
    plt.axis('off')