<a href="https://colab.research.google.com/github/IverMartinsen/ColabNotebooks/blob/main/combined_model_test_run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import modules

In [None]:
import sys

sys.path.append('/content/drive/Othercomputers/Min bærbare datamaskin/UiT/src/Python')

import pandas as pd
import tensorflow as tf
import numpy as np
from modules.stratified_idxs import stratified_idxs
from modules.losses import MeanSquaredErrorKLD
from scipy.optimize import minimize

Load features, i.e. age, sex and length

In [None]:
# Read the feature table from the mounted Drive folder.
df = pd.read_csv('/content/drive/Othercomputers/Min bærbare datamaskin/UiT/Data/Grønlandskveiteotolitter/dataframe.csv')

# Boolean row mask: True where a row has no missing value in any column.
# It must be computed before the incomplete rows are removed, because it is
# later used to select the matching images from the full image stack.
notna = df.notna().to_numpy().all(axis=1)

# Keep only the rows where every feature is present.
df = df.dropna()

Load images

In [None]:
# Target (height, width) every image is resized to on load.
image_size = (128, 128)

# Directory holding the image files.
file_path = '/content/drive/Othercomputers/Min bærbare datamaskin/UiT/Data/Grønlandskveiteotolitter/greenland_halibut_std'

# Build an unlabeled image dataset; shuffle=False keeps the files in
# alphabetical order so that they can be matched against the feature rows.
dataset = tf.keras.utils.image_dataset_from_directory(
    directory=file_path,
    labels=None,
    image_size=image_size,
    shuffle=False,
)

# Undo the batching, collect every image into one numpy array and keep only
# the images whose feature row is complete (see the `notna` mask).
images = np.stack(
    [image for image in dataset.unbatch().as_numpy_iterator()]
)[notna]

Create subsets for training, validation and testing

In [None]:
# Split the sample positions into train/validation/test parts (60/20/20),
# stratified on age, with a fixed seed.
train_idx, valid_idx, test_idx = stratified_idxs(df['age'], (0.6, 0.2, 0.2), seed=123)


def set_from_idx(idx):
    """Return the model inputs for the positions in `idx`.

    The result is a tuple `(sex, images)`: the `sex` column at those
    positions converted to a tensor, and the matching slice of `images`.
    """
    return (tf.convert_to_tensor(df['sex'].iloc[idx]), images[idx])


# The splits, always in the order: training, validation, testing.
# Model inputs (sex, image).
x_tr, x_va, x_te = (set_from_idx(i) for i in (train_idx, valid_idx, test_idx))

# Targets (age).
y_tr, y_va, y_te = (df['age'].iloc[i] for i in (train_idx, valid_idx, test_idx))

# File names of the samples in each split.
f_tr, f_va, f_te = (df['filename'].iloc[i] for i in (train_idx, valid_idx, test_idx))

Define model

In [None]:
# Create layer for mapping categorical labels (sex) to integer indices
Index = tf.keras.layers.StringLookup()
# Fit index layer on training data
Index.adapt(tf.constant(x_tr[0]))

# Number of categories known to the lookup layer. vocabulary_size() also counts
# the out-of-vocabulary slot, so this is the width of the one-hot vector.
num_tokens = Index.vocabulary_size()

# Create layer for one-hot-encoding the categorical labels
Encoding = tf.keras.layers.CategoryEncoding(num_tokens=num_tokens, output_mode='one_hot')

# Define pretrained base model without classification head. Use global average pooling on output.
base_model = tf.keras.applications.Xception(
    input_shape=image_size + (3, ),
    include_top=False,
    pooling='avg')

# Define full model. Note that by setting training=False in the base model
# we always run the base model in inference mode.
img_input = tf.keras.layers.Input(image_size + (3, ))
cat_input = tf.keras.Input(shape=(1,), name='gender', dtype='string')

# One-hot representation of the sex label
gender = Encoding(Index(cat_input))

# First we process the images: Xception preprocessing, then random
# translation and rotation layers for augmentation
x = tf.keras.applications.xception.preprocess_input(img_input)
x = tf.keras.layers.RandomTranslation(0, 0.1)(x)
x = tf.keras.layers.RandomRotation(0.1, fill_mode='constant')(x)
x = base_model(x, training=False)
x = tf.keras.layers.Dropout(0.2)(x)
# One non-negative age prediction per category. The width must equal the
# one-hot width, otherwise the dot product below fails on a shape mismatch,
# so it is tied to the vocabulary size instead of being hard-coded.
x = tf.keras.layers.Dense(num_tokens, 'relu')(x)
# Then we use multiplication to get the gender conditional age predictions:
# the dot product with the one-hot vector picks the prediction for the sample's category
outputs = tf.keras.layers.Dot(axes=1)([x, gender])
# Finally we concatenate the age prediction with the one-hot sex matrix
# (presumably the layout the custom loss expects - verify against MeanSquaredErrorKLD)
model = tf.keras.models.Model([cat_input, img_input], tf.keras.layers.Concatenate()([outputs, gender]))

Compile and fit model

In [None]:
# The model-definition cell binds the network to `model`, while this cell refers
# to it as `cnn`. Bind `cnn` explicitly so the cell does not raise a NameError
# when the notebook is run top to bottom on a fresh kernel.
cnn = model

# Compile model using Adam (learning rate 1e-3) and the custom loss function
cnn.compile(tf.keras.optimizers.Adam(1e-3), MeanSquaredErrorKLD())

# Apply early stopping: stop after 20 epochs without improvement of the
# monitored quantity and restore the best weights seen
callbacks = tf.keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)

# Fit model on the training split, validating on the validation split.
# `fit` documents `callbacks` as a list, so the single callback is wrapped in one.
cnn.fit(
    x_tr,
    y_tr,
    batch_size=32,
    epochs=100,
    validation_data=(x_va, y_va),
    callbacks=[callbacks]
    )

Predict age by length

In [None]:
def loss_by_length(params):
    """Mean squared error of the length-based age curve on the training split.

    `params` is a sequence of three numbers; the age predicted from a length is
    `params[0] - log(1 - length / params[1]) / params[2]`, and the value
    returned is the mean squared difference between `y_tr` and that prediction.
    """
    lengths = df['length'].iloc[train_idx]
    log_term = tf.math.log(1 - lengths/params[1])/params[2]
    return tf.reduce_mean((y_tr - params[0] + log_term)**2).numpy()


# Search for the parameter set that minimizes the training loss,
# starting from the initial guess (1, 10000, 1).
params = minimize(loss_by_length, x0=(1, 10000, 1)).x


def age_by_length(params):
    """Return a function that maps length to age for the given parameters."""
    def predict_age(length):
        return params[0] - tf.math.log(1 - length/params[1])/params[2]
    return predict_age


# Length-based age predictions for the test split.
y_hat2 = age_by_length(params)(df['length'].iloc[test_idx])