In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from keras.metrics import mean_absolute_error
import datetime, os
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import matplotlib.image as mpimg

import wandb
from wandb.keras import WandbCallback

#library required for image preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from utils import create_dataset_from_file
from models import SmallCNN, Inception
# !wandb login  # Login command for Weights and Biases library

# Force CPU-only execution: an empty CUDA_VISIBLE_DEVICES hides every CUDA GPU
# from this process.
# NOTE(review): this runs after `import tensorflow` above; presumably that is still
# early enough because no TensorFlow op has run yet — confirm, or move it before the import.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

2023-02-09 20:11:05.374977: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-09 20:11:05.693321: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-02-09 20:11:05.739718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /app/lib
2023-02-09 20:11:05.739743: I tensorflow/compiler/xla/stream_executor/cuda

## HyperParameters

In [2]:
# Side length, in pixels, of the square model input; the training cells build
# their Keras image inputs with shape (img_size, img_size, 3).
img_size = 256

## Load and Preprocess Input Dataset

In [98]:
# machine = "remote_system"
machine = "local"

### Location of the bone-age data folder.
# `machine` selects one of the two known roots (any value other than
# "remote_system" means the local path). Setting the BONE_DATA_DIR environment
# variable overrides both, so the notebook can run elsewhere without editing
# this cell.
_remote_root = "/server0/0/2022/mtirmizi/Documents/bone_data"
_local_root = "/home/teemo/Documents/bone_data"
data_root = os.environ.get(
    "BONE_DATA_DIR",
    _remote_root if machine == "remote_system" else _local_root,
)

# The trailing "" component keeps the trailing slash these directory strings had before.
train_dir = os.path.join(data_root, "Bone Age Training Set", "")
validation_dir = os.path.join(data_root, "Bone Age Validation Set", "")
test_dir = os.path.join(data_root, "Bone Age Test Set", "")

### Train Data
train_image_dir = os.path.join(train_dir, "boneage-training-dataset")
train_df = pd.read_csv(os.path.join(train_dir, "train.csv"))

### Validation Data
validation_image_dir = os.path.join(validation_dir, "boneage-validation-dataset")
valid_df = pd.read_csv(os.path.join(validation_dir, "Validation Dataset.csv"))

### Test Data
test_image_dir = os.path.join(test_dir, "boneage-testing-dataset")
test_df = pd.read_excel(os.path.join(test_dir, "test.xlsx"))

# Preprocess Train Dataset
train_df["male"] = train_df["male"].astype(int)

# Preprocess Validation Dataset: align column names with the training frame
valid_df = valid_df.rename(columns={'Bone Age (months)': 'boneage', 'Image ID': 'id'})
valid_df["male"] = valid_df["male"].astype(int)

# Preprocess Test Dataset: align column names and encode 'Sex' as the 0/1 "male" flag
test_df = test_df.rename(columns={'Ground truth bone age (months)': 'boneage', 'Case ID': 'id'})
# An explicit mapping followed by astype(int) gives an integer column like the
# train/validation frames, instead of relying on Series.replace to infer the dtype.
_sex_to_male = test_df["Sex"].map({"M": 1, "F": 0})
if _sex_to_male.isna().any():
    _unexpected = sorted(test_df.loc[_sex_to_male.isna(), "Sex"].astype(str).unique())
    raise ValueError(f"Unexpected values in the test 'Sex' column: {_unexpected}")
test_df["male"] = _sex_to_male.astype(int)
test_df = test_df.drop(columns=["Sex"])

# Append the file extension to the id column and build the full image path
# (needed by the tensorflow Dataset API) for every split.
for _df, _image_dir in (
    (train_df, train_image_dir),
    (valid_df, validation_image_dir),
    (test_df, test_image_dir),
):
    _df["id"] = _df["id"].astype(str) + ".png"
    _df["img_path"] = _df["id"].map(lambda name, folder=_image_dir: os.path.join(folder, name))

# Mean and standard deviation of the bone age, computed on the training set only
mean_bone_age = train_df['boneage'].mean()
std_bone_age = train_df['boneage'].std()

# Models perform better when targets are normalised to zero mean and unit standard
# deviation. Validation and test targets reuse the training statistics so all three
# splits share one scale.
for _df in (train_df, valid_df, test_df):
    _df["bone_age_z"] = (_df["boneage"] - mean_bone_age) / std_bone_age

# train_df = train_df[:64]
# valid_df = valid_df[:64]

# train_dataset = create_dataset_from_file(train_df["img_path"], train_df["male"].to_numpy().reshape(-1, 1), train_df["bone_age_z"], use_gender=False, batch_size=32)
# valid_dataset = create_dataset_from_file(valid_df["img_path"], valid_df["male"].to_numpy().reshape(-1, 1), valid_df["bone_age_z"], use_gender=False, batch_size=32)
# test_dataset = create_dataset_from_file(test_df["img_path"], test_df["male"].to_numpy().reshape(-1, 1), test_df["bone_age_z"], use_gender=False, batch_size=32)

# train_dataset_wg = create_dataset_from_file(train_df["img_path"], train_df["male"].to_numpy().reshape(-1, 1), train_df["bone_age_z"], use_gender=True, batch_size=32)
# valid_dataset_wg = create_dataset_from_file(valid_df["img_path"], valid_df["male"].to_numpy().reshape(-1, 1), valid_df["bone_age_z"], use_gender=True, batch_size=32)
# test_dataset_wg = create_dataset_from_file(test_df["img_path"], test_df["male"].to_numpy().reshape(-1, 1), test_df["bone_age_z"], use_gender=True, batch_size=32)

## Model Training

In [99]:
# Helper functions

def mae_in_months(x_p, y_p):
    """Mean absolute error between two z-scored bone-age inputs, expressed in months.

    Each argument is mapped back to months with the training-set statistics
    (``std_bone_age * value + mean_bone_age``) before ``mean_absolute_error`` is
    applied. The function is passed to ``model.compile`` as a metric; the argument
    order does not affect the result because the error is an absolute difference.
    """
    first_in_months = std_bone_age * x_p + mean_bone_age
    second_in_months = std_bone_age * y_p + mean_bone_age
    return mean_absolute_error(first_in_months, second_in_months)
def random_learning_rate(lower_bound=0.01, upper_bound=0.1) -> float:
    """Draw a random learning rate for one training trial.

    A base value is sampled uniformly between ``lower_bound`` and ``upper_bound``
    and then multiplied by a scale picked at random from 1, 0.1 and 0.01.
    """
    # The uniform sample is drawn first and the scale second.
    base_rate = np.random.uniform(lower_bound, upper_bound)
    scale = np.random.choice([1, 0.1, 0.01])
    return base_rate * scale

def tf_dataset_calculate_mae_in_months(tf_dataset=None):
    """Compute the mean absolute error, in months, of the current model over a dataset.

    Predictions are made with the notebook-level global ``model``, so whichever
    model was assigned most recently is the one being evaluated.

    Args:
        tf_dataset: Iterable of batches. Element 0 of each batch is passed to
            ``model.predict``; element 1 is the target for that batch and must
            expose ``.numpy()``.

    Returns:
        ``mae_in_months(predictions, targets)`` converted with ``.numpy()``.

    Raises:
        TypeError: If ``tf_dataset`` is None.
    """
    if tf_dataset is None:
        raise TypeError("tf_dataset must be an iterable of (features, target) batches, got None")

    pred_batches = []
    target_batches = []
    for xray_batch in tf_dataset:
        # xray_batch is a tuple: 1st element is the features, 2nd element is the target.
        # verbose=0 keeps predict() from printing one progress bar per batch.
        pred_batches.append(np.ravel(model.predict(xray_batch[0], verbose=0)))
        target_batches.append(np.ravel(xray_batch[1].numpy()))

    # Concatenate once instead of np.append per batch (which copies the whole array
    # every time). The leading empty float64 array keeps the result float64 and
    # makes an empty dataset yield empty arrays rather than an error.
    pred_y = np.concatenate([np.array([])] + pred_batches)
    test_y = np.concatenate([np.array([])] + target_batches)

    mae = mae_in_months(pred_y, test_y)
    return mae.numpy()

## For debugging
# Calculate Test Mean Absolute Error (MAE) in months
# pred_y = model.predict(test_dataset_wg)
# test_y = test_df["bone_age_z"]
# test_y = mean_bone_age + std_bone_age * ( df_test[hparams.TARGET_VAR].to_numpy())
# pred_y = mean_bone_age + std_bone_age * (model.predict(test_dataset))
# mae_value = sklearn.metrics.mean_absolute_error(test_y, pred_y)
# test_df["bone_age_z"].shape
# mae_in_month = smae_in_months(test_df["bone_age_z"], pred_y.flatten()).numpy()
# print(mae_in_months)

### Prepare callback functions

In [100]:
# Learning-rate schedule: watches the validation loss (lower is better) and
# scales the learning rate by `factor` once `patience` epochs pass without an
# improvement of at least `min_delta`, never going below `min_lr`.
red_lr_plat = ReduceLROnPlateau(
    monitor="val_loss", mode="min",
    factor=0.1, patience=10, cooldown=0,
    min_delta=0.0001, min_lr=1e-10,
    verbose=0,
)

# Checkpointing: keeps only the weights of the best model seen so far, judged
# by the lowest validation loss.
# NOTE(review): `mc` is not included in the `callbacks` lists built by the
# training cells in this notebook, so it only takes effect if added there.
mc = ModelCheckpoint(
    filepath="data/artifact/" + "model" + ".h5",
    save_best_only=True,
    monitor="val_loss",
    mode="min",
)

## Base Convolutional Neural Network (BaseCNN)

In [102]:

# Train the small CNN twice: first with the gender input, then without it.
for with_gender in (True, False):
    # Sampled hyper-parameters; the single-element lists currently pin the value.
    lr = random_learning_rate()
    batch_size = np.random.choice([16])
    epoch = np.random.choice([50])

    # Build the train / validation / test datasets with the sampled batch size.
    train_split, valid_split, test_split = [
        create_dataset_from_file(
            split_df["img_path"],
            split_df["male"].to_numpy().reshape(-1, 1),
            split_df["bone_age_z"],
            use_gender=with_gender,
            batch_size=batch_size,
        )
        for split_df in (train_df, valid_df, test_df)
    ]
    # Keep the notebook-level dataset names that other cells refer to.
    if with_gender:
        train_dataset_wg, valid_dataset_wg, test_dataset_wg = train_split, valid_split, test_split
    else:
        train_dataset, valid_dataset, test_dataset = train_split, valid_split, test_split

    # Weights and Biases run initialization. Using the run as a context manager
    # closes it even when training raises or is interrupted, so a failed trial
    # does not leave a run open.
    with wandb.init(project="hda-final",
                    entity="hda-project",  # Entity is the team name on the wandb website
                    name=f"CNN-v1-g-{with_gender}",
                    config={
                        "MODEL_NAME": "CNN",
                        "START_LR": lr,
                        "BATCH_SIZE": batch_size,
                        "GENDER": with_gender,
                    }) as run:

        callbacks = [red_lr_plat, WandbCallback()]
        optimizer = tf.keras.optimizers.Adam(lr)

        input_img = tf.keras.Input(shape=(img_size, img_size, 3), name="image")
        if with_gender:
            input_gender = tf.keras.Input(shape=(1,), name="gender")
            model = SmallCNN(input_img=input_img, input_gender=input_gender)()
        else:
            model = SmallCNN(input_img=input_img)()

        # Compile and train; the loss is MSE on the z-scored target, the metric is MAE in months.
        model.compile(loss='mse', optimizer=optimizer, metrics=[mae_in_months])
        model.fit(train_split, epochs=epoch, callbacks=callbacks, validation_data=valid_split)

        # Predictions on the test dataset
        test_mae = tf_dataset_calculate_mae_in_months(test_split)

        # Upload the best model file written into the run directory, if there is one.
        # NOTE(review): the captured output of another cell shows WandbCallback writing a
        # `model-best` directory rather than `model-best.h5`; in that case add_file would
        # raise, so the manual upload is skipped with a message instead.
        best_model_path = os.path.join(run.dir, "model-best.h5")
        if os.path.isfile(best_model_path):
            art = wandb.Artifact(f"model-{run.name}-h5", type="model")
            art.add_file(best_model_path)
            wandb.log_artifact(art)
        else:
            print(f"No best-model file at {best_model_path}; skipping manual artifact upload.")

        # Log performance on the test dataset
        wandb.log({"test_mae_in_months": test_mae})

# Save the Model
# model.save(os.path.join("..", "nn_models", "model.h5"))

Epoch 1/50
 25/789 [..............................] - ETA: 2:59 - loss: 2.2763 - mae_in_months: 50.1212

KeyboardInterrupt: 

## Inceptionv4 Neural Network (Inv4NN)

In [9]:
# Manual toggle. NOTE: it is not changed inside the loop, so both trials below use
# the same setting; the run name and MODEL_NAME are fixed strings for the gender variant.
with_gender = True
for i in range(2):
    # Sampled hyper-parameters for this trial
    lr = random_learning_rate()
    batch_size = np.random.choice([8, 16, 32])
    epoch = np.random.choice([3, 4, 5])

    # Build the train / validation / test datasets with the sampled batch size.
    train_split, valid_split, test_split = [
        create_dataset_from_file(
            split_df["img_path"],
            split_df["male"].to_numpy().reshape(-1, 1),
            split_df["bone_age_z"],
            use_gender=with_gender,
            batch_size=batch_size,
        )
        for split_df in (train_df, valid_df, test_df)
    ]
    # Keep the notebook-level dataset names that other cells refer to.
    if with_gender:
        train_dataset_wg, valid_dataset_wg, test_dataset_wg = train_split, valid_split, test_split
    else:
        train_dataset, valid_dataset, test_dataset = train_split, valid_split, test_split

    # Weights and Biases run initialization. Using the run as a context manager
    # closes it even when training raises or is interrupted.
    with wandb.init(project="jan12-run",
                    entity="hda-project",  # Entity is the team name on the wandb website
                    name="Incenptionv4-v2-wg",
                    config={
                        "MODEL_NAME": "Inceptionv4-Gender",
                        "START_LR": lr,
                        "BATCH_SIZE": batch_size,
                        "GENDER": with_gender,
                    }) as run:
        # wandb.config["learning_rate"] = lr
        # wandb.config["epochs"] = epoch
        callbacks = [red_lr_plat, WandbCallback()]

        # input_img is created here but not passed to Inception, which takes the shape tuple.
        input_img = tf.keras.Input(shape=(img_size, img_size, 3), name="image")
        input_gender = tf.keras.Input(shape=(1,), name="gender")

        optimizer = tf.keras.optimizers.Adam(lr)

        if with_gender:
            model = Inception((img_size, img_size, 3), input_gender=input_gender)()
        else:
            model = Inception((img_size, img_size, 3))()

        # Compile and train; the loss is MSE on the z-scored target, the metric is MAE in months.
        model.compile(loss='mse', optimizer=optimizer, metrics=[mae_in_months])
        model.fit(train_split, epochs=epoch, callbacks=callbacks, validation_data=valid_split)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016668483933335664, max=1.0…

Epoch 1/3



INFO:tensorflow:Assets written to: /home/teemo/MEGA/bdma-semesters/3-semester/Human Data Analysis/project_Radiograph-Age-Prediction/src/wandb/run-20230125_080648-bhft6zdy/files/model-best/assets


INFO:tensorflow:Assets written to: /home/teemo/MEGA/bdma-semesters/3-semester/Human Data Analysis/project_Radiograph-Age-Prediction/src/wandb/run-20230125_080648-bhft6zdy/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/home/teemo/MEGA/bdma-semesters/3-semester/Human Data Analysis/project_Radiograph-Age-Prediction/src/wandb/run-20230125_080648-bhft6zdy/files/model-best)... Done. 1.6s


Epoch 2/3
Epoch 3/3


0,1
epoch,▁▅█
loss,█▁▁
lr,▁▁▁
mae_in_months,█▁▁
val_loss,▁▅█
val_mae_in_months,▁▆█

0,1
best_epoch,0.0
best_val_loss,1.03388
epoch,2.0
loss,1.92449
lr,0.09442
mae_in_months,48.34068
val_loss,1.05161
val_mae_in_months,35.00496


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666859628333744, max=1.0)…

Epoch 1/3
Epoch 2/3
Epoch 3/3


0,1
epoch,▁▅█
loss,█▁▁
lr,▁▁▁
mae_in_months,█▁▁
val_mae_in_months,▁

0,1
epoch,2.0
loss,2.20724
lr,0.02606
mae_in_months,48.08841
val_loss,inf
val_mae_in_months,4.191335238502998e+29


In [35]:
# Debug inspection: show the batch size left in the kernel by the most recent training trial.
batch_size

8

In [78]:
# Scratch check of np.append: the displayed result holds the elements of `a`
# followed by the appended values.
a = np.array([1, 2, 3])
np.append(a, [4, 5, 6])

array([1, 2, 3, 4, 5, 6])

In [94]:
# NOTE(review): this redefines the helper of the same name from the "Model Training"
# section and shadows it once this cell runs; keep a single definition.
def tf_dataset_calculate_mae_in_months(tf_dataset=None):
    """Compute the mean absolute error, in months, of the current model over a dataset.

    Predictions are made with the notebook-level global ``model``, so whichever
    model was assigned most recently is the one being evaluated.

    Args:
        tf_dataset: Iterable of batches. Element 0 of each batch is passed to
            ``model.predict``; element 1 is the target for that batch and must
            expose ``.numpy()``.

    Returns:
        ``mae_in_months(predictions, targets)`` converted with ``.numpy()``.

    Raises:
        TypeError: If ``tf_dataset`` is None.
    """
    if tf_dataset is None:
        raise TypeError("tf_dataset must be an iterable of (features, target) batches, got None")

    pred_batches = []
    target_batches = []
    for xray_batch in tf_dataset:
        # xray_batch is a tuple: 1st element is the features, 2nd element is the target.
        # verbose=0 keeps predict() from printing one progress bar per batch.
        pred_batches.append(np.ravel(model.predict(xray_batch[0], verbose=0)))
        target_batches.append(np.ravel(xray_batch[1].numpy()))

    # Concatenate once instead of np.append per batch (which copies the whole array
    # every time). The leading empty float64 array keeps the result float64 and
    # makes an empty dataset yield empty arrays rather than an error.
    pred_y = np.concatenate([np.array([])] + pred_batches)
    test_y = np.concatenate([np.array([])] + target_batches)

    mae = mae_in_months(pred_y, test_y)
    return mae.numpy()

## For debugging
# Calculate Test Mean Absolute Error (MAE) in months
# pred_y = model.predict(test_dataset_wg)
# test_y = test_df["bone_age_z"]
# test_y = mean_bone_age + std_bone_age * ( df_test[hparams.TARGET_VAR].to_numpy())
# pred_y = mean_bone_age + std_bone_age * (model.predict(test_dataset))
# mae_value = sklearn.metrics.mean_absolute_error(test_y, pred_y)
# test_df["bone_age_z"].shape
# mae_in_month = smae_in_months(test_df["bone_age_z"], pred_y.flatten()).numpy()
# print(mae_in_months)

# Evaluate the current global `model` on the gender-aware test split; the
# displayed value is the mean absolute error in months.
tf_dataset_calculate_mae_in_months(test_dataset_wg)




38.74355136832752

## Old Code

In [None]:
# NOTE: legacy cell kept for reference. It depends on `callbacks`, `run`,
# `train_dataset_wg` and `valid_dataset_wg` left in the kernel by the training
# cells above, so it cannot run on a fresh kernel by itself.
input_img = tf.keras.Input(shape=(img_size, img_size, 3), name="Image")

# shape is written as a one-element tuple; a bare (1) is just the integer 1
input_gender = tf.keras.Input(shape=(1,), name="Gender")

model = SmallCNN(input_img=input_img, input_gender=input_gender)()

optimizer = tf.keras.optimizers.Adam(0.1)

# Compile the model: MSE loss on the z-scored target, MAE in months as the metric
model.compile(loss='mse', optimizer=optimizer, metrics=[mae_in_months])

# Train the model
model.fit(train_dataset_wg, epochs=10, callbacks=callbacks, validation_data=valid_dataset_wg)

# Tell W&B that a model run is complete. finish() is the call the other training
# cells use; join() is its deprecated alias.
run.finish()

# Save the Model
# model.save(os.path.join("..", "nn_models", "model.h5"))