In [1]:
# Load the TensorBoard notebook extension so the `%tensorboard` line magic
# used later in this notebook is available.
%load_ext tensorboard

In [2]:
from datetime import datetime
from packaging import version

import tensorflow as tf
from tensorflow import keras

# My imports
import os
from time import time
import tensorflow_hub as hub
from tensorflow.keras import layers
from objective_func import macro_soft_f1, macro_f1
from loader.mimic_cxr_jpg_loader import MIMIC_CXR_JPG_Loader
from utils.augmentation import preprocess_image
from lars_optimizer import LARSOptimizer
from utils.analysis import *

2024-02-14 12:47:04.695921: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-14 12:47:05.366519: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.2/lib64:/home/payam/miniconda3/envs/tf2-gpu/lib/
2024-02-14 12:47:05.366606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.2/lib64:/home/payam/miniconda3/envs/tf2-gpu

In [3]:
# Global configuration shared by the cells below.
project_folder = os.getcwd()  # base directory; model paths and the data loader are resolved against it
BASE_MODEL_PATH = './base-models/simclr/r152_2x_sk1/hub/'  # joined with project_folder before being passed to hub.KerasLayer
BATCH_SIZE = 4  # batch size for both pipelines; the shuffle buffer is 2 * BATCH_SIZE
LEARNING_RATE = 0.1  # passed to the Adam optimizer
EPOCHS = 1  # passed to model.fit
MOMENTUM = 0.9  # NOTE(review): not referenced by any cell visible in this notebook
WEIGHT_DECAY = 1e-6  # passed to the Adam optimizer as weight_decay
IMAGE_SIZE = (448, 448)  # first two entries of the model input shape; also forwarded to preprocess_image
CHANNELS = 3  # last entry of the model input shape
num_classes = 14  # number of sigmoid units in the output layer

In [4]:
# Report the installed TensorFlow version and stop early on anything older
# than the 2.x series.
print("TensorFlow version: ", tf.__version__)
tf_major_version = version.parse(tf.__version__).release[0]
assert tf_major_version >= 2, "This notebook requires TensorFlow 2.0 or above."

TensorFlow version:  2.11.0


In [5]:
# Show the installed TensorBoard version (rendered as this cell's output).
import tensorboard
tensorboard.__version__

'2.11.2'

In [6]:
# Clear any logs from previous runs.
# NOTE: this recursively deletes everything under ./out/board/, which is also
# the parent of the graph-export directory used later (./out/board/vis_remedis).
!rm -rf ./out/board/

In [7]:
# Load the pretrained base model from disk as a frozen Keras layer
# (trainable=False), then stack a small trainable classification head on top.
hub_path = os.path.join(project_folder, BASE_MODEL_PATH)
try:
    feature_extractor_layer = hub.KerasLayer(
        hub_path,
        input_shape=(*IMAGE_SIZE, CHANNELS),
        trainable=False)
except Exception:
    # Only ordinary errors get the "did not load" hint; KeyboardInterrupt and
    # SystemExit propagate without a misleading message. The original
    # exception is re-raised so the traceback is preserved.
    print(f"""The model {hub_path} did not load. Please verify the model path. It is also worth considering that the model might still be in the process of being uploaded to the designated location. If you have recently uploaded it to a notebook, there could be delays associated with the upload.""")
    raise

#------------------- SETUP TRAINING HEAD -------------------#

# Head: one hidden ReLU layer followed by `num_classes` independent sigmoid
# outputs (one per label).
model = tf.keras.Sequential([
    feature_extractor_layer,
    layers.Dense(1024, activation='relu', name='hidden_layer'),
    layers.Dense(num_classes, activation='sigmoid', name='multi-label_classifier')
])

# TEMP for debugging
# summary() prints the table itself and returns None, so it is called directly
# rather than wrapped in print() (which would emit a stray "None").
model.summary()

2024-02-14 12:47:09.149305: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 12:47:09.153401: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 12:47:09.153543: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 12:47:09.153824: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorF

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 4096)              391961768 
                                                                 
 hidden_layer (Dense)        (None, 1024)              4195328   
                                                                 
 multi-label_classifier (Den  (None, 14)               14350     
 se)                                                             
                                                                 
Total params: 396,171,446
Trainable params: 4,209,678
Non-trainable params: 391,961,768
_________________________________________________________________
None


In [8]:
# Build the Adam optimizer from the notebook-level hyperparameters.
adam_settings = dict(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
optimizer = tf.keras.optimizers.Adam(**adam_settings)

# Variable-name patterns that should be left out of weight decay.
# NOTE(review): no layer defined in this notebook is named 'head_supervised'
# (the head layers are 'hidden_layer' and 'multi-label_classifier') — confirm
# whether that entry is still needed.
no_decay_var_names = ['batch_normalization', 'bias', 'head_supervised']
optimizer.exclude_from_weight_decay(var_names=no_decay_var_names)

# Train against the soft-F1 loss and report macro F1 as the metric.
model.compile(
    loss=macro_soft_f1,
    metrics=[macro_f1],
    optimizer=optimizer)

In [9]:
# Create the Keras TensorBoard callback; every run logs to its own directory,
# named "fit" followed directly by a start timestamp, under out/board/.
run_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = "out/board/fit" + run_timestamp
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

In [10]:
def _preprocess_train(x, y, info=None):
    """Preprocess one training example.

    Passes `x` through `preprocess_image` with the notebook's IMAGE_SIZE,
    training mode enabled, colour distortion disabled and 'Center' cropping.
    `y` is returned unchanged; `info` is accepted but not used.
    """
    processed = preprocess_image(
        x,
        *IMAGE_SIZE,
        is_training=True,
        color_distort=False,
        crop='Center',
    )
    return processed, y

def _preprocess_val(x, y, info=None):
    """Preprocess one validation example.

    Passes `x` through `preprocess_image` with the notebook's IMAGE_SIZE,
    training mode disabled, colour distortion disabled and 'Center' cropping.
    `y` is returned unchanged; `info` is accepted but not used.
    """
    processed = preprocess_image(
        x,
        *IMAGE_SIZE,
        is_training=False,
        color_distort=False,
        crop='Center',
    )
    return processed, y

In [11]:
# Per-split settings handed to the loader.
# NOTE(review): presumably the number of samples to take from each split —
# confirm against MIMIC_CXR_JPG_Loader.
split_settings = {'train': 10, 'validate': 5, 'test': 0}
customLoader = MIMIC_CXR_JPG_Loader(split_settings, project_folder)
train_tfds, val_tfds, test_tfds = customLoader.load()

# Both pipelines use the same shuffle buffer: two batches' worth of elements.
shuffle_buffer_size = 2 * BATCH_SIZE

# Training pipeline: shuffle -> preprocess -> batch.
train_tfds = train_tfds.shuffle(buffer_size=shuffle_buffer_size)
batched_train_tfds = train_tfds.map(_preprocess_train).batch(BATCH_SIZE)

# Validation pipeline: same stages, using the validation preprocessing.
val_tfds = val_tfds.shuffle(buffer_size=shuffle_buffer_size)
batched_val_tfds = val_tfds.map(_preprocess_val).batch(BATCH_SIZE)

In [12]:
# Fit the model, logging to TensorBoard, and report how long training took.
start = time()
history = model.fit(
    batched_train_tfds,
    validation_data=batched_val_tfds,
    epochs=EPOCHS,
    callbacks=[tensorboard_callback],
)
elapsed_seconds = time() - start
# print_time is not defined in this notebook; presumably it comes from the
# `utils.analysis` star import — verify.
print('\nTraining took {}'.format(print_time(elapsed_seconds)))

2024-02-14 12:47:52.262624: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100
2024-02-14 12:47:53.621084: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7fe7563a5850 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-02-14 12:47:53.621112: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce GTX 1080 Ti, Compute Capability 6.1
2024-02-14 12:47:53.624602: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-02-14 12:47:53.702666: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



Training took 0h:0m:23s


In [17]:
# Launch TensorBoard inline, reading every run under out/board.
# NOTE(review): the saved output of this cell shows the launch failed
# (exit code 1); the captured stderr is truncated, so the cause is not
# visible here — TODO investigate before relying on this cell.
%tensorboard --logdir=out/board --host=localhost --port=8008

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
2024-02-14 12:52:12.455967: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-14 12:52:13.015760: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.2/lib64:/home/payam/miniconda3/envs/tf2-gpu/lib/
2024-02-14 12:52:13.015847: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIB

In [21]:
import tensorflow_hub as hub
from tensorflow.python.framework import importer
from tensorflow.python.framework import ops
from tensorflow.python.summary import summary
from tensorflow.python.client import session

# Export the graph of a TF-Hub model as a TensorBoard event file so that its
# structure can be inspected in TensorBoard.
# NOTE(review): this cell rebinds the notebook-level name `model` (previously
# the Keras classifier) to the loaded hub object; re-running the training cell
# after this one will therefore not see the Keras model.
project_folder = os.getcwd()
log_dir = './out/board/vis_remedis'  # where to save the TensorBoard event file
model_handle = project_folder + '/base-models/remedis/cxr-50x1-remedis-m/'

model = hub.load(model_handle)  # import the TF-Hub model here
print(model.signatures)

# The signature key used here is 'serving_default'. For a different model,
# check the `model.signatures` output printed above to find the right key
# (e.g. _SignatureMap({'serving_default': <ConcreteFunction ...>})).
model_graphdef = model.signatures['serving_default'].graph.as_graph_def()

with session.Session(graph=ops.Graph()) as sess:
  # Import the GraphDef into the session's fresh, empty graph.
  importer.import_graph_def(model_graphdef)

  pb_visual_writer = summary.FileWriter(log_dir)
  try:
    pb_visual_writer.add_graph(sess.graph)
  finally:
    # Close the writer so the event file is flushed to disk and the file
    # handle is released even if add_graph fails.
    pb_visual_writer.close()
  print("Model Imported. Visualize by running: "
        "tensorboard --logdir={}".format(log_dir))

_SignatureMap({'serving_default': <ConcreteFunction signature_wrapper(*, input_1) at 0x7FE89F618CD0>})
Model Imported. Visualize by running: tensorboard --logdir=./out/board/vis_remedis


2024-02-14 13:41:09.105137: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 13:41:09.105443: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 13:41:09.105657: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 13:41:09.105871: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-02-14 13:41:09.106050: I tensorflow/compiler/xla/stream_executo

In [22]:
# Run the standalone fine-tuning script in a subprocess on the MIMIC-CXR
# dataset, using the base model at ./base-models/remedis/cxr-50x1-remedis-m/
# (2 epochs, batch size 4, learning rate 0.1).
!python finetuning2.py --dataset=MIMIC-CXR \
  --base_model_path=./base-models/remedis/cxr-50x1-remedis-m/ \
  --epochs=2 --batch_size=4 --learning_rate=0.1

2024-02-14 13:50:09.349947: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-14 13:50:09.914698: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.2/lib64:/home/payam/miniconda3/envs/tf2-gpu/lib/
2024-02-14 13:50:09.914803: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.2/lib64:/home/payam/miniconda3/envs/tf2-gpu