In [1]:
import numpy as np
import os
import warnings
from datetime import datetime
import mlflow
from dotenv import load_dotenv
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from tensorflow import keras 

warnings.filterwarnings('ignore')

load_dotenv('../.env')

RSEED = 123
MODELS_DIR=os.path.join('..','models')
MODEL_CHECKPOINTS_DIR=os.path.join('..','model_checkpoints')

start_time = datetime.now().strftime('-%Y-%m-%d-%T')

2022-12-02 09:29:13.315799: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-02 09:29:13.507610: I tensorflow/core/tpu/tpu_initializer_helper.cc:262] Libtpu path is: libtpu.so
I1202 09:29:13.609705268  215361 ev_epoll1_linux.cc:121]     grpc epoll fd: 69
D1202 09:29:13.609728099  215361 ev_posix.cc:141]            Using polling engine: epoll1
D1202 09:29:13.609825542  215361 lb_policy_registry.cc:43]   registering LB policy factory for "grpclb"
D1202 09:29:13.609836896  215361 lb_policy_registry.cc:43]   registering LB policy factory for "rls_experimental"
D1202 09:29:13.609847234  215361 lb_policy_registry.cc:43]   registering LB policy factory for "priority_experimental"
D1202 09:29:13

In [2]:
for dir in MODELS_DIR, MODEL_CHECKPOINTS_DIR:
    if not os.path.exists(dir):
        os.mkdir(dir)

In [3]:
# Set parameters
batch_size=16
patience=5
min_delta=0.001
dropout_rate=0.25
initial_learning_rate=0.0001


run_name_params = (
    f'bs{batch_size}'
    f'_pat{patience}'
    f'_del{min_delta}'
    f'_dr{dropout_rate}'
    f'_lr{initial_learning_rate}'
)

parent_run_name = f'mobilenetv2_{run_name_params}_save'

In [4]:
# Set up gcloud TPUs
cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')
tf.config.experimental_connect_to_cluster(cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
strategy = tf.distribute.TPUStrategy(cluster_resolver)


INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.
INFO:tensorflow:Initializing the TPU system: local


2022-12-02 09:29:16.338207: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-02 09:29:30.202506: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x87b31e0 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
2022-12-02 09:29:30.202544: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): TPU, 2a886c8
2022-12-02 09:29:30.202552: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (1): TPU, 2a886c8
2022-12-02 09:29:30.202558: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (2): TPU, 2a886c8
2022-12-02 09:29:30.202564: I tensorflow/compiler/xla/service/service.cc:181]   

INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:5, TPU, 0, 0)
I

In [5]:
# Set information for mlflow
run_description = """
Fully trained models 
    - classify each slice by tumour/tissue regions in the segmentation
    - Uses MobileNetV2
    - Saves model at end of run
"""
dataset = 'slice_classification_common_stratify_healthysegmented'
mlflow_tracking_uri = os.getenv('MLFLOW_URI')
if mlflow_tracking_uri:
    mlflow.set_tracking_uri(mlflow_tracking_uri)
mlflow_expt = os.getenv('CLASSIFICATION_EXPT')
if mlflow_expt:
    mlflow.set_experiment(mlflow_expt)    


print(f'Logging to \n URI:{mlflow_tracking_uri}\n Expt:{mlflow_expt}')


Logging to 
 URI:https://hudsju377cddpoevnjdkfnvpwovniewnipcdsnkvn.mlflow.neuefische.de
 Expt:braintumour_mri_slice_classification


In [6]:


with mlflow.start_run(
    run_name=parent_run_name,
    tags={
        'dataset': dataset,
    },
    description=run_description,
):

    img_height = 240
    img_width = 240
    data_dir = os.path.join('..','data','UPENN-GBM','slice_classification_common_stratify_healthysegmented','train')

    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        validation_split=0.2,
        subset="training",
        color_mode="rgba",
        seed=RSEED,
        image_size=(img_height, img_width),
        batch_size=batch_size)

    val_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        validation_split=0.2,
        subset="validation",
        color_mode="rgba",
        seed=RSEED,
        image_size=(img_height, img_width),
        batch_size=batch_size)

    
    class_names = train_ds.class_names

    # Calculate class weights for weighting accuracy
    ds_classes = []
    for _, batch_classes in train_ds:
        ds_classes.append(batch_classes.numpy())

    ds_classes = np.concatenate(ds_classes)

    class_weight = compute_class_weight(
        class_weight = 'balanced',
        classes = np.unique(ds_classes),
        y=ds_classes
    )

    class_weight = dict(zip(np.unique(ds_classes), class_weight))

    AUTOTUNE = tf.data.AUTOTUNE

    train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

    num_classes = len(class_names)
    
    margin = 8
    scaled_height = img_height - 2*margin
    scaled_width = img_width - 2*margin

    # Build layers for model with fixed base
    with strategy.scope():
        crop_layer = tf.keras.layers.Cropping2D(margin)
        rescale_initial = tf.keras.layers.Rescaling(1./127.5, offset=-1)
        conv_4to3_channel = tf.keras.layers.Conv2D(3,1,padding='same',activation='tanh')
        base_model = tf.keras.applications.MobileNetV2(
            input_shape=(scaled_width,scaled_height,3),
            include_top=False,
            weights='imagenet'
        )
        global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
        prediction_layer = tf.keras.layers.Dense(num_classes)

        base_model.trainable = False
        
        inputs = tf.keras.Input(shape=(img_width, img_height, 4))
        x = crop_layer(inputs)
        x = rescale_initial(x)
        x = conv_4to3_channel(x)
        x = base_model(x, training=False)
        x = global_average_layer(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)
        outputs = prediction_layer(x)
    
        earlystopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=patience,
            min_delta=min_delta,
            )
        
        model = tf.keras.Model(inputs, outputs)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate,),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )

        
    # Initial fit of classification and 4 to 3 channel layers
    with mlflow.start_run(
        run_name=f'fixed_{run_name_params}',
        tags={'dataset': dataset},
        nested=True
    ):
        mlflow.tensorflow.autolog()
        mlflow.log_param('ds_batch_size', batch_size)
        mlflow.log_param('ds_validation_batch_size', batch_size)


        fixed_base_epochs=80
        history_fixed_base = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=fixed_base_epochs,
            class_weight=class_weight,
            callbacks=[earlystopping],
        )

    # Relax top layers of base model
    base_model.trainable = True
    fix_below_layer = 100
    for layer in base_model.layers[:fix_below_layer]:
        layer.trainable = False
    with strategy.scope():
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate/10.0),
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'],
        )

    with mlflow.start_run(
        run_name=f'partial_{run_name_params}',
        tags={'dataset': dataset},
        nested=True
    ):
        mlflow.tensorflow.autolog()
        mlflow.log_param('ds_batch_size', batch_size)
        mlflow.log_param('ds_validation_batch_size', batch_size)

        partial_relax_epochs=history_fixed_base.epoch[-1] + 100 
        history_partial_relax = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=partial_relax_epochs,
            initial_epoch=history_fixed_base.epoch[-1]+1,
            class_weight=class_weight,
            callbacks=[earlystopping],
        )

    # Fully relax model
    model.trainable = True

    with strategy.scope():
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate/10.0),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )

    with mlflow.start_run(
        run_name=f'relax_{run_name_params}',
        tags={'dataset': dataset},
        nested=True
    ):
        mlflow.tensorflow.autolog()
        mlflow.log_param('ds_batch_size', batch_size)
        mlflow.log_param('ds_validation_batch_size', batch_size)

        # create checkpoint
        checkpoint_path = os.path.join(
            MODEL_CHECKPOINTS_DIR,
            parent_run_name + start_time + "-{epoch:03d}-{val_loss:.4f}.ckpt"
        )
        ckpt_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_path, 
            verbose=1, 
            save_weights_only=False,
            save_freq='epoch',
            monitor='val_loss',
            mode='min',
            save_best_only=True,
        ) 

        full_relax_epochs=history_partial_relax.epoch[-1] + 100
        history_full_relax = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=full_relax_epochs,
            initial_epoch=history_partial_relax.epoch[-1]+1,
            class_weight=class_weight,
            callbacks=[earlystopping, ckpt_callback],
        )


Found 79212 files belonging to 6 classes.
Using 63370 files for training.
Found 79212 files belonging to 6 classes.
Using 15842 files for validation.


2022-12-02 09:29:53.272092: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2022-12-02 09:29:53.340482: I tensorflow/compiler/jit/xla_compilation_cache.cc:476] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2022-12-02 09:30:13.989819: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 63370
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shape

Epoch 1/80


2022-12-02 09:30:22.175143: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:237] Subgraph fingerprint:11463474533832690926
2022-12-02 09:30:22.467672: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/Adam/AssignAddVariableOp.
2022-12-02 09:30:22.772155: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/Adam/AssignAddVariableOp.
2022-12-02 09:30:25.479511: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:437] TPU host compilation cache miss: cache_key(12182687663336830996), session_name()
2022-12-02 09:30:31.561300: I tensorflow/core/tpu/kernels/tpu_compile_op_common.cc:210] Compilation of 12182687663336830996 with session name  took 6.081654767s and succeeded
2022-12-02 09:30:31.598083: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:470] TPU host







2022-12-02 09:32:12.099721: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 15842
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:7"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRING
        }
      }
    }
  }
}

2022-12-02

Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80


2022-12-02 10:15:56.937831: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:237] Subgraph fingerprint:13210891179888482995
2022-12-02 10:15:57.068978: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2022-12-02 10:15:57.251749: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2022-12-02 10:15:57.546818: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:437] TPU host compilation cache miss: cache_key(8550665179216433261), session_name()




2022-12-02 10:16:00.890102: I tensorflow/core/tpu/kernels/tpu_compile_op_common.cc:210] Compilation of 8550665179216433261 with session name  took 3.343199053s and succeeded
2022-12-02 10:16:00.903880: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:470] TPU host compilation cache: compilation complete for cache_key(8550665179216433261), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13210891179888482995", property.function_library_fingerprint = 715824619521001631, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, topology.MissingChipCount() = 0, std::string(property.shapes_prefix) = "2,240,240,4,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
2022-12-02 10:16:00.903928: I tensorflow/core

INFO:tensorflow:Assets written to: /tmp/tmpcch78mlo/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmpcch78mlo/model/data/model/assets
2022-12-02 10:16:46.815421: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 63370
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        ar

Epoch 25/123


2022-12-02 10:16:57.773169: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:237] Subgraph fingerprint:9245565475796407030
2022-12-02 10:16:58.286601: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/Adam/AssignAddVariableOp.
2022-12-02 10:16:58.715780: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/Adam/AssignAddVariableOp.
2022-12-02 10:16:59.432313: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:437] TPU host compilation cache miss: cache_key(18126227813739970558), session_name()
2022-12-02 10:17:08.610070: I tensorflow/core/tpu/kernels/tpu_compile_op_common.cc:210] Compilation of 18126227813739970558 with session name  took 9.177672259s and succeeded
2022-12-02 10:17:08.649734: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:470] TPU host 







2022-12-02 10:18:59.632680: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 15842
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:7"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRING
        }
      }
    }
  }
}

2022-12-02

Epoch 26/123
Epoch 27/123
Epoch 28/123
Epoch 29/123
Epoch 30/123
Epoch 31/123
Epoch 32/123
Epoch 33/123
Epoch 34/123
Epoch 35/123
Epoch 36/123
Epoch 37/123
Epoch 38/123
Epoch 39/123
Epoch 40/123
Epoch 41/123
Epoch 42/123
Epoch 43/123
Epoch 44/123
Epoch 45/123
Epoch 46/123
Epoch 47/123
Epoch 48/123
Epoch 49/123
Epoch 50/123
Epoch 51/123
Epoch 52/123
Epoch 53/123


2022-12-02 11:18:41.976362: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:237] Subgraph fingerprint:13210891179888482995
2022-12-02 11:18:42.103739: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2022-12-02 11:18:42.286476: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.






INFO:tensorflow:Assets written to: /tmp/tmp_n2273vt/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmp_n2273vt/model/data/model/assets
2022-12-02 11:19:23.567855: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 63370
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        ar

Epoch 54/152


2022-12-02 11:19:42.699131: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:237] Subgraph fingerprint:10506435885300261483
2022-12-02 11:19:43.632768: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/Adam/AssignAddVariableOp.
2022-12-02 11:19:44.214182: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/Adam/AssignAddVariableOp.
2022-12-02 11:19:45.360044: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:437] TPU host compilation cache miss: cache_key(1042959909242842276), session_name()
2022-12-02 11:20:00.041748: I tensorflow/core/tpu/kernels/tpu_compile_op_common.cc:210] Compilation of 1042959909242842276 with session name  took 14.681586981s and succeeded
2022-12-02 11:20:00.085391: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:470] TPU host 







2022-12-02 11:22:22.957223: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 15842
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:7"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRING
        }
      }
    }
  }
}

2022-12-02


Epoch 54: val_loss improved from inf to 0.31785, saving model to ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-054-0.3179.ckpt




INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-054-0.3179.ckpt/assets


INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-054-0.3179.ckpt/assets


Epoch 55/152
Epoch 55: val_loss did not improve from 0.31785
Epoch 56/152
Epoch 56: val_loss did not improve from 0.31785
Epoch 57/152
Epoch 57: val_loss improved from 0.31785 to 0.31542, saving model to ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-057-0.3154.ckpt




INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-057-0.3154.ckpt/assets


INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-057-0.3154.ckpt/assets


Epoch 58/152
Epoch 58: val_loss improved from 0.31542 to 0.30773, saving model to ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-058-0.3077.ckpt




INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-058-0.3077.ckpt/assets


INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-058-0.3077.ckpt/assets


Epoch 59/152
Epoch 59: val_loss did not improve from 0.30773
Epoch 60/152
Epoch 60: val_loss did not improve from 0.30773
Epoch 61/152
Epoch 61: val_loss did not improve from 0.30773
Epoch 62/152
Epoch 62: val_loss improved from 0.30773 to 0.27486, saving model to ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-062-0.2749.ckpt




INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-062-0.2749.ckpt/assets


INFO:tensorflow:Assets written to: ../model_checkpoints/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15-062-0.2749.ckpt/assets


Epoch 63/152
Epoch 63: val_loss did not improve from 0.27486
Epoch 64/152
Epoch 64: val_loss did not improve from 0.27486
Epoch 65/152
Epoch 65: val_loss did not improve from 0.27486
Epoch 66/152
Epoch 66: val_loss did not improve from 0.27486
Epoch 67/152
Epoch 67: val_loss did not improve from 0.27486


2022-12-02 11:59:20.172417: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:237] Subgraph fingerprint:13210891179888482995
2022-12-02 11:59:20.298765: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
2022-12-02 11:59:20.480190: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.






INFO:tensorflow:Assets written to: /tmp/tmp2cyqo5bn/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmp2cyqo5bn/model/data/model/assets


In [7]:
test_data_dir = os.path.join('..','data','UPENN-GBM','slice_classification_common_stratify_healthysegmented','test')

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    color_mode="rgba",
    seed=RSEED,
    shuffle=False,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

Found 9605 files belonging to 6 classes.


In [8]:
val_pred = model.predict(val_ds)
val_prob = tf.nn.softmax(val_pred)
val_class_pred = [np.argmax(x) for x in val_prob]
val_base = [ 0 for x in val_class_pred ]

val_true_class = []
for _, classes in val_ds:
    val_true_class += list(classes)




2022-12-02 12:00:06.217306: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 15842
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:7"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRING
        }
      }
    }
  }
}

2022-12-02

  6/991 [..............................] - ETA: 24s   

2022-12-02 12:00:13.135835: I tensorflow/core/tpu/kernels/tpu_compile_op_common.cc:210] Compilation of 4858963082524620730 with session name  took 3.755608825s and succeeded
2022-12-02 12:00:13.148943: I tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc:470] TPU host compilation cache: compilation complete for cache_key(4858963082524620730), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_10454398515663473433", property.function_library_fingerprint = 6712631106669902044, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, topology.MissingChipCount() = 0, std::string(property.shapes_prefix) = "2,240,240,4,;2,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
2022-12-02 12:00:13.149015: I tensorflow/



In [9]:
test_pred = model.predict(test_ds)
test_prob = tf.nn.softmax(test_pred)
test_class_pred = [np.argmax(x) for x in test_prob]
test_base = [ 0 for x in test_class_pred ]

test_true_class = []
for _, classes in test_ds:
    test_true_class += list(classes)


2022-12-02 12:00:40.167474: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_1"
op: "TensorSliceDataset"
input: "Placeholder/_0"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 9605
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\027TensorSliceDataset:5711"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
    }
  }
}
attr {
  key: "replicate_on_split"
  value {
    b: false
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRING
        }
      }
    }
  }
}





In [14]:
class_names

['background',
 'background_edema_healthy',
 'background_edema_healthy_contrast',
 'background_healthy',
 'background_tumour_edema_healthy',
 'background_tumour_edema_healthy_contrast']

In [10]:
print(classification_report(val_true_class, val_class_pred))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       528
           1       0.69      0.85      0.76      2499
           2       0.32      0.58      0.42       293
           3       0.96      0.90      0.93      8465
           4       0.49      0.72      0.58        53
           5       0.98      0.90      0.94      4004

    accuracy                           0.89     15842
   macro avg       0.74      0.83      0.77     15842
weighted avg       0.91      0.89      0.90     15842



In [11]:
print(classification_report(test_true_class, test_class_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       965
           1       0.52      0.66      0.58      1361
           2       0.17      0.37      0.23       166
           3       0.91      0.86      0.88      4743
           4       0.00      0.00      0.00        69
           5       0.96      0.89      0.93      2301

    accuracy                           0.84      9605
   macro avg       0.60      0.63      0.60      9605
weighted avg       0.86      0.84      0.85      9605



In [12]:
model_file_name = os.path.join(MODELS_DIR, parent_run_name + start_time)
model.save(model_file_name)




INFO:tensorflow:Assets written to: ../models/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15/assets


INFO:tensorflow:Assets written to: ../models/mobilenetv2_bs16_pat5_del0.001_dr0.25_lr0.0001_save-2022-12-02-09:29:15/assets
