In [1]:
import os, pickle
import tensorflow as tf
import tensorflow.keras.backend as K
from datetime import datetime
import numpy as np
from sklearn.model_selection import KFold
import src.training as tr_fn
from loguru import logger
import wandb
from config import CFG, GCFG

AUTO = tf.data.experimental.AUTOTUNE
CFG2 = GCFG()
class_dict = pickle.load(open('src/class_dict.pkl', 'rb'))

2023-10-28 09:29:51.579091: I tensorflow/compiler/xla/stream_executor/tpu/tpu_initializer_helper.cc:242] Libtpu path is: libtpu.so
D1028 09:29:54.620323350   13219 config.cc:175]                        gRPC EXPERIMENT tcp_frame_size_tuning               OFF (default:OFF)
D1028 09:29:54.620346394   13219 config.cc:175]                        gRPC EXPERIMENT tcp_rcv_lowat                       OFF (default:OFF)
D1028 09:29:54.620351689   13219 config.cc:175]                        gRPC EXPERIMENT peer_state_based_framing            OFF (default:OFF)
D1028 09:29:54.620356056   13219 config.cc:175]                        gRPC EXPERIMENT memory_pressure_controller          OFF (default:OFF)
D1028 09:29:54.620360509   13219 config.cc:175]                        gRPC EXPERIMENT unconstrained_max_quota_buffer_size OFF (default:OFF)
D1028 09:29:54.620364897   13219 config.cc:175]                        gRPC EXPERIMENT event_engine_client                 OFF (default:OFF)
D1028 09:29:54.62036924

In [2]:
def decode_image(image_data, CFG):
    image = tf.image.decode_jpeg(image_data, channels=3)  # image format uint8 [0,255]
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.reshape(image, [*CFG.IMAGE_SIZE, 3]) # explicit size needed for TPU
    return image

def read_labeled_tfrecord(example):
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'dataset': tf.io.FixedLenFeature([], tf.int64),
        'longitude': tf.io.FixedLenFeature([], tf.float32),
        'latitude': tf.io.FixedLenFeature([], tf.float32),
        'norm_date': tf.io.FixedLenFeature([], tf.float32),
        'class_priors': tf.io.FixedLenFeature([], tf.float32),
        'class_id': tf.io.FixedLenFeature([], tf.int64),
    }
    example = tf.io.parse_single_example(example, feature_description)
    # image = decode_image(example['image'], CFG)
    label = tf.cast(example['class_id'], tf.int32)
    return example['image'], label

def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    # returns 3x3 transform matrix which transforms indices

    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.0
    shear = math.pi * shear / 180.0

    def get_3x3_mat(lst):
        return tf.reshape(tf.concat([lst], axis=0), [3, 3])

    # ROTATION MATRIX
    c1 = tf.math.cos(rotation)
    s1 = tf.math.sin(rotation)
    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    rotation_matrix = get_3x3_mat([c1, s1, zero, -s1, c1, zero, zero, zero, one])
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)

    shear_matrix = get_3x3_mat([one, s2, zero, zero, c2, zero, zero, zero, one])
    # ZOOM MATRIX
    zoom_matrix = get_3x3_mat([one / height_zoom, zero, zero, zero, one / width_zoom, zero, zero, zero, one])
    # SHIFT MATRIX
    shift_matrix = get_3x3_mat([one, zero, height_shift, zero, one, width_shift, zero, zero, one])

    return K.dot(K.dot(rotation_matrix, shear_matrix), K.dot(zoom_matrix, shift_matrix))


def transform(image, CFG):
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    DIM = CFG.IMAGE_SIZE[0]
    XDIM = DIM % 2  # fix for size 331   

    rot = CFG.ROT_ * tf.random.normal([1], dtype='float32')
    shr = CFG.SHR_ * tf.random.normal([1], dtype='float32')
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / CFG.HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / CFG.WZOOM_
    h_shift = CFG.HSHIFT_ * tf.random.normal([1], dtype='float32')
    w_shift = CFG.WSHIFT_ * tf.random.normal([1], dtype='float32')

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot, shr, h_zoom, w_zoom, h_shift, w_shift)

    # LIST DESTINATION PIXEL INDICES
    x = tf.repeat(tf.range(DIM // 2, -DIM // 2, -1), DIM)
    y = tf.tile(tf.range(-DIM // 2, DIM // 2), [DIM])
    z = tf.ones([DIM * DIM], dtype='int32')
    idx = tf.stack([x, y, z])

    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    idx2 = K.clip(idx2, -DIM // 2 + XDIM + 1, DIM // 2)

    # FIND ORIGIN PIXEL VALUES
    idx3 = tf.stack([DIM // 2 - idx2[0,], DIM // 2 - 1 + idx2[1,]])
    d = tf.gather_nd(image, tf.transpose(idx3))

    return tf.reshape(d, [DIM, DIM, 3])


def prepare_image(img, CFG, augment=True, dim=256):
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.reshape(img, [*CFG.IMAGE_SIZE, 3])

    if augment:
        img = transform(img, CFG)
        img = tf.image.random_flip_left_right(img)
        # img = tf.image.random_hue(img, 0.01)
        img = tf.image.random_saturation(img, 0.7, 1.3)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        img = tf.image.random_brightness(img, 0.1)

    img = tf.reshape(img, [CFG.IMAGE_SIZE[0], CFG.IMAGE_SIZE[0], 3])

    return img


def get_dataset(
    files, CFG, augment=False, shuffle=False, repeat=False, labeled=True, batch_size=16, dim=224
    ):
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO)
    ds = ds.cache()

    if repeat:
        ds = ds.repeat()

    if shuffle:
        ds = ds.shuffle(1024 * 8)
        opt = tf.data.Options()
        opt.experimental_deterministic = False
        ds = ds.with_options(opt)

    if labeled:
        ds = ds.map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    else:
        ds = ds.map(read_unlabeled_tfrecord, num_parallel_calls=AUTO)

    ds = ds.map(
        lambda img, imgname_or_label: (prepare_image(
            img, CFG, augment=augment, dim=dim), imgname_or_label), num_parallel_calls=AUTO
    )

    ds = ds.batch(batch_size * CFG.REPLICAS)
    ds = ds.prefetch(AUTO)
    return ds

In [3]:
def get_history(model, fold, files_train, files_valid, CFG):
    logger.info("Training...")
    history = model.fit(
        get_dataset(files_train, CFG),
        epochs=CFG.EPOCHS,
        callbacks=tr_fn.make_callbacks(CFG),
        steps_per_epoch=CFG.STEPS_PER_EPOCH,
        validation_data=get_dataset(files_valid, CFG),  # class_weight = {0:1,1:2},
        verbose=CFG.VERBOSE,
    )
    return history

def get_gcs_path(image_size):
    GCS_PATH_SELECT = {
        192: f"{CFG2.GCS_REPO}/tfrecords-jpeg-192x192",
        224: f"{CFG2.GCS_REPO}/tfrecords-jpeg-224x224v2",
        384: f"{CFG2.GCS_REPO}/tfrecords-jpeg-384x384",
        512: f"{CFG2.GCS_REPO}/tfrecords-jpeg-512x512",
    }
    GCS_PATH = GCS_PATH_SELECT[image_size]    
    return GCS_PATH

def oof_targets_names_folds(files_valid, CFG, fold, oof_tar, oof_folds, oof_names):
    ds_valid = get_dataset(
        files_valid,
        CFG,
        augment=False,
        repeat=False,
        dim=CFG.IMG_SIZES,
        labeled=True,
        return_image_names=True,
    )
    oof_tar.append(
        np.array([target.numpy() for img, target in iter(ds_valid.unbatch())])
    )
    oof_folds.append(np.ones_like(oof_tar[-1], dtype="int8") * fold)
    ds = get_dataset(
        files_valid,
        CFG,
        augment=False,
        repeat=False,
        dim=CFG.IMG_SIZES,
        labeled=False,
        return_image_names=True,
    )
    oof_names.append(
        np.array(
            [img_name.numpy().decode("utf-8") for img, img_name in iter(ds.unbatch())]
        )
    )
    return oof_tar, oof_folds, oof_names

In [4]:
cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')
tf.config.experimental_connect_to_cluster(cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
strategy = tf.distribute.TPUStrategy(cluster_resolver)

INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.


INFO:tensorflow:Initializing the TPU system: local


2023-10-28 09:30:26.977180: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55736b1bbb60 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
2023-10-28 09:30:26.977217: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): TPU, 2a886c8
2023-10-28 09:30:26.977230: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1): TPU, 2a886c8
2023-10-28 09:30:26.977240: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (2): TPU, 2a886c8
2023-10-28 09:30:26.977250: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (3): TPU, 2a886c8
2023-10-28 09:30:26.977260: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (4): TPU, 2a886c8
2023-10-28 09:30:26.977270: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (5): TPU, 2a886c8
2023-10-28 09:30:26.977279: I tensorflow/compiler/xla/service/service.cc:176]   StreamEx

INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:5, TPU, 0, 0)
I

In [5]:
CFG2.REPLICAS = strategy.num_replicas_in_sync
print("Number of accelerators: ", strategy.num_replicas_in_sync)

Number of accelerators:  8


In [6]:
GCS_PATH = get_gcs_path(CFG2.IMAGE_SIZE[0])
skf = KFold(n_splits=CFG2.FOLDS, shuffle=True, random_state=CFG2.SEED)
oof_pred = []
oof_tar = []
oof_val = []
oof_names = []
oof_folds = []

for fold, (idxT, idxV) in enumerate(skf.split(np.arange(107))):
    # DISPLAY FOLD INFO
    print("#" * 25)
    print("#### FOLD", fold + 1)

    files_train = tf.io.gfile.glob([f"{GCS_PATH}/train{x:02d}*.tfrec" for x in idxT])
    files_valid = tf.io.gfile.glob(f"{GCS_PATH}/train{x:02d}*.tfrec" for x in idxV)
    files_test = tf.io.gfile.glob(f"{GCS_PATH}/val*.tfrec")

    CFG2.NUM_TRAINING_IMAGES = tr_fn.count_data_items(files_train)
    CFG2.NUM_VALIDATION_IMAGES = tr_fn.count_data_items(files_valid)

    CFG = CFG(REPLICAS=CFG2.REPLICAS, NUM_TRAINING_IMAGES=CFG2.NUM_TRAINING_IMAGES, NUM_VALIDATION_IMAGES=CFG2.NUM_VALIDATION_IMAGES)

    logger.debug(
        f"Image Size {CFG.IMAGE_SIZE} with Model {CFG.MODEL} and batch_sz {CFG.BATCH_SIZE}"  # {CFG2.BASE_BATCH_SIZE * CFG2.REPLICAS}"
    )

    wandb.init(
        project="Mushroom-Classifier",
        tags=[CFG.MODEL, CFG.OPT, CFG.LR_SCHED, str(CFG.IMAGE_SIZE[0])],
        config=CFG,
        dir="../",
        config_exclude_keys=[
            "DEBUG", "GCS_REPO", "TRAIN", "ROOT", "DATA", "VERBOSE", "DISPLAY_PLOT", 
            "BASE_BATCH_SIZE", "WGTS", "OPT", "LR_SCHED", "MODEL"
        ],
        group="New dataset flow",
    )

    logger.info("Build & Compile Model...")
    
    K.clear_session()
    with strategy.scope():
        model = tr_fn.create_model(CFG, class_dict)
        opt = tr_fn.create_optimizer(CFG)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()

        top3_acc = tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=3, name='sparse_top_3_categorical_accuracy'
        )
    model.compile(optimizer=opt, loss=loss, metrics=['sparse_categorical_accuracy', top3_acc])

    logger.info("Training Model...")
    # TRAIN
    history = get_history(model, fold, files_train, files_valid, CFG)

    # PREDICT OOF USING TTA
    logger.info("Predicting OOF with TTA...")
    ds_valid = get_dataset(files_valid, CFG),
    ct_valid = count_data_items(files_valid)
    STEPS = CFG.TTA * ct_valid / CFG.BATCH_SIZES / 4 / CFG.REPLICAS
    pred = model.predict(ds_valid, steps=STEPS, verbose=CFG.VERBOSE)[
        : CFG.TTA * ct_valid,
    ]
    oof_pred.append(np.mean(pred.reshape((ct_valid, CFG.TTA), order="F"), axis=1))
    # oof_pred.append(model.predict(get_dataset(files_valid,dim=CFG.IMG_SIZES),verbose=1))

    # GET OOF TARGETS AND NAMES
    oof_tar, oof_folds, oof_names = oof_targets_names_folds(files_valid, CFG, fold, oof_tar, oof_folds, oof_names)

    # REPORT RESULTS
    auc = roc_auc_score(oof_tar[-1], oof_pred[-1])
    oof_val.append(np.max(history.history["val_auc"]))
    logger.info(
        f"#### FOLD {fold + 1} OOF AUC without TTA = {oof_val[-1]}, with TTA = {auc}"
    )
    wandb.finish()

#########################
#### FOLD 1


[32m2023-10-28 09:31:51.637[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m23[0m - [34m[1mImage Size [224, 224] with Model swin_large_224 and batch_sz 256[0m
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mg-broughton[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112460422221677, max=1.0…

[32m2023-10-28 09:32:00.795[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m39[0m - [1mBuild & Compile Model...[0m
2023-10-28 09:32:03.172064: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-10-28 09:32:04.235421: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2023-10-28 09:32:04.542092: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:32:04.862338: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
[32m2023-10-28 09:33:36.420[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m52[0m - [1mTraining Model...[0m
[32m2023-10-28 09:33:36.423[0m | [1mINFO    [0m | [36m__main__[0m:[36mget

Epoch 1/30


2023-10-28 09:34:42.073321: I tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc:241] Subgraph fingerprint:5860969846676704230
2023-10-28 09:34:50.174523: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.
2023-10-28 09:35:04.881042: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:35:04.883934: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:35:04.884541: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:35:04.884629: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:35:04.885253: E ./tensorflow/compiler/xla/stream_executor/st


Epoch 1: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 329s - loss: 6.0392 - sparse_categorical_accuracy: 0.0035 - sparse_top_3_categorical_accuracy: 0.0085 - val_loss: 5.9788 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0129 - 329s/epoch - 9s/step
Epoch 2/30


2023-10-28 09:39:31.422539: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:39:42.680534: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:39:42.989824: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.



Epoch 2: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 44s - loss: 5.9582 - sparse_categorical_accuracy: 0.0046 - sparse_top_3_categorical_accuracy: 0.0122 - val_loss: 5.9498 - val_sparse_categorical_accuracy: 0.0050 - val_sparse_top_3_categorical_accuracy: 0.0152 - 44s/epoch - 1s/step
Epoch 3/30


2023-10-28 09:39:51.916816: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.
2023-10-28 09:40:11.850122: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.



Epoch 3: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 42s - loss: 5.9791 - sparse_categorical_accuracy: 0.0063 - sparse_top_3_categorical_accuracy: 0.0145 - val_loss: 5.9679 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0138 - 42s/epoch - 1s/step
Epoch 4/30

Epoch 4: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 44s - loss: 5.9544 - sparse_categorical_accuracy: 0.0037 - sparse_top_3_categorical_accuracy: 0.0119 - val_loss: 5.9466 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0150 - 44s/epoch - 1s/step
Epoch 5/30


2023-10-28 09:41:35.779411: E ./tensorflow/compiler/xla/stream_executor/stream_executor_internal.h:124] SetPriority unimplemented for this stream.



Epoch 5: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 43s - loss: 5.9351 - sparse_categorical_accuracy: 0.0039 - sparse_top_3_categorical_accuracy: 0.0141 - val_loss: 5.9365 - val_sparse_categorical_accuracy: 0.0050 - val_sparse_top_3_categorical_accuracy: 0.0118 - 43s/epoch - 1s/step
Epoch 6/30

Epoch 6: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 44s - loss: 5.9401 - sparse_categorical_accuracy: 0.0074 - sparse_top_3_categorical_accuracy: 0.0178 - val_loss: 5.9320 - val_sparse_categorical_accuracy: 0.0050 - val_sparse_top_3_categorical_accuracy: 0.0152 - 44s/epoch - 1s/step
Epoch 7/30

Epoch 7: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 42s - loss: 6.0317 - sparse_categorical_accuracy: 0.0056 - sparse_top_3_categorical_accuracy: 0.0119 - val_loss: 5.9529 - val_sparse_categorical_accuracy: 0.0026 - val_sparse_top_3_categorical_accuracy: 0.0110 - 42s/epoch - 1s/step
Epoch 8/30

Epoch 8: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 42s - loss: 5.9556 - sparse_categorical_accuracy: 0.0052 - sparse_top_3_categorical_accuracy: 0.0126 - val_loss: 5.9545 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0123 - 42s/epoch - 1s/step
Epoch 9/30

Epoch 9: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 43s - loss: 5.9565 - sparse_categorical_accuracy: 0.0037 - sparse_top_3_categorical_accuracy: 0.0137 - val_loss: 5.9395 - val_sparse_categorical_accuracy: 0.0050 - val_sparse_top_3_categorical_accuracy: 0.0128 - 43s/epoch - 1s/step
Epoch 10/30

Epoch 10: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 42s - loss: 5.9400 - sparse_categorical_accuracy: 0.0069 - sparse_top_3_categorical_accuracy: 0.0154 - val_loss: 5.9393 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0148 - 42s/epoch - 1s/step
Epoch 11/30

Epoch 11: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 45s - loss: 5.9478 - sparse_categorical_accuracy: 0.0082 - sparse_top_3_categorical_accuracy: 0.0137 - val_loss: 5.9304 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0167 - 45s/epoch - 1s/step
Epoch 12/30

Epoch 12: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 44s - loss: 5.9268 - sparse_categorical_accuracy: 0.0052 - sparse_top_3_categorical_accuracy: 0.0161 - val_loss: 5.9258 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0167 - 44s/epoch - 1s/step
Epoch 13/30

Epoch 13: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 44s - loss: 5.9482 - sparse_categorical_accuracy: 0.0059 - sparse_top_3_categorical_accuracy: 0.0132 - val_loss: 5.9238 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0167 - 44s/epoch - 1s/step
Epoch 14/30

Epoch 14: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 44s - loss: 5.9382 - sparse_categorical_accuracy: 0.0065 - sparse_top_3_categorical_accuracy: 0.0167 - val_loss: 5.9229 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0167 - 44s/epoch - 1s/step
Epoch 15/30

Epoch 15: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 42s - loss: 5.9422 - sparse_categorical_accuracy: 0.0043 - sparse_top_3_categorical_accuracy: 0.0141 - val_loss: 5.9463 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0149 - 42s/epoch - 1s/step
Epoch 16/30

Epoch 16: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 43s - loss: 5.9530 - sparse_categorical_accuracy: 0.0063 - sparse_top_3_categorical_accuracy: 0.0132 - val_loss: 5.9450 - val_sparse_categorical_accuracy: 0.0068 - val_sparse_top_3_categorical_accuracy: 0.0157 - 43s/epoch - 1s/step
Epoch 17/30


2023-10-28 09:50:03.483665: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 5317757149409005582
2023-10-28 09:50:03.483721: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 11245697772187935193
2023-10-28 09:50:03.483755: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 13883331947871256807
2023-10-28 09:50:03.483790: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 15150050127418225084
2023-10-28 09:50:03.483826: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 3251553944753356915
2023-10-28 09:50:03.483880: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 14250957387171225872
2023-10-28 09:50:03.483897: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv


Epoch 17: val_loss did not improve from 0.80000


[34m[1mwandb[0m: Adding directory to artifact (/home/broug/Mushroom-Classifier/models/swin_large_224/1028-0930)... Done. 0.0s


36/36 - 33s - loss: 5.9448 - sparse_categorical_accuracy: 0.0030 - sparse_top_3_categorical_accuracy: 0.0101 - val_loss: 5.9466 - val_sparse_categorical_accuracy: 0.0026 - val_sparse_top_3_categorical_accuracy: 0.0126 - 33s/epoch - 907ms/step


[32m2023-10-28 09:50:28.751[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m57[0m - [1mPredicting OOF with TTA...[0m


NameError: name 'count_data_items' is not defined