In [None]:
import os
import sys
import subprocess
import re
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import BackupAndRestore
import tensorflow_model_optimization as tfmot
from typing import List, Dict

# Pick the compute device: enable memory growth on every physical GPU, then
# expose only the first one to TensorFlow. Without a GPU the run stays on CPU.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. Training will run on CPU.")
else:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        primary_gpu = gpus[0]
        tf.config.set_visible_devices(primary_gpu, 'GPU')
        print("GPU detected and configured for training:", primary_gpu)
    except RuntimeError as err:
        # Configuration failures are reported but deliberately not fatal.
        print(err)

# --- QAT fine-tuning hyper-parameters ---
BATCH_SIZE = 64
QAT_FINETUNE_EPOCHS = 10
QAT_LEARNING_RATE = 1e-5

# --- Model variant: architecture preset and which input streams are enabled ---
MODEL_ARCH = "32"
USE_CONTEXT = True
USE_FOVEA = True

# --- Experiment identity; together these determine OUTPUT_DIR ---
DATASET = "KTH"
EXPERIMENT = "both32_500_64_ctx32x32_fov16x16"
OUTPUT_DIR = f"{DATASET}_results/{EXPERIMENT}"

# --- Input tensor shapes, unpacked elsewhere as (height, width, channels) ---
CONTEXT_SHAPE = (32, 32, 1)
FOVEA_SHAPE = (16, 16, 1)

# --- tf.data pipeline switches ---
SHUFFLE_BUFFER = 30000
CACHE_TO_DISK = True
PREFETCH_TO_DEVICE = True

# Class labels: the 11-class list is used only when DATASET is exactly "UCF11";
# every other DATASET value gets the 4-class list.
if DATASET == "UCF11":
    CLASS_NAMES = [
        "basketball", "biking", "diving",
        "golf_swing", "horse_riding",
        "soccer_juggling", "swing",
        "tennis_swing", "trampoline_jumping",
        "volleyball_spiking", "walking",
    ]
else:
    CLASS_NAMES = ["boxing", "handclapping", "handwaving", "walking"]
NUM_CLASSES = len(CLASS_NAMES)

# A model needs at least one input stream.
if not USE_CONTEXT and not USE_FOVEA:
    raise ValueError("At least one of USE_CONTEXT or USE_FOVEA must be True.")


2025-08-10 22:17:48.537851: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-08-10 22:17:48.537936: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-08-10 22:17:48.540986: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-10 22:17:48.551426: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPU detected and configured for training: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


2025-08-10 22:17:54.470319: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-08-10 22:17:54.527800: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-08-10 22:17:54.528799: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [2]:
# Locations under OUTPUT_DIR that this cell only names (it does not create them).
CHECKPOINTS_DIR = os.path.join(OUTPUT_DIR, "checkpoints")
TEST_DATA_DIR = os.path.join(OUTPUT_DIR, "test_data")

# Locations under OUTPUT_DIR that this cell creates if they are missing.
TFLITE_DIR = os.path.join(OUTPUT_DIR, "quantized_models_qat")
PIPE_CACHE_DIR = os.path.join(OUTPUT_DIR, "pipeline_qat")
C_ARRAY_DIR = os.path.join(OUTPUT_DIR, "c_arrays_qat")

for _qat_dir in (TFLITE_DIR, PIPE_CACHE_DIR, C_ARRAY_DIR):
    os.makedirs(_qat_dir, exist_ok=True)

In [3]:
# Layer widths for every supported MODEL_ARCH preset, in the order
# (CHANNEL1_SIZE, CHANNEL2_SIZE, DENSE1_SIZE, DENSE2_SIZE).
_ARCH_PRESETS = {
    "64": (16, 16, 10, 16),
    "32": (16, 32, 20, 32),
    "main": (8, 16, 8, 8),
}

# Fail immediately on an unknown preset. Otherwise the size constants would stay
# undefined (NameError much later) or, in a live kernel, silently keep the values
# of a previously selected preset.
if MODEL_ARCH not in _ARCH_PRESETS:
    raise ValueError(
        f"Unknown MODEL_ARCH {MODEL_ARCH!r}; expected one of {sorted(_ARCH_PRESETS)}."
    )

CHANNEL1_SIZE, CHANNEL2_SIZE, DENSE1_SIZE, DENSE2_SIZE = _ARCH_PRESETS[MODEL_ARCH]

def build_branch(input_shape, name_prefix=""):
    """Build one convolutional feature branch.

    The branch is two rounds of 3x3 convolution followed by 2x2 max pooling
    (CHANNEL1_SIZE filters, then CHANNEL2_SIZE), then Dropout(0.3) and Flatten.
    No activation argument is passed to the convolutions.

    Args:
        input_shape: Shape handed to ``tf.keras.Input`` (without batch axis).
        name_prefix: Prepended to ``"input"`` to name the Input layer.

    Returns:
        Tuple ``(input_tensor, flattened_features)``.
    """
    branch_input = tf.keras.Input(shape=input_shape, name=f"{name_prefix}input")

    features = branch_input
    # Layers are created in the same order as before (conv, pool, conv, pool)
    # so auto-generated layer names and weight ordering are unchanged.
    for n_filters in (CHANNEL1_SIZE, CHANNEL2_SIZE):
        conv = tf.keras.layers.Conv2D(
            n_filters,
            kernel_size=(3, 3),
            kernel_initializer="he_normal",
            bias_initializer="zeros",
        )
        features = conv(features)
        features = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(features)

    features = tf.keras.layers.Dropout(0.3)(features)
    flattened = tf.keras.layers.Flatten()(features)
    return branch_input, flattened

def build_model(
    use_context: bool,
    use_fovea: bool,
    context_input_shape=CONTEXT_SHAPE,
    fovea_input_shape=FOVEA_SHAPE,
    num_classes=NUM_CLASSES
):
    """Assemble the single- or two-stream classifier.

    Each enabled stream gets its own ``build_branch`` feature extractor. With
    two streams the flattened features are concatenated; with one they are used
    directly. The head is Dense(DENSE1_SIZE) -> Dropout(0.5) ->
    Dense(DENSE2_SIZE) -> Dropout(0.5) -> Dense(num_classes, softmax). The two
    hidden Dense layers are created without an activation argument.

    Args:
        use_context: Include the context branch (input named "context_input").
        use_fovea: Include the fovea branch (input named "fovea_input").
        context_input_shape: Input shape of the context branch.
        fovea_input_shape: Input shape of the fovea branch.
        num_classes: Width of the softmax output layer.

    Returns:
        An uncompiled ``tf.keras.Model`` named "SingleStream_Context",
        "SingleStream_Fovea" or "MultiResFusion".

    Raises:
        ValueError: If both ``use_context`` and ``use_fovea`` are false.
    """
    # Without this guard an empty branch list reaches Concatenate and fails
    # with an error that does not mention the actual cause.
    if not (use_context or use_fovea):
        raise ValueError("At least one of use_context or use_fovea must be True.")

    inputs = []
    branches = []
    # Branch creation order (context first, then fovea) determines input order
    # and layer naming, so it must stay fixed for checkpoint compatibility.
    if use_context:
        ctx_inp, ctx_out = build_branch(context_input_shape, name_prefix="context_")
        inputs.append(ctx_inp)
        branches.append(ctx_out)
    if use_fovea:
        fov_inp, fov_out = build_branch(fovea_input_shape, name_prefix="fovea_")
        inputs.append(fov_inp)
        branches.append(fov_out)

    if len(branches) == 1:
        fused = branches[0]
        # Exactly one stream is enabled here, so use_context alone decides the name.
        model_name = "SingleStream_Context" if use_context else "SingleStream_Fovea"
    else:
        fused = tf.keras.layers.Concatenate()(branches)
        model_name = "MultiResFusion"

    z = tf.keras.layers.Dense(DENSE1_SIZE, kernel_initializer="he_normal", bias_initializer="zeros")(fused)
    z = tf.keras.layers.Dropout(0.5)(z)
    z = tf.keras.layers.Dense(DENSE2_SIZE, kernel_initializer="he_normal", bias_initializer="zeros")(z)
    z = tf.keras.layers.Dropout(0.5)(z)
    output = tf.keras.layers.Dense(num_classes, activation="softmax", kernel_initializer="he_normal", bias_initializer="zeros")(z)
    model = tf.keras.Model(inputs=inputs, outputs=output, name=model_name)
    return model

In [4]:
def make_tf_dataset(features_paths: Dict[str, List[str]],
                      labels: np.ndarray,
                      batch_size: int,
                      shuffle: bool,
                      drop_remainder: bool,
                      cache_file: str = None,
                      prefetch_to_device: bool = PREFETCH_TO_DEVICE):
    """Build a batched tf.data pipeline that decodes PNG files into model inputs.

    Pipeline order: slice (paths, label) pairs -> optional shuffle -> decode and
    resize images in parallel -> optional on-disk cache -> batch -> optional
    device prefetch -> host prefetch.

    Args:
        features_paths: Maps an input name to a list of image file paths. Only
            the keys "context_input" and "fovea_input" are decoded; any other
            key is dropped from the output.
        labels: One label per sample; every path list must have this length.
        batch_size: Number of samples per batch.
        shuffle: Shuffle the (path, label) pairs with a buffer of
            ``min(SHUFFLE_BUFFER, len(labels))``.
        drop_remainder: Passed to ``Dataset.batch``.
        cache_file: Cache file prefix. Used only when CACHE_TO_DISK is true and
            this is not None.
        prefetch_to_device: Request prefetching onto "/GPU:0". Ignored when no
            GPU is visible to TensorFlow.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features_dict, labels)`` batches where
        each image is float32 with shape CONTEXT_SHAPE / FOVEA_SHAPE.

    Raises:
        AssertionError: If a path list and ``labels`` differ in length.
    """
    for k, v in features_paths.items():
        assert len(v) == len(labels), f"Length mismatch between {k} paths and labels."
    feat_elems = {k: tf.constant(v) for k, v in features_paths.items()}
    ds = tf.data.Dataset.from_tensor_slices((feat_elems, labels))

    # Trade element ordering for throughput.
    opts = tf.data.Options()
    opts.experimental_deterministic = False
    opts.experimental_slack = True
    ds = ds.with_options(opts)

    # NOTE(review): shuffling happens before the cache; with shuffle=True and a
    # cache file the cached element order may be the one from the first pass --
    # confirm this is intended before enabling both together.
    if shuffle:
        ds = ds.shuffle(buffer_size=min(SHUFFLE_BUFFER, len(labels)))

    # Target shape for every input name this loader knows how to decode.
    input_shapes = {"context_input": CONTEXT_SHAPE, "fovea_input": FOVEA_SHAPE}

    def _load_grayscale(path, target_shape):
        """Read one PNG as a single-channel float32 image resized to target_shape."""
        height, width, _ = target_shape
        img = tf.io.read_file(path)
        img = tf.io.decode_png(img, channels=1)
        img = tf.image.convert_image_dtype(img, tf.float32)
        img = tf.image.resize(img, [height, width])
        return tf.ensure_shape(img, target_shape)

    def _loader(features, label):
        out_feats = {
            key: _load_grayscale(features[key], shape)
            for key, shape in input_shapes.items()
            if key in features
        }
        return out_feats, label

    ds = ds.map(_loader, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)

    if CACHE_TO_DISK and cache_file is not None:
        # A bare file name has an empty dirname, which os.makedirs rejects.
        cache_dir = os.path.dirname(cache_file)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        ds = ds.cache(cache_file)

    ds = ds.batch(batch_size, drop_remainder=drop_remainder)

    # Only ask for "/GPU:0" when a GPU is actually visible, so a CPU-only run
    # never requests a device that does not exist.
    if prefetch_to_device and tf.config.get_visible_devices("GPU"):
        try:
            ds = ds.apply(tf.data.experimental.prefetch_to_device("/GPU:0"))
        except Exception as exc:
            # Best effort: keep going with host prefetch, but say why.
            print(f"prefetch_to_device unavailable, using host prefetch only: {exc}")

    # NOTE(review): the TensorFlow docs describe prefetch_to_device as the final
    # transformation of a pipeline; the trailing host prefetch is kept because
    # it is the order the recorded GPU runs used -- confirm before reordering.
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

In [None]:
class Tee(object):
    """Stream that duplicates everything written to it into a log file.

    The stream active as ``sys.stdout`` at construction time is remembered and
    keeps receiving all output; the same text is also written to ``filename``.
    Attributes this class does not define (``isatty``, ``encoding``,
    ``fileno``, ...) are forwarded to that wrapped stream, so the object can
    stand in for ``sys.stdout`` / ``sys.stderr`` with code that probes them.

    Args:
        filename: Path of the log file.
        mode: File mode passed to ``open`` (default: append).
    """

    def __init__(self, filename, mode="a"):
        # Capture the wrapped stream first so attribute forwarding is valid as
        # soon as the instance exists.
        self.stdout = sys.stdout
        self.file = open(filename, mode, encoding="utf-8")

    def write(self, data):
        """Write ``data`` to the log file (if still open) and the wrapped stream.

        Returns the number of characters written, like a text stream does.
        """
        if not self.file.closed:
            self.file.write(data)
            self.file.flush()
        self.stdout.write(data)
        self.stdout.flush()
        return len(data)

    def flush(self):
        """Flush the log file (if still open) and the wrapped stream."""
        if not self.file.closed:
            self.file.flush()
        self.stdout.flush()

    def close(self):
        """Close the log file only; the wrapped stream is left open and usable."""
        if not self.file.closed:
            self.file.close()

    def __getattr__(self, name):
        # Reached only for attributes not found normally. Refuse our own two
        # fields to avoid infinite recursion on a half-initialised instance.
        if name in ("file", "stdout"):
            raise AttributeError(name)
        return getattr(self.stdout, name)

# Re-running this cell must not stack a new Tee on top of the one installed by
# the previous run, otherwise every printed line is logged once per re-run.
# The class is re-defined together with this code, so isinstance() cannot
# recognise an older instance; match on the class name instead.
_stale_tee = sys.stdout if type(sys.stdout).__name__ == "Tee" else None
if _stale_tee is not None:
    # Unwrap to the stream the old Tee was forwarding to. Its log file is left
    # open on purpose: other objects may still hold a reference to the old Tee
    # and write through it.
    sys.stdout = _stale_tee.stdout

# Mirror both stdout and stderr into the run log (appending across runs).
tee = Tee(os.path.join(OUTPUT_DIR, "qat_tunning_log.txt"), "a")
sys.stdout = tee
sys.stderr = tee


def _export_c_header(tflite_path, header_path, var_name, guard_name):
    """Write a .tflite file as an include-guarded C array header using `xxd -i`.

    The array and length identifiers that xxd derives from the file path are
    renamed to ``var_name`` and ``var_name + "_len"``.

    Raises:
        FileNotFoundError: If the ``xxd`` executable is not available.
        subprocess.CalledProcessError: If ``xxd`` exits with a non-zero status.
    """
    # check=True: fail loudly instead of emitting an empty header when xxd fails.
    xxd = subprocess.run(["xxd", "-i", tflite_path],
                         check=True, capture_output=True, text=True)
    content = xxd.stdout

    content = re.sub(r'unsigned char\s+\w+\s*\[\]', f'unsigned char {var_name}[]', content)
    content = re.sub(r'unsigned int\s+\w+_len', f'unsigned int {var_name}_len', content)

    header_guard = f"#ifndef {guard_name}\n#define {guard_name}\n\n"
    footer_guard = "\n#endif\n"

    with open(header_path, "w") as f:
        f.write(header_guard)
        f.write(content)
        f.write(footer_guard)


# For each of the five folds: rebuild the float model, load its checkpoint, wrap
# it for quantization-aware training, fine-tune, convert to an INT8 TFLite model
# and export that model as a C header.
for fold_index in range(1, 6):
    print(f"\n=== Processing Fold {fold_index} ===")

    keras_model_path = os.path.join(CHECKPOINTS_DIR, f"fold_{fold_index}_best_model.keras")
    val_data_path = os.path.join(TEST_DATA_DIR, f"fold_{fold_index}_test_data.npz")
    tflite_model_path = os.path.join(TFLITE_DIR, f"fold_{fold_index}_qat_model.tflite")
    final_c_path = os.path.join(C_ARRAY_DIR, f"fold{fold_index}_model_data.h")

    if not os.path.exists(keras_model_path):
        print(f"Skipping Fold {fold_index}: Model file not found at {keras_model_path}")
        continue

    if not os.path.exists(val_data_path):
        print(f"Skipping Fold {fold_index}: Validation data not found at {val_data_path}")
        continue

    # Release the Keras state of the previous fold's models; without this every
    # fold adds more layers to the global state (layer names keep counting up).
    tf.keras.backend.clear_session()

    print(f"Loading float32 model from: {keras_model_path}")
    float_model = build_model(USE_CONTEXT, USE_FOVEA, CONTEXT_SHAPE, FOVEA_SHAPE, NUM_CLASSES)
    float_model.load_weights(keras_model_path)

    print("Applying Quantization-Aware Training wrapper...")
    quant_aware_model = tfmot.quantization.keras.quantize_model(float_model)

    quant_aware_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=QAT_LEARNING_RATE),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy']
    )

    # NOTE(review): the fine-tuning samples come from the fold's *test_data*
    # file. If that is the held-out split used to report accuracy, fine-tuning
    # on it leaks test data into the model -- confirm which split this is.
    print(f"Loading validation data for fine-tuning from: {val_data_path}")
    # NOTE(review): allow_pickle=True can execute arbitrary code from the file;
    # only load .npz files produced by a trusted run.
    with np.load(val_data_path, allow_pickle=True) as val_data:
        y_val_fold = val_data['y']

        features_val_fold = {}
        if USE_CONTEXT:
            features_val_fold["context_input"] = val_data["X_context_paths"].tolist()
        if USE_FOVEA:
            features_val_fold["fovea_input"] = val_data["X_fovea_paths"].tolist()

    cache_file = os.path.join(PIPE_CACHE_DIR, f"fold{fold_index}_val.cache")
    val_dataset = make_tf_dataset(
        features_val_fold, y_val_fold,
        batch_size=BATCH_SIZE,
        shuffle=False,
        drop_remainder=True,
        cache_file=cache_file
    )

    # Lets an interrupted fine-tuning run resume from its last backup.
    fold_backup_dir = os.path.join(TFLITE_DIR, f"fold_{fold_index}_backup")
    backup_and_restore_callback = BackupAndRestore(backup_dir=fold_backup_dir)

    print(f"Fine-tuning the QAT model for {QAT_FINETUNE_EPOCHS} epochs...")
    quant_aware_model.fit(
        val_dataset,
        epochs=QAT_FINETUNE_EPOCHS,
        verbose=2,
        callbacks=[
            backup_and_restore_callback,
        ])

    print("Converting the fine-tuned QAT model to TFLite INT8...")

    def representative_dataset_gen():
        # Yields the feature dict of up to 10000 batches; labels are discarded.
        for features, _ in val_dataset.take(10000):
            yield features

    converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset_gen
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8

    tflite_model_quant = converter.convert()

    with open(tflite_model_path, 'wb') as f:
        f.write(tflite_model_quant)

    var_name = f"fold{fold_index}_model"
    guard_name = f"FOLD{fold_index}_MODEL_DATA_H"
    _export_c_header(tflite_model_path, final_c_path, var_name, guard_name)
    print(f"✅ C array saved with variable: {var_name} in {final_c_path}")

    print(f"Successfully converted and saved TFLite model for Fold {fold_index} to:")
    print(tflite_model_path)

    # Sizes in KB: the checkpoint file on disk vs. the in-memory TFLite buffer.
    original_size = os.path.getsize(keras_model_path) / 1024
    quantized_size = len(tflite_model_quant) / 1024
    print(f"Model size reduced from {original_size:.2f} KB to {quantized_size:.2f} KB")



=== Processing Fold 1 ===
Loading float32 model from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/checkpoints/fold_1_best_model.keras


2025-08-10 22:17:54.644740: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-08-10 22:17:54.645798: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-08-10 22:17:54.646697: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Applying Quantization-Aware Training wrapper...
Loading validation data for fine-tuning from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/test_data/fold_1_test_data.npz
Fine-tuning the QAT model for 10 epochs...
Epoch 1/10


2025-08-10 22:18:01.844556: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inMultiResFusion/quant_dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2025-08-10 22:18:02.100280: W tensorflow/core/grappler/optimizers/data/slack.cc:103] Could not find a final `prefetch` in the input pipeline to which to introduce slack.
2025-08-10 22:18:02.843312: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
2025-08-10 22:18:04.504626: I external/local_xla/xla/service/service.cc:168] XLA service 0x7b8626103ab0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-08-10 22:18:04.504757: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1650 Ti, Compute Capability 7.5
2025-08-10 22:18:04.517030: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.c

645/645 - 45s - loss: 0.2119 - accuracy: 0.9124 - 45s/epoch - 70ms/step
Epoch 2/10
645/645 - 8s - loss: 0.1500 - accuracy: 0.9525 - 8s/epoch - 12ms/step
Epoch 3/10
645/645 - 7s - loss: 0.1242 - accuracy: 0.9604 - 7s/epoch - 11ms/step
Epoch 4/10
645/645 - 6s - loss: 0.1234 - accuracy: 0.9622 - 6s/epoch - 10ms/step
Epoch 5/10
645/645 - 8s - loss: 0.1151 - accuracy: 0.9649 - 8s/epoch - 13ms/step
Epoch 6/10
645/645 - 6s - loss: 0.1172 - accuracy: 0.9646 - 6s/epoch - 9ms/step
Epoch 7/10
645/645 - 6s - loss: 0.1128 - accuracy: 0.9654 - 6s/epoch - 9ms/step
Epoch 8/10
645/645 - 6s - loss: 0.1140 - accuracy: 0.9653 - 6s/epoch - 9ms/step
Epoch 9/10
645/645 - 7s - loss: 0.1131 - accuracy: 0.9657 - 7s/epoch - 10ms/step
Epoch 10/10
645/645 - 7s - loss: 0.1139 - accuracy: 0.9645 - 7s/epoch - 11ms/step
Converting the fine-tuned QAT model to TFLite INT8...
INFO:tensorflow:Assets written to: /tmp/tmprtob_me5/assets
INFO:tensorflow:Assets written to: /tmp/tmprtob_me5/assets


2025-08-10 22:19:52.916796: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-08-10 22:19:52.916857: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-08-10 22:19:52.917495: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmprtob_me5
2025-08-10 22:19:52.927617: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-08-10 22:19:52.927654: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmprtob_me5
2025-08-10 22:19:52.948783: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2025-08-10 22:19:52.961483: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-08-10 22:19:53.207115: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmprtob_me5
2025-08

✅ C array saved with variable: fold1_model in KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/c_arrays_qat/fold1_model_data.h
Successfully converted and saved TFLite model for Fold 1 to:
KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/quantized_models_qat/fold_1_qat_model.tflite
Model size reduced from 494.82 KB to 45.69 KB

=== Processing Fold 2 ===
Loading float32 model from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/checkpoints/fold_2_best_model.keras


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8


Applying Quantization-Aware Training wrapper...
Loading validation data for fine-tuning from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/test_data/fold_2_test_data.npz
Fine-tuning the QAT model for 10 epochs...
Epoch 1/10


2025-08-10 22:20:07.128182: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inMultiResFusion/quant_dropout_4/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2025-08-10 22:20:07.557520: W tensorflow/core/grappler/optimizers/data/slack.cc:103] Could not find a final `prefetch` in the input pipeline to which to introduce slack.
2025-08-10 22:20:43.440560: W tensorflow/core/kernels/data/cache_dataset_ops.cc:302] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


645/645 - 39s - loss: 0.2469 - accuracy: 0.8973 - 39s/epoch - 61ms/step
Epoch 2/10
645/645 - 8s - loss: 0.1432 - accuracy: 0.9522 - 8s/epoch - 12ms/step
Epoch 3/10
645/645 - 7s - loss: 0.1183 - accuracy: 0.9613 - 7s/epoch - 11ms/step
Epoch 4/10
645/645 - 8s - loss: 0.1164 - accuracy: 0.9645 - 8s/epoch - 13ms/step
Epoch 5/10
645/645 - 7s - loss: 0.1045 - accuracy: 0.9668 - 7s/epoch - 11ms/step
Epoch 6/10
645/645 - 8s - loss: 0.1089 - accuracy: 0.9666 - 8s/epoch - 12ms/step
Epoch 7/10
645/645 - 8s - loss: 0.1072 - accuracy: 0.9663 - 8s/epoch - 12ms/step
Epoch 8/10
645/645 - 8s - loss: 0.1061 - accuracy: 0.9669 - 8s/epoch - 12ms/step
Epoch 9/10
645/645 - 7s - loss: 0.1075 - accuracy: 0.9665 - 7s/epoch - 11ms/step
Epoch 10/10
645/645 - 7s - loss: 0.1028 - accuracy: 0.9681 - 7s/epoch - 11ms/step
Converting the fine-tuned QAT model to TFLite INT8...
INFO:tensorflow:Assets written to: /tmp/tmpr_77ygyl/assets
INFO:tensorflow:Assets written to: /tmp/tmpr_77ygyl/assets


2025-08-10 22:21:58.970153: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-08-10 22:21:58.970185: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-08-10 22:21:58.970437: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpr_77ygyl
2025-08-10 22:21:58.979158: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-08-10 22:21:58.979192: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpr_77ygyl
2025-08-10 22:21:59.013018: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-08-10 22:21:59.250361: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpr_77ygyl
2025-08-10 22:21:59.326233: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 355798 

✅ C array saved with variable: fold2_model in KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/c_arrays_qat/fold2_model_data.h
Successfully converted and saved TFLite model for Fold 2 to:
KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/quantized_models_qat/fold_2_qat_model.tflite
Model size reduced from 494.78 KB to 45.73 KB

=== Processing Fold 3 ===
Loading float32 model from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/checkpoints/fold_3_best_model.keras


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8


Applying Quantization-Aware Training wrapper...
Loading validation data for fine-tuning from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/test_data/fold_3_test_data.npz
Fine-tuning the QAT model for 10 epochs...
Epoch 1/10


2025-08-10 22:22:13.530607: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inMultiResFusion/quant_dropout_8/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2025-08-10 22:22:13.923415: W tensorflow/core/grappler/optimizers/data/slack.cc:103] Could not find a final `prefetch` in the input pipeline to which to introduce slack.
2025-08-10 22:22:46.928793: W tensorflow/core/kernels/data/cache_dataset_ops.cc:302] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


645/645 - 37s - loss: 0.2560 - accuracy: 0.8908 - 37s/epoch - 57ms/step
Epoch 2/10
645/645 - 7s - loss: 0.1434 - accuracy: 0.9516 - 7s/epoch - 11ms/step
Epoch 3/10
645/645 - 7s - loss: 0.1270 - accuracy: 0.9592 - 7s/epoch - 11ms/step
Epoch 4/10
645/645 - 7s - loss: 0.1169 - accuracy: 0.9637 - 7s/epoch - 11ms/step
Epoch 5/10
645/645 - 7s - loss: 0.1182 - accuracy: 0.9631 - 7s/epoch - 11ms/step
Epoch 6/10
645/645 - 7s - loss: 0.1080 - accuracy: 0.9656 - 7s/epoch - 11ms/step
Epoch 7/10
645/645 - 7s - loss: 0.1049 - accuracy: 0.9672 - 7s/epoch - 11ms/step
Epoch 8/10
645/645 - 7s - loss: 0.1087 - accuracy: 0.9662 - 7s/epoch - 11ms/step
Epoch 9/10
645/645 - 7s - loss: 0.1109 - accuracy: 0.9654 - 7s/epoch - 11ms/step
Epoch 10/10
645/645 - 7s - loss: 0.1106 - accuracy: 0.9659 - 7s/epoch - 11ms/step
Converting the fine-tuned QAT model to TFLite INT8...
INFO:tensorflow:Assets written to: /tmp/tmpop_bg1n4/assets
INFO:tensorflow:Assets written to: /tmp/tmpop_bg1n4/assets


2025-08-10 22:23:55.994438: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-08-10 22:23:55.994471: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-08-10 22:23:55.994721: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpop_bg1n4
2025-08-10 22:23:56.004825: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-08-10 22:23:56.004864: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpop_bg1n4
2025-08-10 22:23:56.038305: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-08-10 22:23:56.296421: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpop_bg1n4
2025-08-10 22:23:56.395253: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 400532 

✅ C array saved with variable: fold3_model in KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/c_arrays_qat/fold3_model_data.h
Successfully converted and saved TFLite model for Fold 3 to:
KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/quantized_models_qat/fold_3_qat_model.tflite
Model size reduced from 494.78 KB to 45.73 KB

=== Processing Fold 4 ===
Loading float32 model from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/checkpoints/fold_4_best_model.keras


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8


Applying Quantization-Aware Training wrapper...
Loading validation data for fine-tuning from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/test_data/fold_4_test_data.npz
Fine-tuning the QAT model for 10 epochs...
Epoch 1/10


2025-08-10 22:24:09.758205: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inMultiResFusion/quant_dropout_12/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2025-08-10 22:24:10.159509: W tensorflow/core/grappler/optimizers/data/slack.cc:103] Could not find a final `prefetch` in the input pipeline to which to introduce slack.
2025-08-10 22:24:44.911301: W tensorflow/core/kernels/data/cache_dataset_ops.cc:302] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


645/645 - 38s - loss: 0.3037 - accuracy: 0.8788 - 38s/epoch - 59ms/step
Epoch 2/10
645/645 - 7s - loss: 0.1613 - accuracy: 0.9461 - 7s/epoch - 12ms/step
Epoch 3/10
645/645 - 7s - loss: 0.1377 - accuracy: 0.9583 - 7s/epoch - 12ms/step
Epoch 4/10
645/645 - 7s - loss: 0.1192 - accuracy: 0.9631 - 7s/epoch - 11ms/step
Epoch 5/10
645/645 - 8s - loss: 0.1178 - accuracy: 0.9647 - 8s/epoch - 12ms/step
Epoch 6/10
645/645 - 7s - loss: 0.1146 - accuracy: 0.9650 - 7s/epoch - 12ms/step
Epoch 7/10
645/645 - 7s - loss: 0.1139 - accuracy: 0.9665 - 7s/epoch - 11ms/step
Epoch 8/10
645/645 - 7s - loss: 0.1165 - accuracy: 0.9653 - 7s/epoch - 11ms/step
Epoch 9/10
645/645 - 7s - loss: 0.1124 - accuracy: 0.9651 - 7s/epoch - 11ms/step
Epoch 10/10
645/645 - 7s - loss: 0.1139 - accuracy: 0.9657 - 7s/epoch - 11ms/step
Converting the fine-tuned QAT model to TFLite INT8...
INFO:tensorflow:Assets written to: /tmp/tmpdb3deu_r/assets
INFO:tensorflow:Assets written to: /tmp/tmpdb3deu_r/assets


2025-08-10 22:25:59.259403: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-08-10 22:25:59.259440: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-08-10 22:25:59.259693: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpdb3deu_r
2025-08-10 22:25:59.268977: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-08-10 22:25:59.269053: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmpdb3deu_r
2025-08-10 22:25:59.299666: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-08-10 22:25:59.572290: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmpdb3deu_r
2025-08-10 22:25:59.652899: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 393208 

✅ C array saved with variable: fold4_model in KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/c_arrays_qat/fold4_model_data.h
Successfully converted and saved TFLite model for Fold 4 to:
KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/quantized_models_qat/fold_4_qat_model.tflite
Model size reduced from 494.82 KB to 45.74 KB

=== Processing Fold 5 ===
Loading float32 model from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/checkpoints/fold_5_best_model.keras


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8


Applying Quantization-Aware Training wrapper...
Loading validation data for fine-tuning from: KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/test_data/fold_5_test_data.npz
Fine-tuning the QAT model for 10 epochs...
Epoch 1/10


2025-08-10 22:26:13.464033: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inMultiResFusion/quant_dropout_16/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2025-08-10 22:26:13.788449: W tensorflow/core/grappler/optimizers/data/slack.cc:103] Could not find a final `prefetch` in the input pipeline to which to introduce slack.
2025-08-10 22:26:47.876917: W tensorflow/core/kernels/data/cache_dataset_ops.cc:302] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


645/645 - 38s - loss: 0.2673 - accuracy: 0.8958 - 38s/epoch - 58ms/step
Epoch 2/10
645/645 - 7s - loss: 0.1567 - accuracy: 0.9493 - 7s/epoch - 11ms/step
Epoch 3/10
645/645 - 7s - loss: 0.1283 - accuracy: 0.9593 - 7s/epoch - 11ms/step
Epoch 4/10
645/645 - 7s - loss: 0.1260 - accuracy: 0.9610 - 7s/epoch - 11ms/step
Epoch 5/10
645/645 - 7s - loss: 0.1130 - accuracy: 0.9644 - 7s/epoch - 11ms/step
Epoch 6/10
645/645 - 8s - loss: 0.1185 - accuracy: 0.9644 - 8s/epoch - 12ms/step
Epoch 7/10
645/645 - 8s - loss: 0.1211 - accuracy: 0.9636 - 8s/epoch - 12ms/step
Epoch 8/10
645/645 - 7s - loss: 0.1127 - accuracy: 0.9661 - 7s/epoch - 12ms/step
Epoch 9/10
645/645 - 7s - loss: 0.1142 - accuracy: 0.9647 - 7s/epoch - 11ms/step
Epoch 10/10
645/645 - 7s - loss: 0.1142 - accuracy: 0.9653 - 7s/epoch - 12ms/step
Converting the fine-tuned QAT model to TFLite INT8...
INFO:tensorflow:Assets written to: /tmp/tmps42vr_sd/assets
INFO:tensorflow:Assets written to: /tmp/tmps42vr_sd/assets


2025-08-10 22:28:01.909814: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-08-10 22:28:01.909844: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-08-10 22:28:01.910088: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmps42vr_sd
2025-08-10 22:28:01.916724: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-08-10 22:28:01.916764: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmps42vr_sd
2025-08-10 22:28:01.943269: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-08-10 22:28:02.172079: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmps42vr_sd
2025-08-10 22:28:02.259246: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 349156 

✅ C array saved with variable: fold5_model in KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/c_arrays_qat/fold5_model_data.h
Successfully converted and saved TFLite model for Fold 5 to:
KTH_results/results_he_normal_zeros_both32_500_64_ctx32x32_fov16x16/quantized_models_qat/fold_5_qat_model.tflite
Model size reduced from 494.78 KB to 45.74 KB


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8
