In [69]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras_cv
import os

## Data Pipeline

In [70]:
dataset_location = './Food ingredient recognition.v4i.tfrecord'
dataset_location = "/mnt/c/Tugas Raihan/Kuliah/Matkul/Semester 6/Bangkit/Machine-Learning-Capstone/Food ingredient recognition.v4i.tfrecord" # Punya Andi
tfrecord_name = "food-ingredient.tfrecord"
print(os.path.exists(dataset_location))

True


In [71]:
dataset_train = tf.data.TFRecordDataset(dataset_location + f"/train/{tfrecord_name}")

In [72]:
feature_description = {
    'image/object/bbox/ymin' : tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/xmin' : tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/ymax' : tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/xmax' : tf.io.VarLenFeature(tf.float32),
    'image/object/class/text' : tf.io.VarLenFeature(tf.string),
    'image/object/class/label' : tf.io.VarLenFeature(tf.int64),
    'image/encoded' : tf.io.FixedLenFeature([], tf.string),
    'image/format' : tf.io.FixedLenFeature([], tf.string),
    'image/filename' : tf.io.FixedLenFeature([], tf.string),
    'image/height' : tf.io.FixedLenFeature([], tf.int64),
    'image/width' : tf.io.FixedLenFeature([], tf.int64)
}
def _parse_function(example_proto):
    parsed_example = tf.io.parse_single_example(example_proto, feature_description)
    for key in ['image/object/bbox/ymin', 'image/object/bbox/xmin', 'image/object/bbox/ymax', 'image/object/bbox/xmax', 'image/object/class/text', 'image/object/class/label']:
        parsed_example[key] = tf.sparse.to_dense(parsed_example[key])
    return parsed_example

In [73]:
from keras_cv import bounding_box
from keras_cv import visualization

class_ids = [
    "-",
    "almond",
    "apple",
    "avocado",
    "beef",
    "bell pepper",
    "blueberry",
    "bread",
    "broccoli",
    "butter",
    "carrot",
    "cheese",
    "chilli",
    "cookie",
    "corn",
    "cucumber",
    "egg",
    "eggplant",
    "garlic",
    "lemon",
    "milk",
    "mozarella cheese",
    "mushroom",
    "mussel",
    "onion",
    "oyster",
    "parmesan cheese",
    "pasta",
    "pork rib",
    "potato",
    "salmon",
    "scallop",
    "shrimp",
    "strawberry",
    "toast bread",
    "tomato",
    "tuna",
    "yogurt",
]
class_mapping = dict(zip(range(len(class_ids)), class_ids))

def preprocess_dataset(parsed_dataset):
    image = tf.image.decode_jpeg(parsed_dataset['image/encoded'])
    label = parsed_dataset['image/object/class/label']
    bounding_box = tf.stack([
        parsed_dataset['image/object/bbox/xmin'],
        parsed_dataset['image/object/bbox/ymin'],
        parsed_dataset['image/object/bbox/xmax'],
        parsed_dataset['image/object/bbox/ymax']
    ], axis=-1)
    image = tf.cast(image, tf.float32)
    return {
        'images': image,
        'bounding_boxes' : {
            'boxes': tf.cast(bounding_box, tf.float32),
            'classes': tf.cast(label, tf.int32)
        }
    }

def visualize_data(image, bounding_boxes):
    image = np.array(image)
    inference_resizing = keras_cv.layers.Resizing(
    640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
    )
    image = inference_resizing([image])
    visualization.plot_bounding_box_gallery(
        image,
        value_range=(0, 255),
        rows=1,
        cols=1,
        scale=5,
        bounding_box_format='xywh',
        y_pred=bounding_boxes
    )

augmenter = tf.keras.Sequential(
    layers=[
        keras_cv.layers.RandomFlip(mode="horizontal", bounding_box_format="xyxy"),
        keras_cv.layers.RandomShear(
            x_factor=0.2, y_factor=0.2, bounding_box_format="xyxy"
        ),
        keras_cv.layers.JitteredResize(
            target_size=(640, 640), scale_factor=(0.75, 1.3), bounding_box_format="xyxy"
        ),
    ]
)

In [74]:
from keras_cv import bounding_box

train_data = dataset_train.map(_parse_function).map(preprocess_dataset).shuffle(buffer_size=10_000).ragged_batch(4, drop_remainder=True)
train_data = train_data.map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)

def dict_to_tuple(inputs):
    print(inputs)
    # return tf.RaggedTensor.to_tensor(inputs["images"]), bounding_box.to_dense(inputs["bounding_boxes"], max_boxes=32)
    return inputs["images"], bounding_box.to_dense(inputs["bounding_boxes"], max_boxes=32)

train_data = train_data.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
train_data = train_data.prefetch(tf.data.AUTOTUNE)

print(train_data)


{'images': <tf.Tensor 'args_2:0' shape=(4, 640, 640, None) dtype=float32>, 'bounding_boxes': {'boxes': tf.RaggedTensor(values=Tensor("RaggedFromVariant/RaggedTensorFromVariant:1", shape=(None, 4), dtype=float32), row_splits=Tensor("RaggedFromVariant/RaggedTensorFromVariant:0", shape=(5,), dtype=int64)), 'classes': tf.RaggedTensor(values=Tensor("RaggedFromVariant_1/RaggedTensorFromVariant:1", shape=(None,), dtype=float32), row_splits=Tensor("RaggedFromVariant_1/RaggedTensorFromVariant:0", shape=(5,), dtype=int64))}}
<_PrefetchDataset element_spec=(TensorSpec(shape=(4, 640, 640, None), dtype=tf.float32, name=None), {'boxes': TensorSpec(shape=(4, 32, 4), dtype=tf.float32, name=None), 'classes': TensorSpec(shape=(4, 32), dtype=tf.float32, name=None)})>


In [75]:
# import keras

# pretrained_model = keras_cv.models.YOLOV8Detector.from_preset(
#     "yolo_v8_m_pascalvoc", bounding_box_format="xywh"
# )
# image = keras.utils.load_img("stock-photo-airport-with-many-airplanes-at-beautiful-sunset-324754607.jpg")
# image = np.array(image)
# # print(image.shape)

# visualization.plot_image_gallery(
#     np.array([image]),
#     value_range=(0, 255),
#     rows=1,
#     cols=1,
#     scale=5,)
# inference_resizing = keras_cv.layers.Resizing(
#     640, 640, pad_to_aspect_ratio=True, bounding_box_format="xywh"
# )
# image_batch = inference_resizing([image])
# # print(image_batch)
# class_ids = [
#     "Aeroplane",
#     "Bicycle",
#     "Bird",
#     "Boat",
#     "Bottle",
#     "Bus",
#     "Car",
#     "Cat",
#     "Chair",
#     "Cow",
#     "Dining Table",
#     "Dog",
#     "Horse",
#     "Motorbike",
#     "Person",
#     "Potted Plant",
#     "Sheep",
#     "Sofa",
#     "Train",
#     "Tvmonitor",
#     "Total",
# ]
# # class_mapping = dict(zip(range(len(class_ids)), class_ids))
# # y_pred = pretrained_model.predict(image_batch)

# # print(y_pred)

# for sample in train_data_unbatched.take(1):
#     image = np.array(sample['images'])
#     inference_resizing = keras_cv.layers.Resizing(
#     640, 640, pad_to_aspect_ratio=True, bounding_box_format="xyxy"
#     )
#     image = inference_resizing([image])
#     y_pred = pretrained_model.predict(image)
#     print(y_pred)
#     visualization.plot_bounding_box_gallery(
#         image,
#         value_range=(0, 255),
#         rows=1,
#         cols=1,
#         y_pred=y_pred,
#         scale=5,
#         font_scale=0.7,
#         bounding_box_format="xyxy",
#         class_mapping=class_mapping,
#     )


In [76]:
# Get the number of classes in the dataset
# unique_classes = set()
# for parsed_dataset in train_data:
#     classes = parsed_dataset['bounding_boxes']['classes'].numpy()
#     unique_classes.update(np.unique(classes))

# NUM_CLASSES = len(unique_classes)
NUM_CLASSES = len(class_ids)

In [77]:
# Pretrained backbone
backbone = keras_cv.models.YOLOV8Backbone.from_preset(
    "yolo_v8_s_backbone_coco",
    include_rescaling = True,
)

model = keras_cv.models.YOLOV8Detector(
    num_classes=NUM_CLASSES,
    bounding_box_format="xyxy",
    backbone=backbone,
    fpn_depth=1,
)

In [78]:
freeze = 133
# Freeze to up to 133 layers
for i, layer in enumerate(model.layers):
    if i < freeze:
        layer.trainable = False


In [80]:
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-4,
)

model.compile(
    optimizer=optimizer, classification_loss="binary_crossentropy", box_loss="ciou"
)

model.fit(train_data, epochs=10)

Epoch 1/10
      1/Unknown [1m34s[0m 34s/step - loss: 11649.3496

I0000 00:00:1717567372.182729   68483 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 42ms/step - loss: 11649.1953
Epoch 2/10


2024-06-05 13:03:22.726262: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-06-05 13:03:22.726838: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
  self.gen.throw(typ, value, traceback)


[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 43ms/step - loss: 11649.1035
Epoch 3/10


2024-06-05 13:03:56.645465: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-06-05 13:03:56.646597: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_6]]
2024-06-05 13:03:56.646621: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4928007475644499037
2024-06-05 13:03:56.646628: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 758603102464478797
2024-06-05 13:03:56.646668: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6071733332022983152


: 

: 

: 