In [None]:
import os
import gc
import shutil
import json
import tensorflow as tf
import random
import numpy as np
from PIL import Image, ImageOps, ImageEnhance, UnidentifiedImageError
from collections import defaultdict
from tensorflow.keras.applications import EfficientNetB0
from sklearn.model_selection import train_test_split

In [15]:
# Stage the read-only Kaggle input in the writable working area so later
# cells can delete and rewrite files.
input_images_dir = '/kaggle/input/varroa-dataset/images'
working_images_dir = '/kaggle/working/dataset'

if not os.path.exists(working_images_dir):
    os.makedirs(working_images_dir)

for image_file in os.listdir(input_images_dir):
    shutil.copy(os.path.join(input_images_dir, image_file), working_images_dir)

# Last expression: the destination path returned by shutil.copy is the cell output.
shutil.copy('/kaggle/input/varroa-dataset/_annotations.coco.json', '/kaggle/working/_annotations.coco.json')

'/kaggle/working/_annotations.coco.json'

In [16]:
# Setup paths
dataset_dir = "/kaggle/working/dataset"
annotations_path = "/kaggle/working/_annotations.coco.json"

with open(annotations_path, 'r') as f:
    coco_data = json.load(f)

# --- 1. Drop images that Pillow cannot open / verify ------------------------
# image id -> file name and pixel size, for every image that passed verification.
image_info = {}
unreadable_image_ids = set()
for image in coco_data['images']:
    image_path = os.path.join(dataset_dir, image['file_name'])
    image_id = image['id']
    try:
        with Image.open(image_path) as img:
            img.verify()
            image_info[image_id] = {'file_name': image['file_name'], 'width': img.width, 'height': img.height}
    except Exception as e:
        # Best effort: any failure to open/verify marks the image as bad.
        print(f"Removing bad image: {image['file_name']} due to error: {e}")
        unreadable_image_ids.add(image_id)
        if os.path.exists(image_path):
            os.remove(image_path)

valid_image_ids = set(image_info)

# --- 2. Reject images that carry a bounding box outside the image ------------
bbox_issues = 0
bbox_rejected_image_ids = set()
for annotation in coco_data['annotations']:
    img_info = image_info.get(annotation['image_id'])
    if img_info is None:
        # Annotation of an unreadable (or unknown) image; dropped below.
        continue
    bbox = annotation['bbox']
    if bbox[0] < 0 or bbox[1] < 0 or (bbox[0] + bbox[2]) > img_info['width'] or (bbox[1] + bbox[3]) > img_info['height']:
        bbox_issues += 1
        bbox_rejected_image_ids.add(annotation['image_id'])

# Sets give O(1) membership tests and count every rejected image exactly once.
bad_images = unreadable_image_ids | bbox_rejected_image_ids
kept_image_ids = valid_image_ids - bad_images

# --- 3. Keep the folder in sync with the annotations -------------------------
# Done after the bbox check so files of bbox-rejected images are removed too,
# together with files that were never referenced by the annotations.
kept_file_names = {image_info[image_id]['file_name'] for image_id in kept_image_ids}
extra_images_in_dataset = set(os.listdir(dataset_dir)) - kept_file_names

for image_filename in extra_images_in_dataset:
    os.remove(os.path.join(dataset_dir, image_filename))

# --- 4. Rewrite the annotation file ------------------------------------------
coco_data['images'] = [img for img in coco_data['images'] if img['id'] in kept_image_ids]
coco_data['annotations'] = [anno for anno in coco_data['annotations'] if anno['image_id'] in kept_image_ids]

with open(annotations_path, 'w') as f:
    json.dump(coco_data, f)

print(
    f"Removed {len(bad_images)} bad images "
    f"({len(unreadable_image_ids)} unreadable, {len(bbox_rejected_image_ids)} with out-of-bounds boxes; "
    f"{bbox_issues} invalid bounding boxes found)."
)
print(f"Deleted {len(extra_images_in_dataset)} extra images from dataset folder.")
print("Updated annotations file with corrections.")

Removing bad image: 2017-09-20_19-24-55-mp4-bee_id_3849-28485-1_png_jpg.rf.02aa0aa5986faba3a3b79f46fb5a39fa.jpg due to error: cannot identify image file '/kaggle/working/dataset/2017-09-20_19-24-55-mp4-bee_id_3849-28485-1_png_jpg.rf.02aa0aa5986faba3a3b79f46fb5a39fa.jpg'
Removing bad image: 2017-09-20_19-24-55-mp4-bee_id_4242-39465-1_png_jpg.rf.0ba1935457ff336b8bde1d1fce8d3524.jpg due to error: cannot identify image file '/kaggle/working/dataset/2017-09-20_19-24-55-mp4-bee_id_4242-39465-1_png_jpg.rf.0ba1935457ff336b8bde1d1fce8d3524.jpg'
Removing bad image: 2017-09-20_19-24-55-mp4-bee_id_3679-24315-1_png_jpg.rf.021a960da724e643dc19589ef89f73f2.jpg due to error: cannot identify image file '/kaggle/working/dataset/2017-09-20_19-24-55-mp4-bee_id_3679-24315-1_png_jpg.rf.021a960da724e643dc19589ef89f73f2.jpg'
Removing bad image: 2017-09-20_19-24-55-mp4-bee_id_4993-60795-1_png_jpg.rf.00f4b13659501d4e68f82d079483bb13.jpg due to error: cannot identify image file '/kaggle/working/dataset/2017-09-2

In [17]:
!pip install opencv-python
!pip install imgaug>=0.4.0

  pid, fd = os.forkpty()




In [6]:
# Working directory for the COCO resize helper script.
coco_dataset_resize_dir = '/kaggle/working/coco_dataset_resize'
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(coco_dataset_resize_dir, exist_ok=True)

In [7]:
# Copy the resize script from the read-only input dataset into the working directory.
resize_script_src = "/kaggle/input/coco-resize/script.py"
resize_script_dst = "/kaggle/working/coco_dataset_resize/script.py"
shutil.copy(resize_script_src, resize_script_dst)

'/kaggle/working/coco_dataset_resize/script.py'

In [31]:
# Switch to the directory holding script.py, which the next cell runs by relative name.
%cd /kaggle/working/coco_dataset_resize

/kaggle/working/coco_dataset_resize


In [32]:
# Run the resize helper on the cleaned dataset with a 512x512 target size.
# Images go to /kaggle/working/resized_dataset and the adjusted annotations to
# /kaggle/working/resized_annotations.coco.json (paths taken from the flags below).
!python script.py --images_dir="/kaggle/working/dataset" --annotations_file="/kaggle/working/_annotations.coco.json" --image_width=512 --image_height=512 --output_ann_file="/kaggle/working/resized_annotations.coco.json" --output_img_dir="/kaggle/working/resized_dataset"

Loading annotations file...
Annotations file loaded.
Building dictionnaries...
Dictionnaries built.
Writing modified annotations to file...
Finished.


In [24]:
# Sanity check on the resized annotations: every annotation must point at a
# known image and its box must lie inside that image's recorded dimensions.
dataset_dir = "/kaggle/working/resized_dataset"
annotations_path = "/kaggle/working/resized_annotations.coco.json"

with open(annotations_path, 'r') as f:
    coco_data = json.load(f)

# image id -> file name and recorded dimensions
image_info = {}
for image in coco_data['images']:
    image_info[image['id']] = {
        'file_name': image['file_name'],
        'width': image['width'],
        'height': image['height'],
    }

bbox_issues = 0
missing_images = 0

for annotation in coco_data['annotations']:
    dims = image_info.get(annotation['image_id'])
    if dims is None:
        # Annotation refers to an image id absent from the images list.
        missing_images += 1
        continue

    x, y, width, height = annotation['bbox']
    out_of_bounds = any((
        x < 0,
        y < 0,
        (x + width) > dims['width'],
        (y + height) > dims['height'],
    ))
    if out_of_bounds:
        bbox_issues += 1

print(f"Total bounding box issues detected: {bbox_issues}.")
print(f"Total missing images referenced in annotations: {missing_images}.")

Total bounding box issues detected: 0.
Total missing images referenced in annotations: 0.


In [33]:
# Return to the working root after running the resize script from its own folder.
%cd /kaggle/working

/kaggle/working


In [34]:
dataset_dir = "/kaggle/working/resized_dataset"
annotations_path = "/kaggle/working/resized_annotations.coco.json"

# One output folder per split.
train_dir, val_dir, test_dir = (
    "/kaggle/working/train",
    "/kaggle/working/val",
    "/kaggle/working/test",
)

with open(annotations_path) as annotations_file:
    coco_data = json.load(annotations_file)

# Split at image level: 10% held out for test, then 22.2% of the remaining 90%
# (about 20% of the total) for validation, i.e. roughly 70/20/10.
all_images = coco_data['images']
trainval_images, test_images = train_test_split(all_images, random_state=42, test_size=0.1)
train_images, val_images = train_test_split(trainval_images, random_state=42, test_size=0.222)

def filter_annotations(annotations, images):
    """Return the annotations that belong to one of the given images.

    Args:
        annotations: iterable of COCO annotation dicts (each has 'image_id').
        images: iterable of COCO image dicts (each has 'id').

    Returns:
        A new list with the matching annotations, in their original order.
    """
    wanted_ids = {entry['id'] for entry in images}
    selected = []
    for record in annotations:
        if record['image_id'] in wanted_ids:
            selected.append(record)
    return selected

# Annotations for each split, in train / val / test order.
train_annotations, val_annotations, test_annotations = (
    filter_annotations(coco_data['annotations'], split_images)
    for split_images in (train_images, val_images, test_images)
)

# Create the split folders; exist_ok keeps the cell re-runnable.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

def move_images(image_list, target_dir, source_dir=None):
    """Move the files of the given COCO image entries into ``target_dir``.

    Args:
        image_list: iterable of COCO image dicts; only 'file_name' is read.
        target_dir: existing directory that receives the files.
        source_dir: directory the files currently live in. Defaults to the
            notebook-level ``dataset_dir`` when omitted.

    Raises:
        FileNotFoundError: if a file is in neither the source nor the target
            directory (raised by ``shutil.move``).
    """
    if source_dir is None:
        source_dir = dataset_dir
    for image in image_list:
        source_path = os.path.join(source_dir, image['file_name'])
        target_path = os.path.join(target_dir, image['file_name'])
        # Already moved by a previous run of the cell: skip instead of failing,
        # so the split cell can be re-executed.
        if not os.path.exists(source_path) and os.path.exists(target_path):
            continue
        shutil.move(source_path, target_path)

# (split name, image entries, annotation entries, target folder)
split_table = [
    ('train', train_images, train_annotations, train_dir),
    ('val', val_images, val_annotations, val_dir),
    ('test', test_images, test_annotations, test_dir),
]

# First relocate every image file into its split folder ...
for _, images, _, split_dir in split_table:
    move_images(images, split_dir)

# ... then write one COCO file per split, sharing the original category list.
for name, images, subset, _ in split_table:
    subset_path = f"/kaggle/working/{name}_annotations.coco.json"
    payload = {'images': images, 'annotations': subset, 'categories': coco_data['categories']}
    with open(subset_path, 'w') as f:
        json.dump(payload, f)
    print(f"Saved {name} annotations to {subset_path}")

Saved train annotations to /kaggle/working/train_annotations.coco.json
Saved val annotations to /kaggle/working/val_annotations.coco.json
Saved test annotations to /kaggle/working/test_annotations.coco.json


In [35]:
sets = ['train', 'val', 'test']
base_dir = "/kaggle/working"

# split name -> annotation file / image folder
annotation_paths = {}
image_dirs = {}
for split_name in sets:
    annotation_paths[split_name] = os.path.join(base_dir, f"{split_name}_annotations.coco.json")
    image_dirs[split_name] = os.path.join(base_dir, split_name)

def load_coco_data(annotation_path):
    """Read the JSON file at ``annotation_path`` and return the parsed object."""
    with open(annotation_path, 'r') as handle:
        parsed = json.load(handle)
    return parsed

def verify_dataset(images_dir, coco_data):
    """Compare the file names listed in ``coco_data`` with the folder contents.

    Args:
        images_dir: directory whose entries are listed with ``os.listdir``.
        coco_data: dict with an 'images' list; each entry has 'file_name'.

    Returns:
        A tuple ``(missing_in_folder, extra_in_folder)`` of sets: names that are
        annotated but not on disk, and names on disk that are not annotated.
    """
    annotated = {entry['file_name'] for entry in coco_data['images']}
    on_disk = set(os.listdir(images_dir))
    return annotated - on_disk, on_disk - annotated

# Report, per split, whether the folder and its annotation file list the same files.
for s in sets:
    coco_data = load_coco_data(annotation_paths[s])
    missing_in_folder, extra_in_folder = verify_dataset(image_dirs[s], coco_data)

    print(f"\n{s.upper()} SET:")
    if not (missing_in_folder or extra_in_folder):
        print("All files and annotations are correctly aligned.")
        continue

    # Show the size of each mismatch plus at most five example file names.
    for label, names in (("Missing in", missing_in_folder), ("Extra in", extra_in_folder)):
        if not names:
            continue
        print(f"{label} {s} folder: {len(names)} files")
        for file_name in list(names)[:5]:
            print(f" - {file_name}")


TRAIN SET:
All files and annotations are correctly aligned.

VAL SET:
All files and annotations are correctly aligned.

TEST SET:
All files and annotations are correctly aligned.


In [13]:
# Make sure the working root is the current directory before cloning the models repo.
%cd /kaggle/working

/kaggle/working


In [14]:
models_dir = "/kaggle/working/models";
if os.path.exists(models_dir):
    shutil.rmtree(models_dir)
if not os.path.exists(models_dir):
    os.mkdir(models_dir)
!git clone https://github.com/tensorflow/models.git /kaggle/working/models

Cloning into '/kaggle/working/models'...
remote: Enumerating objects: 96741, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 96741 (delta 7), reused 16 (delta 6), pack-reused 96720[K
Receiving objects: 100% (96741/96741), 610.83 MiB | 33.41 MiB/s, done.
Resolving deltas: 100% (70425/70425), done.


In [5]:
# Prepare the Object Detection API for installation from the cloned repo.
%cd /kaggle/working/models/research/
# Compile the .proto definitions to Python modules in place (--python_out=.).
!protoc object_detection/protos/*.proto --python_out=.
# Copy the TF2 packaging script to the current directory so `pip install .` can be run here.
!cp object_detection/packages/tf2/setup.py .

/kaggle/working/models/research


In [6]:
# Install the object_detection package from the current directory
# (/kaggle/working/models/research, set by the preceding %cd).
# NOTE(review): `%pip install .` would guarantee the install targets the kernel's environment.
!pip install .

Processing /kaggle/working/models/research
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting avro-python3 (from object_detection==0.1)
  Downloading avro-python3-1.10.2.tar.gz (38 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting contextlib2 (from object_detection==0.1)
  Downloading contextlib2-21.6.0-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting tf-slim (from object_detection==0.1)
  Downloading tf_slim-1.1.0-py2.py3-none-any.whl.metadata (1.6 kB)
Collecting pycocotools (from object_detection==0.1)
  Downloading pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting lvis (from object_detection==0.1)
  Downloading lvis-0.5.3-py3-none-any.whl.metadata (856 bytes)
Collecting tf-models-official>=2.5.1 (from object_detection==0.1)
  Downloading tf_models_official-2.16.0-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting pyparsing==2.4.7 (from object_detection==0.1)
  Downloading pyparsing-2.4.7-py2.py3-none-an

In [42]:
# The TFRecord conversion command below uses a path relative to models/research.
%cd /kaggle/working/models/research/

/kaggle/working/models/research


In [43]:
# Convert the train / val / test splits (image folders + COCO JSON files) to
# TFRecord files under /kaggle/working/coco_tf_records. The pipeline config
# written later reads coco_train.record-* and coco_val.record-* from that folder.
# Masks are not included (--include_masks=False).
!python object_detection/dataset_tools/create_coco_tf_record.py \
    --logtostderr \
    --train_image_dir=/kaggle/working/train \
    --val_image_dir=/kaggle/working/val \
    --test_image_dir=/kaggle/working/test \
    --train_annotations_file=/kaggle/working/train_annotations.coco.json \
    --val_annotations_file=/kaggle/working/val_annotations.coco.json \
    --testdev_annotations_file=/kaggle/working/test_annotations.coco.json \
    --output_dir=/kaggle/working/coco_tf_records \
    --include_masks=False

I0331 17:02:52.380677 136406083946304 create_coco_tf_record.py:399] Found groundtruth annotations. Building annotations index.
I0331 17:02:52.572237 136406083946304 create_coco_tf_record.py:411] 0 images are missing annotations.
I0331 17:02:52.572844 136406083946304 create_coco_tf_record.py:441] On image 0 of 13138
I0331 17:02:52.885965 136406083946304 create_coco_tf_record.py:441] On image 100 of 13138
I0331 17:02:53.151529 136406083946304 create_coco_tf_record.py:441] On image 200 of 13138
I0331 17:02:53.398221 136406083946304 create_coco_tf_record.py:441] On image 300 of 13138
I0331 17:02:53.647791 136406083946304 create_coco_tf_record.py:441] On image 400 of 13138
I0331 17:02:53.911189 136406083946304 create_coco_tf_record.py:441] On image 500 of 13138
I0331 17:02:54.198255 136406083946304 create_coco_tf_record.py:441] On image 600 of 13138
I0331 17:02:54.456999 136406083946304 create_coco_tf_record.py:441] On image 700 of 13138
I0331 17:02:54.727456 136406083946304 create_coco_tf_

In [44]:
# Single-class label map in the pbtxt text format; this file is referenced by
# the training and evaluation input readers of the pipeline config.
# NOTE(review): id/name presumably have to match the category in the COCO
# annotations - confirm against the 'categories' list.
label_map = """item {
  id: 1
  name: 'Varroa'
}"""

label_map_path = "/kaggle/working/label_map.pbtxt"
with open(label_map_path, "w") as label_map_file:
    label_map_file.write(label_map)

In [17]:
# Copy the pre-trained EfficientDet-D0 files from the read-only input into the working area.
efficientdet_src = "/kaggle/input/efficientdet-d0-coco17-tpu-32/efficientdet_d0_coco17_tpu-32"
efficientdet_dest = "/kaggle/working/efficientdet_d0_coco17_tpu-32/"

# copytree requires a destination that does not exist yet, so remove a stale copy first.
if os.path.exists(efficientdet_dest):
    shutil.rmtree(efficientdet_dest)

# Last expression: the returned destination path is the cell output.
shutil.copytree(src=efficientdet_src, dst=efficientdet_dest)

'/kaggle/working/efficientdet_d0_coco17_tpu-32/'

In [22]:
import tensorflow as tf
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format
import shutil
import os

# Stock EfficientDet-D0 config shipped with the models repo, and the path of
# the customised copy used for training and export.
config_path = '/kaggle/working/models/research/object_detection/configs/tf2/ssd_efficientdet_d0_512x512_coco17_tpu-8.config'
modified_config_path = '/kaggle/working/models/research/object_detection/configs/tf2/modified_effdet_d0.config'

# Parse the stock config once into a pipeline proto that can be edited field by field.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(config_path, "r") as f:
    text_format.Merge(f.read(), pipeline_config)

# Training setup: small batch, single class, fine-tune from the copied detection checkpoint.
pipeline_config.train_config.batch_size = 4
pipeline_config.model.ssd.num_classes = 1
pipeline_config.train_config.fine_tune_checkpoint = "/kaggle/working/efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"

# Input readers: label map plus the sharded TFRecords written to /kaggle/working/coco_tf_records.
pipeline_config.train_input_reader.label_map_path = "/kaggle/working/label_map.pbtxt"
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = ["/kaggle/working/coco_tf_records/coco_train.record-?????-of-00100",]
pipeline_config.eval_input_reader[0].label_map_path = "/kaggle/working/label_map.pbtxt"
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = ["/kaggle/working/coco_tf_records/coco_val.record-?????-of-00050",]

# MessageToString returns text, so the file is opened in text mode.
config_text = text_format.MessageToString(pipeline_config)
with tf.io.gfile.GFile(modified_config_path, "w") as f:
    f.write(config_text)

print(f"Config file modified and saved to {modified_config_path}")

Config file modified and saved to /kaggle/working/models/research/object_detection/configs/tf2/modified_effdet_d0.config


In [None]:
# Replace the preinstalled TensorFlow with a pinned 2.13.0 build.
# NOTE(review): this cell has no execution count, so it was not run in the saved session.
# NOTE(review): TensorFlow is already imported by the first cell; a kernel that
# has loaded it presumably keeps the old version until restarted - confirm and
# restart after running this cell.
# NOTE(review): pip documents the flag as `-y` / `--yes`; `--y` is presumably
# accepted as an abbreviation - verify.
!pip uninstall tensorflow --y
!pip install tensorflow==2.13.0

In [10]:
# Set the Keras global dtype policy to mixed_float16 for this kernel process.
# NOTE(review): training is launched below with `!python model_main_tf2.py`,
# i.e. in a separate process, so this policy presumably does not apply to the
# training run - confirm whether this cell is still needed.
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

In [20]:
# Output directory of the training run; the training command below passes the same path as --model_dir.
model_dir = "/kaggle/working/efficientdet_d0_train"

# Remove the results of an earlier run, if any, before training again.
stale_run_present = os.path.exists(model_dir)
if stale_run_present:
    shutil.rmtree(model_dir)

In [23]:
# Launch training in a separate Python process using the modified pipeline
# config; output is written to /kaggle/working/efficientdet_d0_train.
# --num_train_steps overrides the step count from the config (the recorded log
# shows "Maybe overwriting train_steps: 10000").
!python /kaggle/working/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path=/kaggle/working/models/research/object_detection/configs/tf2/modified_effdet_d0.config \
    --model_dir=/kaggle/working/efficientdet_d0_train \
    --alsologtostderr \
    --num_train_steps=10000 \
    --sample_1_of_n_eval_examples=1 \
    --num_eval_steps=100

I0331 19:48:08.803999 138583278085952 mirrored_strategy.py:419] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
I0331 19:48:08.838015 138583278085952 config_util.py:552] Maybe overwriting train_steps: 10000
I0331 19:48:08.838391 138583278085952 config_util.py:552] Maybe overwriting use_bfloat16: False
I0331 19:48:08.854880 138583278085952 ssd_efficientnet_bifpn_feature_extractor.py:161] EfficientDet EfficientNet backbone version: efficientnet-b0
I0331 19:48:08.855222 138583278085952 ssd_efficientnet_bifpn_feature_extractor.py:163] EfficientDet BiFPN num filters: 64
I0331 19:48:08.855417 138583278085952 ssd_efficientnet_bifpn_feature_extractor.py:164] EfficientDet BiFPN num iterations: 3
I0331 19:48:08.870341 138583278085952 efficientnet_model.py:143] round_filter input=32 output=32
I0331 19:48:08.937191 138583278085952 efficientnet_model.py:143] round_filter input=32 output=32
I0331 19:48:08.937538 138583278085952 efficientnet_model.py:143] round_f

In [29]:
# Show the current working directory; relative paths in shell cells resolve against it.
os.getcwd()

'/kaggle/working/models/research'

In [7]:
# Export the trained checkpoints from /kaggle/working/efficientdet_d0_train
# to /kaggle/working/exported_efficientdet_d0, using the modified pipeline config.
# NOTE(review): the recorded run of this cell failed with
# "module 'keras._tf_keras.keras.layers' has no attribute 'experimental'".
# Presumably the TensorFlow/Keras in that session is newer than the
# object_detection package supports; the tensorflow==2.13.0 pin cell likely
# has to be applied in the same session before exporting - confirm.
!python /kaggle/working/models/research/object_detection/exporter_main_v2.py \
    --input_type image_tensor \
    --pipeline_config_path /kaggle/working/models/research/object_detection/configs/tf2/modified_effdet_d0.config \
    --trained_checkpoint_dir /kaggle/working/efficientdet_d0_train \
    --output_directory /kaggle/working/exported_efficientdet_d0

Traceback (most recent call last):
  File "/kaggle/working/models/research/object_detection/exporter_main_v2.py", line 104, in <module>
    from object_detection import exporter_lib_v2
  File "/opt/conda/lib/python3.10/site-packages/object_detection/exporter_lib_v2.py", line 21, in <module>
    from object_detection.builders import model_builder
  File "/opt/conda/lib/python3.10/site-packages/object_detection/builders/model_builder.py", line 26, in <module>
    from object_detection.builders import hyperparams_builder
  File "/opt/conda/lib/python3.10/site-packages/object_detection/builders/hyperparams_builder.py", line 27, in <module>
    from object_detection.core import freezable_sync_batch_norm
  File "/opt/conda/lib/python3.10/site-packages/object_detection/core/freezable_sync_batch_norm.py", line 20, in <module>
    class FreezableSyncBatchNorm(tf.keras.layers.experimental.SyncBatchNormalization
AttributeError: module 'keras._tf_keras.keras.layers' has no attribute 'experimental'

In [28]:
# Pack the training directory (checkpoints and event files) into a gzip archive.
# The archive name is relative, so it is created in the current working directory.
# tar strips the leading '/' from member names (see the message in the output).
!tar -czvf efficientdet_d0_train.tar.gz /kaggle/working/efficientdet_d0_train

tar: Removing leading `/' from member names
/kaggle/working/efficientdet_d0_train/
/kaggle/working/efficientdet_d0_train/ckpt-11.index
/kaggle/working/efficientdet_d0_train/ckpt-8.index
/kaggle/working/efficientdet_d0_train/ckpt-7.data-00000-of-00001
/kaggle/working/efficientdet_d0_train/ckpt-6.index
/kaggle/working/efficientdet_d0_train/ckpt-12.data-00000-of-00001
/kaggle/working/efficientdet_d0_train/ckpt-9.index
/kaggle/working/efficientdet_d0_train/ckpt-10.data-00000-of-00001
/kaggle/working/efficientdet_d0_train/train/
/kaggle/working/efficientdet_d0_train/train/events.out.tfevents.1711914516.9580c5ee9a3b.638.0.v2
/kaggle/working/efficientdet_d0_train/train/events.out.tfevents.1711913437.9580c5ee9a3b.164.0.v2
/kaggle/working/efficientdet_d0_train/ckpt-10.index
/kaggle/working/efficientdet_d0_train/ckpt-6.data-00000-of-00001
/kaggle/working/efficientdet_d0_train/ckpt-11.data-00000-of-00001
/kaggle/working/efficientdet_d0_train/ckpt-8.data-00000-of-00001
/kaggle/working/efficientdet