<a href="https://colab.research.google.com/github/Michal287/computer_vision/blob/main/SSD_TF_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clone TFModels

In [None]:
# Shallow clone (--depth 1: latest commit only) of the TensorFlow models repository;
# the install cell below works inside models/research.
!git clone --depth 1 https://github.com/tensorflow/models

Cloning into 'models'...
remote: Enumerating objects: 3934, done.[K
remote: Counting objects: 100% (3934/3934), done.[K
remote: Compressing objects: 100% (3050/3050), done.[K
remote: Total 3934 (delta 1139), reused 1885 (delta 830), pack-reused 0[K
Receiving objects: 100% (3934/3934), 49.69 MiB | 17.12 MiB/s, done.
Resolving deltas: 100% (1139/1139), done.


# Install Object Detection API

In [None]:
%%bash
# Stop at the first failing command so a failed `cd` or `protoc` cannot
# cascade into running pip from the wrong directory.
set -euo pipefail
cd models/research/
# Compile the Object Detection API protobuf definitions into Python modules.
protoc object_detection/protos/*.proto --python_out=.
# Copy the TF2 packaging script next to the sources and install from here.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

Processing /content/models/research
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting avro-python3 (from object-detection==0.1)
  Downloading avro-python3-1.10.2.tar.gz (38 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting apache-beam (from object-detection==0.1)
  Downloading apache_beam-2.49.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.6/14.6 MB 64.1 MB/s eta 0:00:00
Collecting lvis (from object-detection==0.1)
  Downloading lvis-0.5.3-py3-none-any.whl (14 kB)
Collecting tf-models-official>=2.5.1 (from object-detection==0.1)
  Downloading tf_models_official-2.13.1-py2.py3-none-any.whl (2.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.6/2.6 MB 69.5 MB/s eta 0:00:00
Collecting tensorflow_io (from object-detection==0.1)
  Downloading tensorflow_io-0.33.0-cp310-cp310-manylinux_2_

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
flax 0.7.1 requires PyYAML>=5.4.1, but you have pyyaml 5.3.1 which is incompatible.


# Mount Google Drive and Extract the Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

!7z x /content/drive/MyDrive/source.zip -odata/images
!7z x /content/drive/MyDrive/source2.zip -odata/images

Mounted at /content/drive

7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,4 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan /content/drive/MyDrive/                                 1 file, 583322523 bytes (557 MiB)

Extracting archive: /content/drive/MyDrive/source.zip
--
Path = /content/drive/MyDrive/source.zip
Type = zip
Physical Size = 583322523

  0%      2% 6 - source/image_103.png                               4% 11 - source/image_108.png                                6% 16 - source/image_112.png                                9% 22 - source/image_118.png        

# Create TFRecord Files

In [None]:
import tensorflow as tf
import os
import json
from object_detection.utils import dataset_util

def tfrecord_creator(image, annotations_list, class_name='Stab'):
    """Build a ``tf.train.Example`` for one image and its COCO annotations.

    Args:
        image: Encoded image file contents as a tensor exposing ``.numpy()``
            (the caller passes the result of ``tf.io.read_file``).
        annotations_list: Iterable of annotation dicts; each must provide a
            ``'bbox'`` entry that unpacks to ``x, y, w, h`` in pixels.
        class_name: Label text stored for every box. Defaults to ``'Stab'``,
            the value that was previously hard-coded.

    Returns:
        A ``tf.train.Example`` holding the encoded image, its height and
        width, the box corners normalised to ``[0, 1]`` and one class text
        per box.
    """
    # expand_animations=False guarantees a 3-D (height, width, channels)
    # result; without it a GIF decodes to 4-D and the unpacking below fails.
    image_tensor = tf.io.decode_image(image, expand_animations=False)
    height, width, _ = image_tensor.shape

    def _normalise(value, extent):
        # Clamp so a box that overshoots the image border by a few pixels
        # still produces a coordinate inside [0, 1].
        return min(max(float(value) / extent, 0.0), 1.0)

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text = []
    encoded_class = str(class_name).encode('utf8')

    for ann in annotations_list:
        x, y, w, h = ann['bbox']
        xmins.append(_normalise(x, width))
        xmaxs.append(_normalise(x + w, width))
        ymins.append(_normalise(y, height))
        ymaxs.append(_normalise(y + h, height))
        classes_text.append(encoded_class)

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/encoded': dataset_util.bytes_feature(image.numpy()),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature_dict))

def main(coco_annotation_file, output_file, image_dir):
    """Convert a COCO annotation file and its images into one TFRecord file.

    Images that have no annotations, or whose file is missing from
    ``image_dir``, are skipped (a message is printed for missing files).

    Args:
        coco_annotation_file: Path to a JSON file with ``'images'`` and
            ``'annotations'`` lists.
        output_file: Path of the TFRecord file to write.
        image_dir: Directory that the images' ``'file_name'`` values are
            joined onto.
    """
    with open(coco_annotation_file, 'r') as f:
        data = json.load(f)

    # Group annotations by image once instead of rescanning the whole
    # annotation list for every image.
    annotations_by_image = {}
    for ann in data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    with tf.io.TFRecordWriter(output_file) as writer:
        for item in data['images']:
            annotations_list = annotations_by_image.get(item['id'])
            if not annotations_list:  # skip images without annotations
                continue
            image_path = os.path.join(image_dir, item['file_name'])
            if not os.path.exists(image_path):
                print(f"Image {image_path} not found.")
                continue
            image = tf.io.read_file(image_path)
            tf_example = tfrecord_creator(image, annotations_list)
            writer.write(tf_example.SerializeToString())

# Inputs and output of the COCO -> TFRecord conversion: the annotation JSON,
# the TFRecord file to create, and the directory the image file names are joined onto.
coco_annotation_file = '/content/data/annotations/merged_coco.json'
output_file = '/content/data/merged_coco.tfrecord'
image_dir = '/content/data/images'
main(coco_annotation_file, output_file, image_dir)

In [None]:
import tensorflow as tf

def split_tfrecord(tfrecord_path, train_path, val_path, split_ratio=0.8):
    """
    Split a TFRecord file into a training file and a validation file.

    Records keep their original order: the first ``int(total * split_ratio)``
    records go to the training file and the remainder to the validation file.
    No shuffling is performed.

    Args:
    - tfrecord_path: Path to the source TFRecord file.
    - train_path: Path of the new TFRecord file for training data.
    - val_path: Path of the new TFRecord file for validation data.
    - split_ratio: Fraction of the records used for training. Defaults to 0.8 (80% for training).
    """

    # First pass: count the records in the source file.
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
    total_records = sum(1 for _ in raw_dataset)

    # Number of leading records that belong to the training split.
    train_size = int(total_records * split_ratio)

    # Second pass: route every record to the matching output file.
    with tf.io.TFRecordWriter(train_path) as train_writer, tf.io.TFRecordWriter(val_path) as val_writer:
        for index, record in enumerate(raw_dataset):
            writer = train_writer if index < train_size else val_writer
            writer.write(record.numpy())

# Example usage: 80% of the records go to train.tfrecord, the rest to val.tfrecord.
split_tfrecord('/content/data/merged_coco.tfrecord',
               '/content/data/train.tfrecord',
               '/content/data/val.tfrecord',
               split_ratio=0.8)

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2

def draw_bbox(image, bbox, class_name, color=(255, 0, 0)):
    """Draw one labelled bounding box on an image.

    Args:
        image: Image array whose ``shape`` unpacks to (height, width, channels).
        bbox: ``[xmin, ymin, xmax, ymax]`` given as fractions of the image
            width/height; they are scaled to pixels here.
        class_name: Text drawn 10 pixels above the box's top-left corner.
        color: Colour tuple handed to OpenCV for both the box and the text.
            The default is red for an RGB-ordered image, which is what the
            caller in this notebook passes (decoded with 3 channels by
            TensorFlow and shown with matplotlib). The previous hard-coded
            (0, 0, 255) is blue in RGB order.

    Returns:
        The image returned by the OpenCV drawing calls.
    """
    image_h, image_w, _ = image.shape
    start_point = (int(bbox[0] * image_w), int(bbox[1] * image_h))
    end_point = (int(bbox[2] * image_w), int(bbox[3] * image_h))
    image = cv2.rectangle(image, start_point, end_point, color, 2)
    label_origin = (start_point[0], start_point[1] - 10)
    image = cv2.putText(image, class_name, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
    return image

def extract_image_from_tfrecord(tfrecord_path, num_images=20):
    """Display the first records of a TFRecord file with their boxes drawn.

    Args:
        tfrecord_path: Path to a TFRecord file whose examples contain the
            feature keys listed in ``feature_description`` below.
        num_images: How many records to display. Defaults to 20, the value
            that was previously hard-coded.
    """
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)

    # Features to parse: the encoded image plus per-object boxes and class text.
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/text': tf.io.VarLenFeature(tf.string)
    }

    def _parse_function(example_proto):
        return tf.io.parse_single_example(example_proto, feature_description)

    parsed_dataset = raw_dataset.map(_parse_function)
    for features in parsed_dataset.take(num_images):
        image_data = features['image/encoded'].numpy()
        # decode_image detects the encoding itself, so PNG and JPEG records
        # are both handled; expand_animations=False keeps the result 3-D.
        image = tf.io.decode_image(image_data, channels=3, expand_animations=False).numpy()

        # Per-object boxes and class text are stored sparse; densify them.
        xmin = tf.sparse.to_dense(features['image/object/bbox/xmin']).numpy()
        ymin = tf.sparse.to_dense(features['image/object/bbox/ymin']).numpy()
        xmax = tf.sparse.to_dense(features['image/object/bbox/xmax']).numpy()
        ymax = tf.sparse.to_dense(features['image/object/bbox/ymax']).numpy()
        class_texts = tf.sparse.to_dense(features['image/object/class/text']).numpy()

        # Draw every box together with its class text on the image.
        for x1, y1, x2, y2, class_text in zip(xmin, ymin, xmax, ymax, class_texts):
            bbox = [x1, y1, x2, y2]
            class_name = class_text.decode('utf-8')
            image = draw_bbox(image, bbox, class_name)

        plt.imshow(image)
        plt.axis('off')
        plt.show()

# Replace the argument with the path to your own TFRecord file if needed.
extract_image_from_tfrecord('/content/data/train.tfrecord')

# Download Model

In [None]:
# Download and unpack the pre-trained SSD MobileNet V2 FPNLite 640x640 archive.
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz
!tar -xf ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz
# Checkpoint path inside the unpacked directory (presumably the prefix to fine-tune from — confirm against the archive contents).
fine_tune_checkpoint = 'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/ckpt-0'

--2023-08-11 08:40:02--  http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 142.251.10.128, 2404:6800:4003:c00::80
Connecting to download.tensorflow.org (download.tensorflow.org)|142.251.10.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20518283 (20M) [application/x-tar]
Saving to: ‘ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz’


2023-08-11 08:40:03 (56.4 MB/s) - ‘ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz’ saved [20518283/20518283]



# Prepare the Training Configuration

In [None]:
# Source: https://github.com/tensorflow/models/blob/master/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
# Download the base pipeline config matching the SSD model fetched above.
!wget https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
base_config_path = 'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config'

--2023-08-11 08:46:01--  https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4684 (4.6K) [text/plain]
Saving to: ‘ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config’


2023-08-11 08:46:02 (48.3 MB/s) - ‘ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config’ saved [4684/4684]



In [None]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

# Load the downloaded base pipeline config into a protobuf message.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(base_config_path, "r") as f:
    proto_str = f.read()
    text_format.Merge(proto_str, pipeline_config)

# NOTE(review): num_classes should match the number of entries in labelmap.pbtxt;
# the TFRecord cell in this notebook writes a single class text — confirm that 4 is intended.
pipeline_config.model.ssd.num_classes = 4
# pipeline_config.model.ssd.add_background_class = True
pipeline_config.train_config.batch_size = 2
pipeline_config.train_config.fine_tune_checkpoint = 'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/ckpt-0'
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
# NOTE(review): labelmap.pbtxt is not created by any cell visible in this notebook;
# it has to be present in the working directory.
pipeline_config.train_input_reader.label_map_path = 'labelmap.pbtxt'
# Point the readers at the files actually written by split_tfrecord above
# (previously 'train.tfrecord' and a never-created 'test.tfrecord').
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = ['/content/data/train.tfrecord']
pipeline_config.eval_input_reader[0].label_map_path = 'labelmap.pbtxt'
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = ['/content/data/val.tfrecord']
pipeline_config.train_config.num_steps = 2000

# Write the modified config back over the downloaded file.
config_text = text_format.MessageToString(pipeline_config)
with tf.io.gfile.GFile(base_config_path, "wb") as f:
    f.write(config_text)

In [None]:
# Print the rewritten pipeline config so the overrides can be checked by eye.
%cat $base_config_path

model {
  ssd {
    num_classes: 4
    image_resizer {
      fixed_shape_resizer {
        height: 640
        width: 640
      }
    }
    feature_extractor {
      type: "ssd_mobilenet_v2_fpn_keras"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 4e-05
          }
        }
        initializer {
          random_normal_initializer {
            mean: 0.0
            stddev: 0.01
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.997
          scale: true
          epsilon: 0.001
        }
      }
      use_depthwise: true
      override_base_feature_extractor_hyperparams: true
      fpn {
        min_level: 3
        max_level: 7
        additional_layer_depth: 128
      }
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      arg

# Train

In [None]:
# Directory passed to the training script as --model_dir.
model_dir = 'training4/'

In [None]:
# Run the TF2 Object Detection API training script with the edited pipeline config;
# {model_dir} is interpolated from the Python variable defined above.
!python /content/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path="ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config" \
    --model_dir={model_dir} \
    --num_eval_steps=500

2023-08-11 10:02:01.441800: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
I0811 10:02:01.442843 134875591393280 mirrored_strategy.py:419] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
INFO:tensorflow:Maybe overwriting train_steps: None
I0811 10:02:01.465642 134875591393280 config_util.py:552] Maybe overwriting train_steps: None
INFO:tensorflow:Maybe overwriting use_bfloat16: False
I0811 10:02:01.465810 134875591393280 config_util.py:552] Maybe overwriting use_bfloat16: False
Instructions for updating:
rename to distribute_datasets_from_function
W0811 10:02:01.644121 134875591393280 deprecation.py:364] From /usr/local/lib/python3.10/dist-packages/object_detection/model_lib_v2.py:563: StrategyBase.experimenta

In [None]:
# Load the TensorBoard notebook extension and open it on the training log directory.
%load_ext tensorboard
%tensorboard --logdir '/content/training4/train'

In [None]:
!mkdir "export"

!python "/content/models/research/object_detection/exporter_main_v2.py" --input_type='image_tensor' --pipeline_config_path="ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.config" --trained_checkpoint_dir="/content/training4" --output_directory="export2"

mkdir: cannot create directory ‘export’: File exists
2023-08-11 10:11:12.819676: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.map_fn(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))
W0811 10:11:13.150201 139333871652864 deprecation.py:641] From /usr/local/lib/python3.10/dist-packages/tensorflow/python/autograph/impl/api.py:459: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with back_prop=False is deprecated and will be removed in a future version.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.map_fn(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.

In [None]:
# Reset Keras' global state, then load the exported SavedModel for inference.
tf.keras.backend.clear_session()
model = tf.saved_model.load('/content/export2/saved_model')

In [None]:
from six import BytesIO

def load_image_into_numpy_array(path):
  """Load an image from file into a numpy array.

  The file is read through ``tf.io.gfile`` and decoded with PIL. The image
  is converted to RGB first, so PNGs with an alpha channel, grayscale or
  palette images also produce three channels instead of failing on reshape.

  Args:
    path: path of the image file, readable by ``tf.io.gfile.GFile``

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3)
  """
  # Context manager so the file handle is closed once the bytes are read.
  with tf.io.gfile.GFile(path, 'rb') as image_file:
    img_data = image_file.read()
  image = Image.open(BytesIO(img_data)).convert('RGB')
  return np.array(image, dtype=np.uint8)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import numpy as np

import tensorflow as tf
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util


In [None]:
# Build the category index from labelmap.pbtxt; it is passed to the visualisation call below.
category_index = label_map_util.create_category_index_from_labelmap('labelmap.pbtxt', use_display_name=True)

In [None]:
def run_inference_for_single_image(model, image):
  """Run the model's ``serving_default`` signature on a single image.

  Args:
    model: object exposing ``signatures['serving_default']`` (here the
      loaded SavedModel).
    image: a single image, anything ``np.asarray`` accepts.

  Returns:
    dict of numpy arrays with the batch dimension removed and truncated to
    the first ``num_detections`` entries, plus ``'num_detections'`` as an
    int. ``'detection_classes'`` is cast to int64. When the model returns
    ``'detection_masks'``, ``'detection_masks_reframed'`` is added.
  """
  image = np.asarray(image)
  # The model expects a batch of images, so convert to a tensor and add a
  # leading axis with `tf.newaxis`.
  input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]

  # Run inference
  model_fn = model.signatures['serving_default']
  output_dict = model_fn(input_tensor)

  # All outputs are batched tensors. Read the detection count explicitly
  # from the first element: calling int() on a shape-[1] tensor relies on
  # NumPy's deprecated size-1-array-to-scalar conversion.
  num_detections = int(output_dict.pop('num_detections').numpy().ravel()[0])
  # Take index [0] to drop the batch dimension and keep only the first
  # num_detections entries of every output.
  output_dict = {key: value[0, :num_detections].numpy()
                 for key, value in output_dict.items()}
  output_dict['num_detections'] = num_detections

  # detection_classes should be ints.
  output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)

  # Handle models with masks:
  if 'detection_masks' in output_dict:
    # Reframe the bbox masks to the image size.
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
              output_dict['detection_masks'], output_dict['detection_boxes'],
               image.shape[0], image.shape[1])
    detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                       tf.uint8)
    output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()

  return output_dict

In [None]:
# `glob` is not imported by any earlier cell, so import it here; otherwise
# this cell raises NameError on a fresh kernel.
import glob

# sorted() makes the display order deterministic (glob's order is arbitrary).
for image_path in sorted(glob.glob('/content/data/images/source/*.png')):
  image_np = load_image_into_numpy_array(image_path)
  output_dict = run_inference_for_single_image(model, image_np)
  # Draws the detections onto image_np, which is then displayed below.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks_reframed', None),
      use_normalized_coordinates=True,
      line_thickness=8)
  display(Image.fromarray(image_np))