<a href="https://colab.research.google.com/github/Kanisha0710/fsui-assi/blob/main/BayWatch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%writefile label_map.pbtxt
# Label map for the detector. The two entries correspond to `num_classes: 2`
# in ssd_mobilenet_v2.config. create_tfrecords.py looks up each annotation's
# <name> text in this map, so the names here must match the XML files.
item {
    id: 1
    name: 'debris'
}

item {
    id: 2
    name: 'cloud'
}

Overwriting label_map.pbtxt


In [None]:
%%writefile create_tfrecords.py
import os
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
import xml.etree.ElementTree as ET
import pandas as pd

def create_tf_example(image_path, annotation_path, label_map_dict):
    """Build a ``tf.train.Example`` for one image and its XML annotation.

    The annotation is expected to contain a ``<size>`` element with
    ``<width>``/``<height>`` and zero or more ``<object>`` elements, each with
    a ``<name>`` and a ``<bndbox>`` holding ``xmin``/``ymin``/``xmax``/``ymax``
    in pixels.

    Args:
        image_path: Path to the image file; its raw bytes are stored as-is and
            the record is tagged with format ``jpeg``.
        annotation_path: Path to the XML annotation for that image.
        label_map_dict: Mapping from class name to integer class id.

    Returns:
        A ``tf.train.Example`` with the image bytes, image size and the
        per-object normalized boxes, class names and class ids.

    Raises:
        ValueError: If the annotation declares a non-positive width or height.
        KeyError: If an object's class name is not in ``label_map_dict``.
    """
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    root = ET.parse(annotation_path).getroot()

    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)
    if width <= 0 or height <= 0:
        # Boxes are normalized by these values, so a zero size would otherwise
        # surface as a bare ZeroDivisionError (or a silently broken record).
        raise ValueError(
            f"Invalid image size {width}x{height} in {annotation_path}")

    def _normalized(text, extent):
        # Clamp to [0, 1]: boxes that spill slightly past the image border
        # would otherwise produce out-of-range normalized coordinates.
        return min(max(float(text) / extent, 0.0), 1.0)

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for obj in root.findall('object'):
        class_name = obj.find('name').text.strip()
        if class_name not in label_map_dict:
            raise KeyError(
                f"Class '{class_name}' in {annotation_path} is not in the label map "
                f"(known classes: {sorted(label_map_dict)})")

        bndbox = obj.find('bndbox')
        xmins.append(_normalized(bndbox.find('xmin').text, width))
        xmaxs.append(_normalized(bndbox.find('xmax').text, width))
        ymins.append(_normalized(bndbox.find('ymin').text, height))
        ymaxs.append(_normalized(bndbox.find('ymax').text, height))
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

def main():
    """Convert every annotated JPEG in ``images/train`` into one TFRecord file.

    For each ``.jpg``/``.jpeg`` file, the annotation is expected next to the
    image with the same stem and a ``.xml`` extension. Images without an
    annotation are skipped and counted. The result is written to
    ``tfrecords/train.record``.
    """
    label_map_path = 'label_map.pbtxt'
    train_images_dir = 'images/train'
    output_path = 'tfrecords/train.record'

    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    # The writer does not create missing parent directories.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Sorted so the record order is reproducible across runs; extension match
    # is case-insensitive so files such as ``IMG.JPG`` are not dropped.
    image_files = sorted(
        f for f in os.listdir(train_images_dir)
        if f.lower().endswith(('.jpg', '.jpeg')))

    written = 0
    skipped = 0
    # Context manager guarantees the file is flushed and closed even if one
    # annotation fails to parse.
    with tf.io.TFRecordWriter(output_path) as writer:
        for image_file in image_files:
            image_path = os.path.join(train_images_dir, image_file)
            annotation_path = os.path.join(
                train_images_dir, os.path.splitext(image_file)[0] + '.xml')

            if not os.path.exists(annotation_path):
                skipped += 1
                continue

            tf_example = create_tf_example(image_path, annotation_path, label_map_dict)
            writer.write(tf_example.SerializeToString())
            written += 1

    print(f"Wrote {written} examples to {output_path} "
          f"({skipped} images skipped: no matching .xml annotation)")

if __name__ == '__main__':
    main()


Writing create_tfrecords.py


In [None]:
%%writefile train_model.py
import tensorflow as tf
from object_detection import model_lib_v2

def main():
    """Run the Object Detection API training loop for this project.

    Calls ``model_lib_v2.train_loop`` with the pipeline config
    ``ssd_mobilenet_v2.config``, ``training`` as the model directory and a
    step count of 25000.
    """
    train_loop_kwargs = dict(
        pipeline_config_path='ssd_mobilenet_v2.config',
        model_dir='training',
        train_steps=25000,
    )
    model_lib_v2.train_loop(**train_loop_kwargs)

if __name__ == '__main__':
    main()

Writing train_model.py


In [4]:
%%writefile export_model.py
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter_lib_v2

def main():
    """Export the trained checkpoint in ``training`` as an inference model.

    Reads ``ssd_mobilenet_v2.config`` into a ``TrainEvalPipelineConfig`` proto
    and hands it to ``exporter_lib_v2.export_inference_graph``, which writes
    its output under ``exported_model``.
    """
    # Local import: only this function needs the pipeline proto definition.
    from object_detection.protos import pipeline_pb2

    pipeline_config_path = 'ssd_mobilenet_v2.config'
    trained_checkpoint_dir = 'training'
    output_directory = 'exported_model'

    # export_inference_graph expects a parsed pipeline proto (not a path) and
    # an explicit input type; passing `pipeline_config_path=` raises TypeError.
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    with tf.io.gfile.GFile(pipeline_config_path, 'r') as config_file:
        text_format.Merge(config_file.read(), pipeline_config)

    exporter_lib_v2.export_inference_graph(
        input_type='image_tensor',
        pipeline_config=pipeline_config,
        trained_checkpoint_dir=trained_checkpoint_dir,
        output_directory=output_directory
    )

if __name__ == '__main__':
    main()

Writing export_model.py


In [None]:
%%writefile ssd_mobilenet_v2.config
# SSD detector definition: MobileNet V2 feature extractor, convolutional box
# predictor, six anchor layers, sigmoid classification loss.
model {
  ssd {
    num_classes: 2  # Number of classes (debris and cloud)
    # Inputs are resized to a fixed 300x300 before entering the network.
    image_resizer {
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    feature_extractor {
      type: "ssd_mobilenet_v2"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 3.99999989895e-05
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.0299999993294
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.999700009823
          center: true
          scale: true
          epsilon: 0.0010000000475
          train: true
        }
      }
      use_depthwise: true
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    # Matched and unmatched thresholds are both 0.5, so there is no
    # in-between band of ignored anchors.
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    # Same conv hyperparameters as the feature extractor above.
    box_predictor {
      convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 3.99999989895e-05
            }
          }
          initializer {
            truncated_normal_initializer {
              mean: 0.0
              stddev: 0.0299999993294
            }
          }
          activation: RELU_6
          batch_norm {
            decay: 0.999700009823
            center: true
            scale: true
            epsilon: 0.0010000000475
            train: true
          }
        }
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.800000011921
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
      }
    }
    # Six anchor layers with scales from ~0.2 to ~0.95 and five aspect ratios.
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.20000000298
        max_scale: 0.949999988079
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.333299994469
      }
    }
    # NOTE(review): score_threshold here is ~0.3 while inference.py filters
    # again with a default min_score_thresh of 0.5 - confirm both are intended.
    post_processing {
      batch_non_max_suppression {
        score_threshold: 0.300000011921
        iou_threshold: 0.600000023842
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    # Classification and localization losses are weighted equally (1.0 each).
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.990000009537
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 0
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
  }
}
# Training settings: batch size, augmentation, optimizer schedule and the
# checkpoint to fine-tune from.
train_config {
  batch_size: 24
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  optimizer {
    momentum_optimizer {
      learning_rate {
        # NOTE(review): the schedule is sized for 50000 steps, but
        # train_model.py calls train_loop with train_steps=25000 - confirm
        # which step count is intended.
        cosine_decay_learning_rate {
          learning_rate_base: 0.20000000298
          total_steps: 50000
          warmup_learning_rate: 0.0666666679084
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.899999976158
    }
    use_moving_average: false
  }
  # NOTE(review): no cell visible in this notebook downloads this checkpoint,
  # and the `model.ckpt` prefix looks like a TF1-style checkpoint while
  # train_model.py uses model_lib_v2 - confirm it exists and is compatible.
  fine_tune_checkpoint: "ssd_mobilenet_v2_coco_2018_03_29/model.ckpt"
  num_steps: 50000
  startup_delay_steps: 0.0
  replicas_to_aggregate: 8
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
  fine_tune_checkpoint_type: "detection"
}
# Training data: the TFRecord file written by create_tfrecords.py.
train_input_reader {
  label_map_path: "label_map.pbtxt"
  tf_record_input_reader {
    input_path: "tfrecords/train.record"
  }
}
# NOTE(review): num_examples is 8000 - confirm it matches the size of the
# evaluation set actually used.
eval_config {
  num_examples: 8000
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}
# NOTE(review): create_tfrecords.py only writes tfrecords/train.record; no
# cell visible in this notebook produces tfrecords/eval.record.
eval_input_reader {
  label_map_path: "label_map.pbtxt"
  shuffle: false
  num_readers: 1
  tf_record_input_reader {
    input_path: "tfrecords/eval.record"
  }
}

Overwriting ssd_mobilenet_v2.config


In [3]:
%%writefile inference.py
import os
import numpy as np
import tensorflow as tf
import cv2
import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
import pandas as pd

class DetectionInference:
    """Runs an exported detection SavedModel over images and collects results.

    Attributes set by ``__init__``:
        detect_fn: The object returned by ``tf.saved_model.load``; it is called
            with a batched uint8 image tensor.
        category_index: Mapping from class id to a category dict (with at
            least a ``'name'`` key), built from the label map.
    """

    class UnreadableImageError(ValueError):
        """Raised by ``process_image`` when OpenCV cannot decode the file."""

    def __init__(self, model_path='exported_model/saved_model', label_map_path='label_map.pbtxt'):
        """Load the SavedModel and the label map.

        Args:
            model_path: Directory of the exported SavedModel.
            label_map_path: Path to the ``.pbtxt`` label map.
        """
        # Load the model
        self.detect_fn = tf.saved_model.load(model_path)

        # Load the label map
        self.category_index = label_map_util.create_category_index_from_labelmap(
            label_map_path, use_display_name=True)

    def process_image(self, image_path, min_score_thresh=0.5):
        """Run detection on a single image file.

        Args:
            image_path: Path of the image to read with OpenCV.
            min_score_thresh: Detections scoring below this are dropped.

        Returns:
            A dict with ``'filename'`` (basename of ``image_path``),
            ``'scores'``, ``'boxes'``, ``'classes'`` (arrays filtered by the
            threshold) and ``'image'`` (the RGB image array).

        Raises:
            DetectionInference.UnreadableImageError: (a ``ValueError``) if the
                file is missing or cannot be decoded.
        """
        # cv2.imread signals failure by returning None instead of raising;
        # without this check the failure shows up as an opaque cvtColor error.
        image_bgr = cv2.imread(image_path)
        if image_bgr is None:
            raise self.UnreadableImageError(f"Could not read image: {image_path}")
        image_np = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # Add a leading batch dimension of 1 for the model call.
        input_tensor = tf.convert_to_tensor(
            np.expand_dims(image_np, 0), dtype=tf.uint8)

        # Perform detection
        detections = self.detect_fn(input_tensor)

        # Take the first (only) batch entry of each output.
        detection_scores = detections['detection_scores'][0].numpy()
        detection_boxes = detections['detection_boxes'][0].numpy()
        detection_classes = detections['detection_classes'][0].numpy().astype(np.int32)

        # Boolean mask of detections at or above the threshold.
        valid_detections = detection_scores >= min_score_thresh

        return {
            'filename': os.path.basename(image_path),
            'scores': detection_scores[valid_detections],
            'boxes': detection_boxes[valid_detections],
            'classes': detection_classes[valid_detections],
            'image': image_np
        }

    def process_directory(self, input_dir, output_dir, min_score_thresh=0.5):
        """Run detection on every ``.png``/``.jpg``/``.jpeg`` file in a directory.

        For each readable image an annotated copy named ``detected_<file>`` is
        written to ``output_dir``. If any detection was kept, all detections
        are also saved to ``detection_results.csv`` in ``output_dir``.
        Unreadable images are reported and skipped.

        Args:
            input_dir: Directory containing the images.
            output_dir: Directory for annotated images and the CSV; created if
                missing.
            min_score_thresh: Detections scoring below this are dropped.

        Returns:
            A list with one dict per kept detection: ``filename``, ``class``,
            ``confidence`` and the box as ``ymin``/``xmin``/``ymax``/``xmax``.
        """
        os.makedirs(output_dir, exist_ok=True)
        results = []

        # Sorted so console output and CSV row order are reproducible.
        for image_file in sorted(os.listdir(input_dir)):
            if not image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            image_path = os.path.join(input_dir, image_file)
            print(f"Processing {image_file}...")

            # One corrupt file should not abort the whole batch.
            try:
                detection_result = self.process_image(image_path, min_score_thresh)
            except self.UnreadableImageError as err:
                print(f"Skipping {image_file}: {err}")
                continue

            # Draw on a copy so the returned image array stays untouched.
            image_with_detections = detection_result['image'].copy()
            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_with_detections,
                detection_result['boxes'],
                detection_result['classes'],
                detection_result['scores'],
                self.category_index,
                use_normalized_coordinates=True,
                min_score_thresh=min_score_thresh,
                line_thickness=8)

            # Convert back to BGR for OpenCV
            image_with_detections = cv2.cvtColor(image_with_detections, cv2.COLOR_RGB2BGR)
            output_path = os.path.join(output_dir, f"detected_{image_file}")
            cv2.imwrite(output_path, image_with_detections)

            # Store results; boxes are indexed as [ymin, xmin, ymax, xmax].
            for score, box, cls in zip(
                    detection_result['scores'],
                    detection_result['boxes'],
                    detection_result['classes']):
                # Fall back to 'N/A' rather than raising KeyError when the
                # model emits a class id that the label map does not define.
                class_name = self.category_index.get(int(cls), {}).get('name', 'N/A')
                results.append({
                    'filename': image_file,
                    'class': class_name,
                    'confidence': float(score),
                    'ymin': float(box[0]),
                    'xmin': float(box[1]),
                    'ymax': float(box[2]),
                    'xmax': float(box[3])
                })

        # Save results to CSV
        if results:
            df = pd.DataFrame(results)
            csv_path = os.path.join(output_dir, 'detection_results.csv')
            df.to_csv(csv_path, index=False)
            print(f"\nResults saved to {csv_path}")

        return results

def main():
    """Run inference on ``images/inference`` and print a short summary.

    Annotated images and the CSV are written to ``inference_results`` by
    ``DetectionInference.process_directory``. The summary is built from the
    returned detections only, so images without any detection are not counted.
    """
    # Initialize detector
    detector = DetectionInference()

    # Set up directories
    inference_dir = 'images/inference'  # Directory containing unannotated images
    output_dir = 'inference_results'    # Directory to save results

    # Process all images
    print("Starting inference on images...")
    results = detector.process_directory(inference_dir, output_dir)

    # Print summary
    if results:
        df = pd.DataFrame(results)
        print("\nDetection Summary:")
        # `results` has one row per detection, so this counts distinct files
        # with at least one detection - not every image that was processed.
        print(f"Images with detections: {df['filename'].nunique()}")
        print("\nDetections by class:")
        print(df['class'].value_counts())
        print(f"\nAverage confidence score: {df['confidence'].mean():.3f}")
    else:
        print("No detections found.")

if __name__ == '__main__':
    main()

Writing inference.py


In [8]:
# Create the working directories the scripts above read from and write to
!mkdir -p images/train images/inference tfrecords training exported_model

In [9]:
from google.colab import files

# Opens the Colab upload dialog. Select the training .jpg images together with
# their matching .xml annotation files; `uploaded` is keyed by filename.
uploaded = files.upload()


Saving 20160928_153233_0e16_16816-29821-16.jpg to 20160928_153233_0e16_16816-29821-16.jpg
Saving 20160928_153233_0e16_16816-29824-16.jpg to 20160928_153233_0e16_16816-29824-16.jpg
Saving 20160928_153233_0e16_16816-29824-16.xml to 20160928_153233_0e16_16816-29824-16.xml
Saving 20160928_153233_0e16_16816-29825-16.jpg to 20160928_153233_0e16_16816-29825-16.jpg
Saving 20160928_153233_0e16_16816-29825-16.xml to 20160928_153233_0e16_16816-29825-16.xml
Saving 20160928_153233_0e16_16816-29829-16.jpg to 20160928_153233_0e16_16816-29829-16.jpg
Saving 20160928_153233_0e16_16816-29829-16.xml to 20160928_153233_0e16_16816-29829-16.xml
Saving 20160928_153233_0e16_16816-29831-16.jpg to 20160928_153233_0e16_16816-29831-16.jpg
Saving 20160928_153233_0e16_16816-29831-16.xml to 20160928_153233_0e16_16816-29831-16.xml
Saving 20160928_153233_0e16_16817-29821-16.jpg to 20160928_153233_0e16_16817-29821-16.jpg
Saving 20161008_153104_0e3a_16807-29816-16.jpg to 20161008_153104_0e3a_16807-29816-16.jpg
Saving 201

In [12]:
import os
import shutil

# create_tfrecords.py looks for each image's .xml annotation next to the image
# in images/train, so the annotations must be moved along with the .jpg files;
# moving only the images leaves every image without an annotation.
train_dir = 'images/train'
os.makedirs(train_dir, exist_ok=True)

for filename in uploaded.keys():
    is_training_file = filename.lower().endswith(('.jpg', '.xml'))
    # The existence check makes the cell safe to re-run after files have
    # already been moved.
    if is_training_file and os.path.exists(filename):
        shutil.move(filename, os.path.join(train_dir, filename))


In [14]:
# List the images staged in the directory that inference.py reads from
!ls images/inference

 20160928_153233_0e16_16816-29828-16.jpg
 20160928_153233_0e16_16816-29830-16.jpg
 20160928_153233_0e16_16817-29827-16.jpg
 20160928_153233_0e16_16822-29830-16.jpg
 20160928_153233_0e16_16831-29829-16.jpg
 20160928_153233_0e16_16831-29830-16.jpg
 20160928_153233_0e16_16834-29830-16.jpg
 20161008_153104_0e3a_16807-29817-16.jpg
 20161008_153104_0e3a_16812-29820-16.jpg
 20161008_153104_0e3a_16813-29822-16.jpg
 20161008_153105_0e3a_16908-29805-16.jpg
 20161008_153105_0e3a_16930-29808-16.jpg
 20161008_153105_0e3a_16933-29811-16.jpg
 20161008_153105_0e3a_16939-29815-16.jpg
 20170227_203600_1_0c46_16902-29835-16.jpg
 20170227_203601_0c46_16902-29837-16.jpg
 20170322_153132_0e0f_17118-29799-16.jpg
 20170322_153132_0e0f_17126-29793-16.jpg
 20170322_153132_0e0f_17126-29794-16.jpg
 20170322_153132_0e0f_17126-29795-16.jpg
 20170323_152750_0e20_17285-29803-16.jpg
 20170323_152750_0e20_17287-29803-16.jpg
 20170323_152750_0e20_17293-29802-16.jpg
 20170323_152750_0e20_17296-29802-16.jpg
 20170323_1527