# Catch Basin Classifier

## Imports

In [1]:
import requests
import cv2
import tensorflow as tf
from pathlib import Path
import pandas as pd
from object_detection.utils import dataset_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

## Load the Model

In [2]:
MODEL_NAME = "ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8"
DOWNLOAD_URL = "http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz"

In [21]:
print("Downloading model...")
r = requests.get(DOWNLOAD_URL)
with open(MODEL_NAME + ".tar.gz", "wb") as f:
    f.write(r.content)
print("Model downloaded.")

Downloading model...
Model downloaded.


In [22]:
!tar -xf {MODEL_NAME + ".tar.gz"}

In [23]:
model = tf.saved_model.load(str(Path(MODEL_NAME, "saved_model").absolute()))

In [24]:
model

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject at 0x7fe531050e20>

## Label Map
`label_map.pbtxt` maps the class (name of number) to a number. 

In [7]:
!cat label_map.pbtxt

cat: label_map.pbtxt: No such file or directory


Define some utility functions to convert from class to `int` and `int` to class.

In [8]:
def class_to_int(class_name):
    if class_name == "blocked":
        return 1
    elif class_name == "partial":
        return 2
    elif class_name == "clear":
        return 3
    else:
        raise Exception("Invalid input")


def int_to_class(integer):
    if integer == 1:
        return "blocked"
    elif integer == 2:
        return "partial"
    elif integer == 3:
        return "clear"
    else:
        raise Exception("Invalid input")

## Prepare data

Convert all PASCAL VOC (XML) files in `data/` to a CSV file, `labels.csv`

In [9]:
import pandas as pd
import glob
import xml.etree.ElementTree as XMLElementTree

print("Converting to CSV...")
columns = ("filename", "class", "width", "height", "xmin", "ymin", "xmax", "ymax")
rows = []
for filename in glob.glob('data/*.xml'):
    parsed_obj = XMLElementTree.parse(filename)
    root = parsed_obj.getroot()
    filename = root.find("filename").text
    for obj in root.findall("object"):
        row = []
        row.append(filename)
        row.append(obj.find("name").text)  # name => class
        row.append(root.find("size").find("width").text)
        row.append(root.find("size").find("height").text)
        for i in obj.find("bndbox"):
            row.append(int(i.text))
        rows.append(row)

df = pd.DataFrame(rows, columns=columns)
df.to_csv("labels.csv", index=False)
print("Done.")

Converting to CSV...
Done.


Load the CSV file with Pandas

In [10]:
df = pd.read_csv("labels.csv")
df.head()

Unnamed: 0,filename,class,width,height,xmin,ymin,xmax,ymax
0,B1.JPG,blocked,659,800,316,550,458,609
1,C11.JPG,clear,526,702,216,509,359,591
2,B4.JPG,blocked,505,695,159,398,343,427
3,C6.JPG,clear,534,695,177,458,332,511
4,C12.JPG,clear,524,776,240,333,326,358


Split data into test data and train data

In [11]:
train_df = df.sample(frac=0.8, random_state=100)
test_df = df.drop(train_df.index).sample(frac=1.0)

Create TF Records within `annotations/` directory.

In [12]:
if not Path("annotations").exists():
    !mkdir annotations

def create_tf_record(dataframe, record_filename):
    with tf.io.TFRecordWriter(str(Path("annotations", record_filename))) as writer:
        for index, row in dataframe.iterrows():
            filename_encoded = row["filename"].encode("utf-8")
            width = int(row["width"])
            height = int(row["height"])
            encoded_jpg = None
            with tf.io.gfile.GFile(str(Path("data") / row["filename"]), "rb") as f:
                encoded_jpg = f.read()
            if encoded_jpg is None:
                raise Exception("Unable to read image: " + row["filename"])

            example = tf.train.Example(features=tf.train.Features(feature={
                "image/height": dataset_util.int64_feature(height),
                "image/width": dataset_util.int64_feature(width),
                "image/filename": dataset_util.bytes_feature(filename_encoded),
                "image/source_id": dataset_util.bytes_feature(filename_encoded),
                "image/encoded": dataset_util.bytes_feature(encoded_jpg),
                "image/format": dataset_util.bytes_feature(b"jpg"),
                "image/object/bbox/xmin": dataset_util.float_list_feature([int(row["xmin"]) / width]),
                "image/object/bbox/xmax": dataset_util.float_list_feature([int(row["xmax"]) / width]),
                "image/object/bbox/ymin": dataset_util.float_list_feature([int(row["ymin"]) / height]),
                "image/object/bbox/ymax": dataset_util.float_list_feature([int(row["ymax"]) / height]),
                "image/object/class/text": dataset_util.bytes_list_feature([row["class"].encode("utf-8")]),
                "image/object/class/label": dataset_util.int64_list_feature([class_to_int(row["class"])]),
            }))
            writer.write(example.SerializeToString())

# Train Data
create_tf_record(train_df, "train.record")
# Test Data
create_tf_record(test_df, "test.record")

## Edit Model Configuration
Edit `pipeline.config` to configure the model to be better for the data.

Load `pipeline.config` into a python object.

In [None]:
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(str(Path(MODEL_NAME, "pipeline.config")), "r") as f:
    text_format.Merge(f.read(), pipeline_config)

Edit attributes of `pipeline_config`.

In [None]:
# 3 Classes: blocked, partial, and clear.
pipeline_config.model.ssd.num_classes = 3
# Set batch_size based on memory available.
pipeline_config.train_config.batch_size = 4
# Path to checkpoint of model
pipeline_config.train_config.fine_tune_checkpoint = str(Path(MODEL_NAME, "checkpoint0", "ckpt-0"))
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
pipeline_config.train_input_reader.label_map_path = str(Path("annotations", "label_map.pbtxt"))
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [str(Path("annotations", "train.record"))]
pipeline_config.eval_input_reader[0].label_map_path = str(Path("annotations", "label_map.pbtxt"))
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [str(Path("annotations", "test.record"))]

Save `pipeline_config` to `pipeline.config`.

In [29]:
with tf.io.gfile.GFile(str(Path(MODEL_NAME, "pipeline.config")), "w") as f:
    config_text = text_format.MessageToString(pipeline_config)
    f.write(config_text)

## Train the Model

In [30]:
!python tensorflow/models/research/object_detection/model_main_tf2.py --model_dir={MODEL_NAME} --pipeline_config_path={MODEL_NAME}/pipeline.config

2021-11-13 12:13:38.188949: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/snksynthesis/.local/lib/python3.8/site-packages/cv2/../../lib64:
2021-11-13 12:13:38.189011: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-11-13 12:13:40.310438: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/snksynthesis/.local/lib/python3.8/site-packages/cv2/../../lib64:/home/snksynthesis/.local/lib/python3.8/site-packages/cv2/../../lib64:
2021-11-13 12:13:40.310510: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-13 12:13:40

## Predict from Image

In [19]:
!python tensorflow/models/research/object_detection/model_main_tf2.py --model_dir={MODEL_NAME} --pipeline_config_path={MODEL_NAME}/pipeline.config --checkpoint_dir={MODEL_NAME} 

python: can't open file 'tensorflow/models/research/object_detection/model_main_tf2.py': [Errno 2] No such file or directory
