In [None]:
!pip install tensorflow-object-detection-api


# Converting Training XML annotations to CSV

In [None]:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
from tqdm import tqdm

def __list_to_csv(annotations, output_file):
    """Write the collected annotation rows to ``output_file`` as a CSV.

    Args:
        annotations: Iterable of 15-item rows, one per annotated object, in
            the column order listed below.
        output_file: Destination path of the CSV file.
    """
    image_columns = ['filename', 'folder', 'source_database',
                     'width', 'height', 'depth', 'segmented']
    object_columns = ['object_name', 'object_pose', 'object_truncated',
                      'object_difficult', 'xmin', 'ymin', 'xmax', 'ymax']
    frame = pd.DataFrame(annotations, columns=image_columns + object_columns)
    # The row index carries no information, so it is left out of the file.
    frame.to_csv(output_file, index=False)

def xml_to_csv(xml_dir, output_file):
    """Read every ``*.xml`` annotation file in a directory and write one CSV.

    Each ``<object>`` element found in an annotation file becomes one CSV row
    that repeats the image-level fields (filename, folder, size, ...).

    Args:
        xml_dir: Directory containing the XML annotation files.
        output_file: Path of the CSV file to create.
    """
    annotations = []
    # glob returns files in an arbitrary, filesystem-dependent order; sorting
    # makes the row order of the CSV reproducible between runs.
    xml_files = sorted(glob.glob(os.path.join(xml_dir, '*.xml')))
    for xml_file in tqdm(xml_files):
        root = ET.parse(xml_file).getroot()

        # Image-level fields, shared by every object in this file.
        image_fields = (
            root.find('filename').text,
            root.find('folder').text,
            root.find('source/database').text,
            int(root.find('size/width').text),
            int(root.find('size/height').text),
            int(root.find('size/depth').text),
            int(root.find('segmented').text),
        )

        for obj in root.findall('object'):
            object_fields = (
                obj.find('name').text,
                obj.find('pose').text,
                int(obj.find('truncated').text),
                int(obj.find('difficult').text),
                int(obj.find('bndbox/xmin').text),
                int(obj.find('bndbox/ymin').text),
                int(obj.find('bndbox/xmax').text),
                int(obj.find('bndbox/ymax').text),
            )
            annotations.append(image_fields + object_fields)

    __list_to_csv(annotations, output_file)

# Convert the training-split XML annotations into a single CSV file.
xml_dir = "/content/NEU-DET/train/annotations"
output_csv = "/content/NEU-DET/train/training_annotations.csv"
xml_to_csv(xml_dir, output_csv)


100%|██████████| 1440/1440 [00:00<00:00, 10866.82it/s]


# Converting Validation XML annotations to CSV

In [None]:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
from tqdm import tqdm

def __list_to_csv(annotations, output_file):
    """Write the collected annotation rows to ``output_file`` as a CSV.

    Args:
        annotations: Iterable of 15-item rows, one per annotated object, in
            the column order listed below.
        output_file: Destination path of the CSV file.
    """
    image_columns = ['filename', 'folder', 'source_database',
                     'width', 'height', 'depth', 'segmented']
    object_columns = ['object_name', 'object_pose', 'object_truncated',
                      'object_difficult', 'xmin', 'ymin', 'xmax', 'ymax']
    frame = pd.DataFrame(annotations, columns=image_columns + object_columns)
    # The row index carries no information, so it is left out of the file.
    frame.to_csv(output_file, index=False)

def xml_to_csv(xml_dir, output_file):
    """Read every ``*.xml`` annotation file in a directory and write one CSV.

    Each ``<object>`` element found in an annotation file becomes one CSV row
    that repeats the image-level fields (filename, folder, size, ...).

    Args:
        xml_dir: Directory containing the XML annotation files.
        output_file: Path of the CSV file to create.
    """
    annotations = []
    # glob returns files in an arbitrary, filesystem-dependent order; sorting
    # makes the row order of the CSV reproducible between runs.
    xml_files = sorted(glob.glob(os.path.join(xml_dir, '*.xml')))
    for xml_file in tqdm(xml_files):
        root = ET.parse(xml_file).getroot()

        # Image-level fields, shared by every object in this file.
        image_fields = (
            root.find('filename').text,
            root.find('folder').text,
            root.find('source/database').text,
            int(root.find('size/width').text),
            int(root.find('size/height').text),
            int(root.find('size/depth').text),
            int(root.find('segmented').text),
        )

        for obj in root.findall('object'):
            object_fields = (
                obj.find('name').text,
                obj.find('pose').text,
                int(obj.find('truncated').text),
                int(obj.find('difficult').text),
                int(obj.find('bndbox/xmin').text),
                int(obj.find('bndbox/ymin').text),
                int(obj.find('bndbox/xmax').text),
                int(obj.find('bndbox/ymax').text),
            )
            annotations.append(image_fields + object_fields)

    __list_to_csv(annotations, output_file)

# Convert the validation-split XML annotations into a single CSV file.
xml_dir = "/content/NEU-DET/validation/annotations"
output_csv = "/content/NEU-DET/validation/validation_annotations.csv"
xml_to_csv(xml_dir, output_csv)


100%|██████████| 361/361 [00:00<00:00, 2362.45it/s]


# Creating Training TFRecords


In [None]:
import pandas as pd

# Read the CSV file
csv_file = "/content/NEU-DET/train/training_annotations.csv"
df = pd.read_csv(csv_file)

# Append ".jpg" to every filename that does not already end with it.
# A boolean mask selects the rows by label, so this does not depend on the
# frame having a default 0..n-1 index, and it avoids one write per row.
missing_extension = ~df['filename'].str.endswith(".jpg")
df.loc[missing_extension, 'filename'] += ".jpg"

# Save the modified DataFrame back to the CSV file
df.to_csv(csv_file, index=False)


In [None]:
import os
import pandas as pd
import tensorflow as tf
from PIL import Image
import io
from object_detection.utils import dataset_util


def image_feature(value):
    """Return a bytes ``tf.train.Feature`` holding the JPEG encoding of ``value``.

    ``value`` is passed to ``tf.io.encode_jpeg``; the resulting bytes are
    wrapped as the single element of a ``BytesList``.
    """
    jpeg_bytes = tf.io.encode_jpeg(value).numpy()
    wrapped = tf.train.BytesList(value=[jpeg_bytes])
    return tf.train.Feature(bytes_list=wrapped)

def create_tf_example(filename, image_dir, annotations):
    """Build a ``tf.train.Example`` for one image and all of its annotated objects.

    Args:
        filename: Image file name; used both to locate the file inside
            ``image_dir`` and to select the matching rows of ``annotations``.
        image_dir: Directory containing the image files.
        annotations: DataFrame with at least the columns ``filename``,
            ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``object_name``,
            ``object_difficult`` and ``object_truncated``.

    Returns:
        A ``tf.train.Example`` with the JPEG bytes, the image size and one
        entry per annotated object. Box coordinates are stored as fractions
        of the image width/height.
    """
    image_path = os.path.join(image_dir, filename)
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode only to learn the image size. The bytes read from disk are stored
    # unchanged below: re-encoding the decoded tensor would recompress the
    # image (lossy) for no benefit.
    image = tf.image.decode_jpeg(encoded_jpg, channels=3)
    height, width = int(image.shape[0]), int(image.shape[1])

    # Rows of the annotation table that belong to this image.
    image_annotations = annotations[annotations['filename'] == filename]

    # Pixel coordinates are normalised by the image size.
    xmins = [float(v) / width for v in image_annotations['xmin']]
    xmaxs = [float(v) / width for v in image_annotations['xmax']]
    ymins = [float(v) / height for v in image_annotations['ymin']]
    ymaxs = [float(v) / height for v in image_annotations['ymax']]
    classes_text = [name.encode('utf8') for name in image_annotations['object_name']]
    difficult = [int(v) for v in image_annotations['object_difficult']]
    truncated = [int(v) for v in image_annotations['object_truncated']]

    encoded_filename = filename.encode('utf8')

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(encoded_filename),  # source_id can be same as filename
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(b'jpeg'),
            'image/object/bndbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bndbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bndbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bndbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/difficult': dataset_util.int64_list_feature(difficult),
            'image/object/truncated': dataset_util.int64_list_feature(truncated),
        }
    ))
    return tf_example

def main():
    """Write one TFRecord file per training image listed in the annotation CSV."""
    # Define file paths
    csv_input = "/content/NEU-DET/train/training_annotations.csv"
    image_dir = "/content/Train_Images/images"
    output_dir = "/content/Train_tfrecords"

    # The writer cannot open a file inside a directory that does not exist,
    # so create the output directory up front (no-op when it already exists).
    os.makedirs(output_dir, exist_ok=True)

    # Read CSV file containing image annotations
    annotations = pd.read_csv(csv_input)

    # Iterate through each unique filename in the CSV file
    for filename in annotations['filename'].unique():
        output_path = f"{output_dir}/output_{os.path.splitext(filename)[0]}.tfrecord"

        # Create a TFRecord for the current image
        tf_example = create_tf_example(filename, image_dir, annotations)

        # Write TFRecord to file
        with tf.io.TFRecordWriter(output_path) as writer:
            writer.write(tf_example.SerializeToString())

        print(f'Successfully created TFRecord for {filename}. Output path: {output_path}')

if __name__ == "__main__":
    main()

In [None]:
# Open one of the generated TFRecord files; the bare expression on the last
# line makes the notebook display the dataset object.
filenames = ["/content/Train_tfrecords/output_crazing_21.tfrecord"]
raw_dataset = tf.data.TFRecordDataset(filenames)
raw_dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

# Creation of Training TFRecords

In [None]:
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple

def create_tf_example(filename, image_dir, annotations):
    """Build a ``tf.train.Example`` for one image and all of its annotated objects.

    Args:
        filename: Image file name; used both to locate the file inside
            ``image_dir`` and to select the matching rows of ``annotations``.
        image_dir: Directory containing the image files.
        annotations: DataFrame with at least the columns ``filename``,
            ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``object_name``,
            ``object_difficult`` and ``object_truncated``.

    Returns:
        A ``tf.train.Example`` with the JPEG bytes, the image size and one
        entry per annotated object. Box coordinates are stored as fractions
        of the image width/height.
    """
    image_path = os.path.join(image_dir, filename)
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        raw_bytes = fid.read()

    image = Image.open(io.BytesIO(raw_bytes))
    width, height = image.size

    if image.format == 'JPEG':
        # Already JPEG: keep the bytes from disk rather than recompressing
        # them, which would only lose quality.
        encoded_jpg = raw_bytes
    else:
        # Convert the raw image data to JPEG format
        output_io = io.BytesIO()
        image.save(output_io, format='JPEG')
        encoded_jpg = output_io.getvalue()

    # Extract annotations for the current image from the annotations DataFrame
    image_annotations = annotations[annotations['filename'] == filename]

    # Pixel coordinates are normalised by the image size.
    xmins = [float(v) / width for v in image_annotations['xmin']]
    xmaxs = [float(v) / width for v in image_annotations['xmax']]
    ymins = [float(v) / height for v in image_annotations['ymin']]
    ymaxs = [float(v) / height for v in image_annotations['ymax']]
    classes_text = [name.encode('utf8') for name in image_annotations['object_name']]
    difficult = [int(v) for v in image_annotations['object_difficult']]
    truncated = [int(v) for v in image_annotations['object_truncated']]

    encoded_filename = filename.encode('utf8')
    source_id = encoded_filename  # the file name doubles as the image identifier
    image_format = b'jpg'

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/width': dataset_util.int64_feature(width),
            'image/height': dataset_util.int64_feature(height),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(source_id),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bndbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bndbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bndbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bndbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/difficult': dataset_util.int64_list_feature(difficult),
            'image/object/truncated': dataset_util.int64_list_feature(truncated),
        }
    ))
    return tf_example
def main():
    """Write one TFRecord file per training image listed in the annotation CSV."""
    # Define file paths
    csv_input = "/content/NEU-DET/train/training_annotations.csv"
    image_dir = "/content/Train_Images/images"
    output_dir = "/content/Train_tfrecords"

    # The writer cannot open a file inside a directory that does not exist,
    # so create the output directory up front (no-op when it already exists).
    os.makedirs(output_dir, exist_ok=True)

    # Read CSV file containing image annotations
    annotations = pd.read_csv(csv_input)

    # Iterate through each unique filename in the CSV file
    for filename in annotations['filename'].unique():
        output_path = f"{output_dir}/output_{os.path.splitext(filename)[0]}.tfrecord"

        # Create a TFRecord for the current image
        tf_example = create_tf_example(filename, image_dir, annotations)

        # Write TFRecord to file
        with tf.io.TFRecordWriter(output_path) as writer:
            writer.write(tf_example.SerializeToString())

        print(f'Successfully created TFRecord for {filename}. Output path: {output_path}')

if __name__ == "__main__":
    main()

# Creating Validation TFRecords

In [None]:
import pandas as pd

# Read the CSV file
csv_file = "/content/NEU-DET/validation/validation_annotations.csv"
df = pd.read_csv(csv_file)

# Append ".jpg" to every filename that does not already end with it.
# A boolean mask selects the rows by label, so this does not depend on the
# frame having a default 0..n-1 index, and it avoids one write per row.
missing_extension = ~df['filename'].str.endswith(".jpg")
df.loc[missing_extension, 'filename'] += ".jpg"

# Save the modified DataFrame back to the CSV file
df.to_csv(csv_file, index=False)

In [None]:
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple

def create_tf_example(filename, image_dir, annotations):
    """Build a ``tf.train.Example`` for one image and all of its annotated objects.

    Args:
        filename: Image file name; used both to locate the file inside
            ``image_dir`` and to select the matching rows of ``annotations``.
        image_dir: Directory containing the image files.
        annotations: DataFrame with at least the columns ``filename``,
            ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``object_name``,
            ``object_difficult`` and ``object_truncated``.

    Returns:
        A ``tf.train.Example`` with the image bytes as read from disk, the
        image size and one entry per annotated object. Box coordinates are
        stored as fractions of the image width/height.
    """
    image_path = os.path.join(image_dir, filename)
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # The image is opened only to read its dimensions; the bytes are stored as-is.
    # NOTE(review): assumes the files on disk really are JPEG - confirm.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    # Extract annotations for the current image from the annotations DataFrame
    image_annotations = annotations[annotations['filename'] == filename]

    # Pixel coordinates are normalised by the image size.
    xmins = [float(v) / width for v in image_annotations['xmin']]
    xmaxs = [float(v) / width for v in image_annotations['xmax']]
    ymins = [float(v) / height for v in image_annotations['ymin']]
    ymaxs = [float(v) / height for v in image_annotations['ymax']]
    classes_text = [name.encode('utf8') for name in image_annotations['object_name']]
    difficult = [int(v) for v in image_annotations['object_difficult']]
    truncated = [int(v) for v in image_annotations['object_truncated']]

    encoded_filename = filename.encode('utf8')
    source_id = encoded_filename  # the file name doubles as the image identifier
    image_format = b'jpg'

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/width': dataset_util.int64_feature(width),
            'image/height': dataset_util.int64_feature(height),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(source_id),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bndbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bndbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bndbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bndbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/difficult': dataset_util.int64_list_feature(difficult),
            'image/object/truncated': dataset_util.int64_list_feature(truncated),
        }
    ))
    return tf_example

def main():
    """Write one TFRecord file per validation image listed in the annotation CSV."""
    # Define file paths
    csv_input = "/content/NEU-DET/validation/validation_annotations.csv"
    image_dir = "/content/Validation_Images/images"
    output_dir = "/content/Validation_tfrecords"

    # The writer cannot open a file inside a directory that does not exist,
    # so create the output directory up front (no-op when it already exists).
    os.makedirs(output_dir, exist_ok=True)

    # Read CSV file containing image annotations
    annotations = pd.read_csv(csv_input)

    # Iterate through each unique filename in the CSV file
    for filename in annotations['filename'].unique():
        output_path = f"{output_dir}/output_{os.path.splitext(filename)[0]}.tfrecord"

        # Create a TFRecord for the current image
        tf_example = create_tf_example(filename, image_dir, annotations)

        # Write TFRecord to file
        with tf.io.TFRecordWriter(output_path) as writer:
            writer.write(tf_example.SerializeToString())

        print(f'Successfully created TFRecord for {filename}. Output path: {output_path}')

if __name__ == "__main__":
    main()


# Testing data in the tf records

In [None]:
# Open one of the generated TFRecord files; the bare expression on the last
# line makes the notebook display the dataset object.
filenames = ["/content/Train_tfrecords/output_crazing_21.tfrecord"]
raw_dataset = tf.data.TFRecordDataset(filenames)
raw_dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [None]:
# Parse only the first serialized record of `raw_dataset` and print it.
first_records = raw_dataset.take(1)
for raw_record in first_records:
    serialized_bytes = raw_record.numpy()
    example = tf.train.Example()
    example.ParseFromString(serialized_bytes)
    print(example)

In [None]:
import tensorflow as tf

tfrecord_file = '/content/Train_tfrecords/output_crazing_21.tfrecord'

# Walk through every record stored in the file, parse it and print it.
record_dataset = tf.data.TFRecordDataset(tfrecord_file)
for raw_record in record_dataset:
    serialized_bytes = raw_record.numpy()
    example = tf.train.Example()
    example.ParseFromString(serialized_bytes)
    print(example)


features {
  feature {
    key: "image/encoded"
    value {
      bytes_list {
        value: "\377\330\377\340\000\020JFIF\000\001\001\001\000`\000`\000\000\377\333\000C\000\002\001\001\002\001\001\002\002\002\002\002\002\002\002\003\005\003\003\003\003\003\006\004\004\003\005\007\006\007\007\007\006\007\007\010\t\013\t\010\010\n\010\007\007\n\r\n\n\013\014\014\014\014\007\t\016\017\r\014\016\013\014\014\014\377\333\000C\001\002\002\002\003\003\003\006\003\003\006\014\010\007\010\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\014\377\300\000\021\010\000\310\000\310\003\001\"\000\002\021\001\003\021\001\377\304\000\037\000\000\001\005\001\001\001\001\001\001\000\000\000\000\000\000\000\000\001\002\003\004\005\006\007\010\t\n\013\377\304\000\265\020\000\002\001\003\003\002\004\003\005\005\004\004\000\000\001}\001\002\003\000\004\021\005\022

# Model Training

In [None]:
import os
import tensorflow as tf

import pandas as pd  # used below to read the class names from the annotation CSV

# Define Image constants
image_height = 224
image_width = 224
n_color_channels = 3  # Since images are converted to 3 channels
batch_size = 32
epochs = 10
n_classes = 6

train_tfrecords_dir = '/content/Train_tfrecords'  # Directory containing TFRecord files
test_tfrecords_dir = '/content/Validation_tfrecords'
train_annotations_csv = '/content/NEU-DET/train/training_annotations.csv'

# The TFRecords store the class as text, while the loss needs an integer id.
# Build a name -> id table from the class names found in the training CSV;
# sorting keeps the ids stable between runs.
class_names = sorted(str(name) for name in pd.read_csv(train_annotations_csv)['object_name'].unique())
assert len(class_names) == n_classes, (
    f"expected {n_classes} classes, found {len(class_names)}: {class_names}")
class_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=tf.constant(class_names),
        values=tf.range(len(class_names), dtype=tf.int64)),
    default_value=-1)  # -1 marks a class name that is not in the training CSV

# Define train and test datasets.
# The files are written with the ".tfrecord" extension, so match on that.
train_pattern = os.path.join(train_tfrecords_dir, '*.tfrecord')
test_pattern = os.path.join(test_tfrecords_dir, '*.tfrecord')

train_tfrecords = tf.data.Dataset.list_files(train_pattern)
# Validation order does not need to be random; keep it deterministic.
test_tfrecords = tf.data.Dataset.list_files(test_pattern, shuffle=False)

# Load and preprocess datasets

def parse_tfrecord_fn(example):
    """Parse one serialized example into an ``(image, label)`` pair.

    The image is decoded as a single channel, resized to
    ``image_height`` x ``image_width``, expanded to 3 channels and scaled
    with the Xception preprocessing function. The label is the integer id of
    the first object's class name.
    """
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        # One class name per annotated object: a variable-length list of
        # byte strings, not a scalar int64.
        'image/object/class/text': tf.io.VarLenFeature(tf.string),
    }
    example = tf.io.parse_single_example(example, feature_description)
    image = tf.image.decode_jpeg(example['image/encoded'], channels=1)  # Decode as single channel
    image = tf.image.resize(image, [image_height, image_width])
    image = tf.image.grayscale_to_rgb(image)  # Convert single channel to 3 channels
    # The ImageNet Xception weights expect inputs scaled by this function,
    # not inputs in [0, 1].
    image = tf.keras.applications.xception.preprocess_input(tf.cast(image, tf.float32))

    class_text = tf.sparse.to_dense(example['image/object/class/text'], default_value='')
    # NOTE(review): uses the first object's class as the image label; this
    # assumes every object in an image has the same class - confirm.
    label = class_table.lookup(class_text[0])
    return image, label


def parse_tfrecords(file_path):
    """Return the parsed ``(image, label)`` dataset of one TFRecord file."""
    return tf.data.TFRecordDataset(file_path).map(parse_tfrecord_fn)

train_dataset = train_tfrecords.interleave(parse_tfrecords, cycle_length=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.shuffle(buffer_size=1000).batch(batch_size).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

test_dataset = test_tfrecords.interleave(parse_tfrecords, cycle_length=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Define model and compile
base_model = tf.keras.applications.Xception(input_shape=(image_height, image_width, n_color_channels),
                                            include_top=False,
                                            weights="imagenet")

# Freeze the pretrained backbone; only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    # No activation: the loss below is configured with from_logits=True, and
    # a ReLU here would clamp every negative logit to zero.
    tf.keras.layers.Dense(n_classes)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train model
model.fit(train_dataset, epochs=epochs, validation_data=test_dataset)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node ParseSingleExample/ParseExample/ParseExampleV2 defined at (most recent call last):
<stack traces unavailable>
Key: image/object/class/text.  Data types don't match. Data type: string but expected type: int64
	 [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_35649]