# Machine Learning Receipt Object Detection [CH2-PS579]


*   Muhammad Aditya Hasta Pratama (M299BSY0188) - ML - Universitas Pendidikan Indonesia / Active
*   Shereva Miranda (M002BSX0590) - ML - Institut Teknologi Bandung / Active
*   Reza Nugraha (M002BSY1855) - ML - Institut Teknologi Bandung / Inactive

<br>

Reference tutorials:

*   [Tensorflow 2 Custom Object Detection Model by Lazy Tech](https://www.youtube.com/watch?v=8ktcGQ-XreQ&t=553s&ab_channel=LazyTech).
*   [Train a Deep Learning Model for Custom Object Detection Using TensorFlow by TechZizou](https://www.youtube.com/watch?v=amURyS6CAaY&t=69s&ab_channel=techzizou)


# 1) PREPARATION

Libraries and installations needed to run the architecture.

In [None]:
# Install TensorFlow, pinned to the exact version 2.13.0
!pip install tensorflow=="2.13.0"

In [2]:
# Import Libraries
import os
import glob
import xml.etree.ElementTree as ET
import pandas as pd
import pathlib

In [None]:
# Download models for object detection
if "models" in pathlib.Path.cwd().parts:
  while "models" in pathlib.Path.cwd().parts:
    os.chdir('..')
elif not pathlib.Path('models').exists():
  !git clone --depth 1 https://github.com/tensorflow/models

In [None]:
%%bash
# Install the object detection API
# NOTE: the %%bash cell magic has to be the very first line of the cell, so this
# comment lives below it (inside the bash script) instead of above it.
cd models/research/
# Compile the protobuf definitions into Python modules
protoc object_detection/protos/*.proto --python_out=.
# Use the TF2 packaging script as setup.py, then install the package
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
# Run the model builder test script shipped with the object detection API
# (executed from the clone under /content/models)
!python /content/models/research/object_detection/builders/model_builder_tf2_test.py

# 2) DOWNLOAD DATASET (KAGGLE)

Preparing the receipt dataset, tfrecords, and label map from Kaggle.

Link = https://www.kaggle.com/datasets/mdhstama23/receipt-invoice-ml-ch2ps357

*Note: we misremembered our team number when creating the dataset, so the ID in the Kaggle link (ch2ps357) differs from our actual team ID (CH2-PS579).*

In [None]:
# Install the Kaggle API client (-q keeps pip output quiet)
!pip install -q kaggle
# NOTE(review): `kaggle-cli` looks like a separate package from `kaggle`; the download
# step below only calls the `kaggle` command — confirm this second install is needed.
!pip install -q kaggle-cli

To get your Kaggle API username and key, make sure you have a Kaggle account.

If you have just created a Kaggle account, or already had one, please follow these steps:


1.   Go to your **ACCOUNT SETTINGS**
2.   Click on **CREATE NEW TOKEN**; the kaggle.json file will be downloaded automatically
3.   Open the .json file and extract the username and key. Use these values in the code cell below




In [7]:
# Setting environment for the Kaggle API
#
# SECURITY: never hard-code a real API key in a notebook that is shared or committed.
# Any key that was previously published in this cell should be revoked from the Kaggle
# account settings and replaced with a freshly generated token.
#
# The credentials are taken from the environment when already set (e.g. on a re-run);
# otherwise they are asked for interactively. getpass() keeps the key out of the
# notebook source and out of the saved cell output.
from getpass import getpass

username = os.environ.get('KAGGLE_USERNAME') or input('Kaggle username: ')
key = os.environ.get('KAGGLE_KEY') or getpass('Kaggle API key: ')

# The `kaggle` command-line client reads these two variables
os.environ['KAGGLE_USERNAME'] = username
os.environ['KAGGLE_KEY'] = key

In [None]:
# Download the dataset archive from Kaggle and unpack it (--unzip) into the current directory
!kaggle datasets download -d mdhstama23/receipt-invoice-ml-ch2ps357 --unzip
# List the working directory to check what was extracted
!ls

# 3) CONFIGURE THE DATASET

Preparing the csv and record files for training the model.

In [9]:
# Split the images labels for dataset

# go to the directory
%cd /content/dataset/data/

# Create new folder for split labels
!mkdir test_labels train_labels

# Lists the files inside 'annotations' in a random order
# Moves the 100/500 labels (20% of the labels) to the testing directory `test_labels`
!ls annotations/* | sort -R | head -100 | xargs -I{} mv {} test_labels/

# Moves the rest (400 labels) of the labels (80% of the labels ) to the training directory `train_labels`
!ls annotations/* | xargs -I{} mv {} train_labels/

/content/dataset/data


In [None]:
# Create CSV files from the XML labels and the label_map.pbtxt files

def _find_child(parent, tag, position):
    """Return the child of *parent* named *tag*, or the child at *position* if no such tag exists."""
    node = parent.find(tag)
    return node if node is not None else parent[position]


def xml_to_csv(path):
    """Collect the bounding boxes of every ``*.xml`` annotation file found in *path*.

    Each file is expected to contain a ``<filename>`` element, a ``<size>`` element
    (width, height) and any number of ``<object>`` elements holding a class name and a
    bounding box (xmin, ymin, xmax, ymax). Children are looked up by tag name, so files
    whose tags appear in a different order or carry extra tags are read correctly; when a
    tag name is missing, the original positional layout (name at index 0, box at index 4,
    coordinates at indices 0-3, width/height at indices 0-1) is used as a fallback.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        A tuple ``(xml_df, classes_names)``: ``xml_df`` is a DataFrame with one row per
        ``<object>`` (columns filename, width, height, class, xmin, ymin, xmax, ymax) and
        ``classes_names`` is the sorted list of unique class names.
    """
    rows = []

    # Sort the file list so that the row order of the resulting CSV is reproducible
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()

        objects = root.findall('object')
        if not objects:
            # Nothing annotated in this file: it contributes no rows
            continue

        filename = root.find('filename').text
        size = root.find('size')
        width = int(_find_child(size, 'width', 0).text)
        height = int(_find_child(size, 'height', 1).text)

        for member in objects:
            class_name = _find_child(member, 'name', 0).text
            bndbox = _find_child(member, 'bndbox', 4)
            rows.append((
                filename,
                width,
                height,
                class_name,
                int(_find_child(bndbox, 'xmin', 0).text),
                int(_find_child(bndbox, 'ymin', 1).text),
                int(_find_child(bndbox, 'xmax', 2).text),
                int(_find_child(bndbox, 'ymax', 3).text),
            ))

    # Define column names for the DataFrame
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(rows, columns=column_name)

    # Unique class names in sorted order (index 3 of each row is the class)
    classes_names = sorted({row[3] for row in rows})

    return xml_df, classes_names

# Process both 'train_labels' and 'test_labels' directories.
# The class names of BOTH splits are collected: the split is random, so a class may occur
# in only one of them, and the label map has to list every class that can appear.
all_classes = set()
for label_path in ['train_labels', 'test_labels']:
    # Convert the XML annotations of this split into a DataFrame
    xml_df, classes = xml_to_csv(label_path)
    all_classes.update(classes)

    # Save the DataFrame to '<split>.csv' in the current directory, without the index column
    xml_df.to_csv(f'{label_path}.csv', index=None)

    # Print success message
    print(f'Successfully converted {label_path} xml to csv.')

# Sorted union of the class names; ids in the label map follow this order
classes = sorted(all_classes)

# Create the label_map.pbtxt file: one `item` entry per class, ids starting at 1,
# entries separated by a blank line and no trailing whitespace
label_map_path = "label_map.pbtxt"
pbtxt_content = "\n\n".join(
    "item {{\n    id: {0}\n    name: '{1}'\n}}".format(class_id, class_name)
    for class_id, class_name in enumerate(classes, start=1)
)

with open(label_map_path, "w") as f:
    f.write(pbtxt_content)
    print('Successfully created label_map.pbtxt ')

In [None]:
# Create the train.record and test.record files with the dataset's generate_tfrecord.py script.
# Arguments passed, in order: the labels CSV, the label map, the image folder and the
# output record file.

# For train.record
!python /content/dataset/generate_tfrecord.py train_labels.csv  label_map.pbtxt images/ train.record

# For test.record
!python /content/dataset/generate_tfrecord.py test_labels.csv  label_map.pbtxt images/ test.record

# 4) ARCHITECTURE OR MODEL CONFIGURATION

Configure the training pipeline using a model available in the [Tensorflow 2 Detection Model Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md).

In [6]:
# Hyper-parameters shared by the config-editing and training cells below
num_classes, batch_size = 1, 64          # classes to detect / images per batch
num_steps = num_eval_steps = 1000        # training steps / evaluation steps
fine_tune_checkpoint_type = 'detection'  # checkpoint type for fine-tuning

In [None]:
# Download the pre-trained architecture (checkpoint archive)

# Move to the dataset directory so the archive is downloaded and unpacked there
%cd /content/dataset/data

# Change this link and name if you want to train with another model.
# `name_model` must be the archive file name at the end of `link_model`.
link_model = "http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz"
name_model = "ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz"

# Download the archive, then extract it into the current directory
!wget {link_model}
!tar -xf {name_model}

In [1]:
# Download the config file

# Change this link config based on your preferred model before
config_link = "https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config"

# Download the model
!wget {config_link}

--2023-12-21 07:29:36--  https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/ssd_mobilenet_v2_320x320_coco17_tpu-8.config
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4484 (4.4K) [text/plain]
Saving to: ‘ssd_mobilenet_v2_320x320_coco17_tpu-8.config’


2023-12-21 07:29:36 (31.2 MB/s) - ‘ssd_mobilenet_v2_320x320_coco17_tpu-8.config’ saved [4484/4484]



In [4]:
# Pre-trained checkpoint and base pipeline config.
# Update both when switching to another model.
base_config_path = '/content/ssd_mobilenet_v2_320x320_coco17_tpu-8.config'
fine_tune_checkpoint = '/content/dataset/data/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'

# Dataset files produced in section 3: label map, then train and test records
labelmap_path, train_record_path, test_record_path = (
    '/content/dataset/data/label_map.pbtxt',
    '/content/dataset/data/train.record',
    '/content/dataset/data/test.record',
)

In [7]:
# Edit the config file: read the base pipeline config, override the settings chosen in
# the cells above, and write the result to 'model_config.config' in the current directory.

import re

with open(base_config_path) as f:
    config = f.read()

# (pattern, replacement) pairs, applied in this order to the whole config text
substitutions = [
    # Set number of classes
    (r'num_classes: [0-9]+', 'num_classes: {}'.format(num_classes)),
    # Set fine_tune_checkpoint path
    (r'fine_tune_checkpoint: ".*?"',
     'fine_tune_checkpoint: "{}"'.format(fine_tune_checkpoint)),
    # Set the fine-tune checkpoint type from the `fine_tune_checkpoint_type` variable,
    # whatever value the base config currently holds
    (r'fine_tune_checkpoint_type: ".*?"',
     'fine_tune_checkpoint_type: "{}"'.format(fine_tune_checkpoint_type)),
    # Set batch size (every `batch_size:` entry in the file is replaced)
    (r'batch_size: [0-9]+', 'batch_size: {}'.format(batch_size)),
    # Set training steps
    (r'num_steps: [0-9]+', 'num_steps: {}'.format(num_steps)),
    # Set labelmap path
    (r'label_map_path: ".*?"', 'label_map_path: "{}"'.format(labelmap_path)),
    # Set train tf-record file path
    (r'(input_path: ".*?)(PATH_TO_BE_CONFIGURED/train)(.*?")',
     'input_path: "{}"'.format(train_record_path)),
    # Set test tf-record file path
    (r'(input_path: ".*?)(PATH_TO_BE_CONFIGURED/val)(.*?")',
     'input_path: "{}"'.format(test_record_path)),
]

for pattern, replacement in substitutions:
    # A callable replacement is inserted literally, so backslashes or group references
    # inside a path can never be misread as regex escapes
    config = re.sub(pattern, lambda _match, text=replacement: text, config)

# Open the output only once the new content is complete, so a failure above cannot
# leave a truncated config behind
with open('model_config.config', 'w') as f:
    f.write(config)

In [None]:
# Load tensorboard

%load_ext tensorboard
%tensorboard --logdir '/content/dataset/training'

# 5) TRAINING THE MODEL

Training the model based on the architecture configuration above.

In [None]:
# Locations used by the training run

# Pipeline config written by the config-editing cell
pipeline_config_path = '/content/dataset/data/model_config.config'
# Directory passed to the training script as --model_dir
model_dir = '/content/dataset/data/train_history'

In [None]:
# Run the training script of the object detection API.
# The {…} placeholders are filled in from the notebook variables defined above:
# the pipeline config, the output directory, and the train/eval step counts.

!python /content/models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={pipeline_config_path} \
    --model_dir={model_dir} \
    --alsologtostderr \
    --num_train_steps={num_steps} \
    --sample_1_of_n_eval_examples=1 \
    --num_eval_steps={num_eval_steps}

# 6) ERROR ENCOUNTERED

If you encountered an error like this

"Tensorflow: AttributeError: module 'tensorflow.python.ops.control_flow_ops' has no attribute 'case'"

Re-run the TensorFlow installation cell above (the command is repeated below) and ignore Colab's prompt to restart the session, if one appears.

Then run step 5 again.

```
!pip install tensorflow=="2.13.0"
```

# 7) RETRAINING THE MODEL (IN CASE OF COLAB LIMITATIONS OR A DISCONNECT)

If you experience an error because of a Colab limitation or a disconnect, run the code below and then run step 5 again.

First, however, change the checkpoint number. You can find the number inside the train_history folder.

The model_main_tf2.py script saves the checkpoint every 1000 steps. The training automatically restarts from the last saved checkpoint itself.


In [31]:
# Edit this number based on the checkpoint number
number_checkpoint = 1

# Path initialization
new_base_config_path = '/content/dataset/data/model_config.config'
new_fine_tune_checkpoint = f'/content/dataset/data/train_history/ckpt-{number_checkpoint}'

# Edit the config file
import re

with open(new_base_config_path) as f:
    config = f.read()

# Set fine_tune_checkpoint path to the chosen training checkpoint.
# The callable replacement inserts the path literally (no regex escape processing).
config = re.sub('fine_tune_checkpoint: ".*?"',
                lambda _match: 'fine_tune_checkpoint: "{}"'.format(new_fine_tune_checkpoint),
                config)

# Write back to the same file that was read (and that training uses), instead of a
# path relative to the current directory, so the edit cannot land in another folder
# when the working directory has changed.
with open(new_base_config_path, 'w') as f:
    f.write(config)

In [None]:
# Check the config file: print model_config.config from the current directory
# so the edited values can be inspected

%cat model_config.config

# 8) TESTING THE MODEL

Export the trained model so that it can be used in the next step, which is OCR, then test it on a sample image.

In [None]:
# Navigate to the object_detection folder, because the export command in the next cell
# calls exporter_main_v2.py by a relative path
%cd /content/models/research/object_detection

In [None]:
# Export inference graph: takes the checkpoints in train_history together with the
# pipeline config and writes the exported model under output/inference_graph
!python exporter_main_v2.py --trained_checkpoint_dir=/content/dataset/data/train_history --pipeline_config_path={pipeline_config_path} --output_directory /content/dataset/data/output/inference_graph

In [None]:
# Loading the saved_model

import tensorflow as tf
import time
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from PIL import Image
from google.colab.patches import cv2_imshow
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

# Output display size (passed to plt.figure as figsize)
IMAGE_SIZE = (12, 8)

# Path to the saved model.
# This must be the `saved_model` folder inside the directory given to the export step
# as --output_directory (/content/dataset/data/output/inference_graph).
PATH_TO_SAVED_MODEL = "/content/dataset/data/output/inference_graph/saved_model"
print('Loading model...', end='')

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
print('Done!')

# Loading the label_map
# Specify the path to the label_map file
category_index = label_map_util.create_category_index_from_labelmap("/content/dataset/data/label_map.pbtxt", use_display_name=True)

def load_image_into_numpy_array(path):
    """Load the image file at *path* into a numpy array with three colour channels.

    The image is converted to RGB first, so grayscale, palette or RGBA files also
    produce an array of shape (height, width, 3); RGB files are returned unchanged.

    Args:
        path: Path of the image file to open.

    Returns:
        The image pixels as a numpy array.
    """
    return np.array(Image.open(path).convert('RGB'))

# CHANGE THIS PATH IF YOU WANT TO TEST ANOTHER IMAGE
image_path = "/content/dataset/testing_images/receipt2363.jpg"

# Load the image into a numpy array
image_np = load_image_into_numpy_array(image_path)

# The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
input_tensor = tf.convert_to_tensor(image_np)
# The model expects a batch of images, so add an axis with `tf.newaxis`.
input_tensor = input_tensor[tf.newaxis, ...]

# Perform object detection on the input image
detections = detect_fn(input_tensor)

# All outputs are batches tensors.
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy()
              for key, value in detections.items()}
detections['num_detections'] = num_detections

# Detection classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

# Create a copy of the image with detected boxes and labels
image_np_with_detections = image_np.copy()

# Visualize boxes and labels on the image
viz_utils.visualize_boxes_and_labels_on_image_array(
    image_np_with_detections,
    detections['detection_boxes'],
    detections['detection_classes'],
    detections['detection_scores'],
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=200,
    min_score_thresh=.1,  # Adjust this value to set the minimum probability boxes to be classified as True
    agnostic_mode=False
)

# Display the image with detected objects
%matplotlib inline
plt.figure(figsize=IMAGE_SIZE, dpi=200)
plt.axis("off")
plt.imshow(image_np_with_detections)
plt.show()