# Detecto Object Detection Tutorial



**See tutorials:**
- https://www.analyticsvidhya.com/blog/2021/06/simplest-way-to-do-object-detection-on-custom-datasets/

- https://github.com/alankbi/detecto

**and**
- https://towardsdatascience.com/build-a-custom-trained-object-detection-model-with-5-lines-of-code-713ba7f6c0fb

Segmentation in the context of computer vision refers to the process of dividing an image into multiple segments or regions. Object segmentation, in particular, involves identifying and delineating individual objects within an image. Object detection, the task covered in this tutorial, instead localizes each object with a bounding box and a class label. Detecto is an open-source Python library designed for object detection tasks; it is built on top of PyTorch, a popular open-source deep learning library.


**About detecto**: https://detecto.readthedocs.io/en/latest/

# Install PyTorch and Detecto

In [None]:
#!pip3 install torch torchvision

In [None]:
# A CUDA-enabled GPU speeds up training and inference considerably;
# this cell reports whether PyTorch can see one on this machine.
import torch

cuda_ready = torch.cuda.is_available()
print(cuda_ready)

In [None]:
#!pip install detecto

# Import libraries

In [None]:
# Detecto
from detecto import core, utils, visualize
from detecto.visualize import show_labeled_image, plot_prediction_grid
from detecto.utils import read_image

# If you use Google Colab
from google.colab import drive

# TorchVision
from torchvision import transforms

# Third-party and standard-library modules
import matplotlib.pyplot as plt
import numpy as np
import os
import glob, os
import pandas as pd
import plotly.express as px

# Path to target folders in Google Drive

In [None]:
# Mount Google Drive at /content/drive (Google Colab only).
drive.mount('/content/drive')

# Switch the working directory to the dataset folder so that the relative paths
# used in the following cells ('train_xml/', 'train_jpg/', 'test/', ...) resolve.
# Replace the placeholder with the real path before running.
os.chdir('Path to your training data repository')

## Custom image augmentation

In [None]:
"""Defines a comprehensive data transformation pipeline using PyTorch's torchvision.transforms module.
The pipeline consists of a series of image transformations commonly employed in deep learning tasks,
particularly for convolutional neural networks (CNNs) and image datasets."""

# Compose the transformations into a sequential pipeline using transforms.Compose.
# The steps are applied to every image in exactly the order listed here.
transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert the loaded image (array or Tensor) to a PIL image so the PIL-based steps below can run.
    transforms.Resize(900),   # Resize so that the shorter side is 900 px; an int size keeps the aspect ratio.
    transforms.RandomHorizontalFlip(0.5),  # Apply a random horizontal flip with a probability of 0.5 for data augmentation.
    transforms.ColorJitter(saturation=0.2),  # Randomly vary the saturation only; brightness, contrast and hue are left unchanged.
    transforms.ToTensor(),  # Convert the PIL image to a PyTorch Tensor with pixel values scaled to the range [0.0, 1.0].
    utils.normalize_transform(),  # Detecto's default normalization (presumably the mean/std expected by the pretrained backbone - confirm in the Detecto docs).
])

# The resulting 'transform' is meant to be passed to the datasets so that it is applied
# to each image while the data is loaded, preparing it for consumption by the model.


## Convert the annotations from XML files to CSV

In [None]:
# Convert the training annotations: 'train_xml/' is the directory holding the XML
# files and 'train_labels.csv' is the name of the CSV file to produce.
utils.xml_to_csv('train_xml/', 'train_labels.csv')

# Same conversion for the validation annotations: XML files are read from 'val_xml/'
# and the result is stored as 'val_labels.csv'.
utils.xml_to_csv('val_xml/', 'val_labels.csv')

# Model Training and saving

In [None]:
"""Create a training dataset using the 'core.Dataset' class. The dataset is initialized with the path to a CSV file
containing annotations for the training images ('train_labels.csv'), the directory containing the training images
('train_jpg/'), and a data transformation pipeline called 'transform' which includes image preprocessing steps."""
Train_dataset = core.Dataset(
    "train_labels.csv",
    "train_jpg/",
    transform=transform,
)

# Validation dataset: annotations are read from 'val_labels.csv', the images from
# 'val_jpg/', and the same 'transform' pipeline is applied to every sample.
val_dataset = core.Dataset(
    "val_labels.csv",
    "val_jpg/",
    transform=transform,
)


In [None]:
# Wrap the training dataset in a DataLoader: batches of 2 samples, with shuffle=True.
train_loader = core.DataLoader(Train_dataset, batch_size=2, shuffle=True)# DataLoader for the training dataset

In [None]:
# Report how many samples each dataset contains before training starts.
print(f"Number of training samples: {len(Train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}\n")
# Create the model; the list holds the class names that occur in the annotations.
model = core.Model(["label"]) # Classes in our dataset
# Optional: uncomment to inspect the underlying network (see the Detecto docs).
#model.get_internal_model

In [None]:
# Train the model using the 'fit' method, providing the training data loader ('train_loader'),
# the validation dataset ('val_dataset'), and additional training parameters.
# Because a validation dataset is supplied, 'fit' returns one average validation
# loss per epoch (see the Detecto documentation of core.Model.fit).
losses = model.fit(train_loader, val_dataset, epochs=10, lr_step_size=5, learning_rate=0.001, verbose=True)

# Print a separator line and then save the trained model with the filename "model_segmentation_label.pth."
print("-----------\n")
model.save("model_segmentation_label.pth") # Name the model
print('SAVING MODEL COMPLETE...\n')
print("-----------\n")

# Plot the per-epoch validation loss using matplotlib.
# The x-axis is derived from the number of returned values so that it always
# matches 'losses', even when the 'epochs' argument above is changed.
epochs = range(len(losses))
plt.figure(figsize=(15, 10))
# The curve needs a label, otherwise plt.legend() has nothing to display.
plt.plot(epochs, losses, 'b', label='Validation loss')
plt.title('Validation loss per epoch')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Load Model

In [None]:
"""Load a pre-trained model from the file "model_segmentation_label.pth" using the 'load' method
from the 'core.Model' class. The second argument (["label"]) suggests that the model is loaded
with a specific set of class labels; the labels have to be listed in the same order as they were listed in the line above "model = core.Model(["label"])"""

# NOTE(review): replace the placeholder with the path of the saved weights *file*
# (e.g. the "model_segmentation_label.pth" written by model.save), not only its
# directory - confirm against the Detecto documentation of Model.load.
model = core.Model.load("Path to the directory where the model is saved", ["label"])

# Test Model


In [None]:
# Run the detector on one test image (point the path at one of your own images).
image = utils.read_image(
    'test/00acfab7-6f73-40bc-a209-599aee67f795_label_front_0004_label.jpg'
)
predictions = model.predict(image)

# The prediction unpacks into three parallel results, printed one per line:
# the classes, the box coordinates, and the prediction scores between 0 and 1.
labels, boxes, scores = predictions
print(labels, boxes, scores, sep="\n")

In [None]:
# Draw every predicted box with its label on the test image (no score filtering yet).
show_labeled_image(image, boxes, labels)

In [None]:
# Confidence cut-off (between 0 and 1): predictions scoring at or below this
# value are discarded.
thresh = 0.8

# Positions of all predictions whose score is above the cut-off.
filtered_indices = np.where(scores > thresh)
num_list = filtered_indices[0].tolist()

# Keep labels, boxes and scores at those positions only.
filtered_labels = [labels[position] for position in num_list]
filtered_boxes = boxes[filtered_indices]
filtered_scores = scores[filtered_indices]

# Show the image again, this time with the confident detections only.
show_labeled_image(image, filtered_boxes, filtered_labels)


## Predictions

In [None]:
# Collects the raw (unfiltered) prediction triple of every processed image.
all_predictions = []

# The same confidence cut-off is used for every image.
thresh = 0.8

# Process each ".jpg" file found in the "test/" directory.
for file in glob.glob("test/*.jpg"):
    image = utils.read_image(file)

    # The prediction is a (labels, boxes, scores) triple.
    predictions = model.predict(image)
    labels, boxes, scores = predictions

    # Keep only the detections whose score is above the cut-off.
    filtered_indices = np.where(scores > thresh)
    num_list = filtered_indices[0].tolist()
    filtered_labels = [labels[position] for position in num_list]
    filtered_boxes = boxes[filtered_indices]
    filtered_scores = scores[filtered_indices]

    # Display the confident detections, then record the full prediction.
    show_labeled_image(image, filtered_boxes, filtered_labels)
    all_predictions.append(predictions)

# Save Predictions in CSV

In [None]:
def create_predictions(model, path):
    """
    Run the Detecto model on every ``.jpg`` file in a directory.

    Each predicted box becomes one row of the returned table. Scores and
    coordinates are stored as plain Python floats (not tensor objects) so the
    table can be filtered, compared and written to CSV without any parsing.

    Args:
        model (detecto.core.Model): model created with detecto
        path (str): directory where the pictures are located

    Returns:
        DataFrame: pandas DataFrame with one row per predicted box and the
        columns ``filename``, ``class``, ``score``, ``xmin``, ``ymin``,
        ``xmax`` and ``ymax``. When nothing is predicted the frame is empty
        but still carries these columns.
    """
    columns = ['filename', 'class', 'score', 'xmin', 'ymin', 'xmax', 'ymax']
    all_predictions = []
    print("Predicting coordinates")
    # os.path.join avoids a doubled separator when 'path' already ends with "/";
    # sorting makes the row order reproducible across runs.
    for file in sorted(glob.glob(os.path.join(path, "*.jpg"))):
        image = utils.read_image(file)
        # Predictions format: (labels, boxes, scores)
        labels, boxes, scores = model.predict(image)
        filename = os.path.basename(file)  # File name without the directory
        for labelname, box, score in zip(labels, boxes, scores):
            xmin, ymin, xmax, ymax = (float(value) for value in box)
            all_predictions.append({
                'filename': filename,
                'class': labelname,
                'score': float(score),
                'xmin': xmin,
                'ymin': ymin,
                'xmax': xmax,
                'ymax': ymax,
            })

    # Passing the column list keeps the schema stable even without any rows.
    return pd.DataFrame(all_predictions, columns=columns)

In [None]:
# Predict the boxes for every .jpg image in "test/" and collect them in a DataFrame.
df = create_predictions(model, "test/")

In [None]:
def get_clean_dataframe(dataframe, threshold=0.8):
    """
    Create a clean dataframe only with boxes reaching a given threshold score.

    The ``score``, ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns are
    converted to floats. Cells may hold plain numbers, scalar objects exposing
    ``.item()`` (such as 0-d tensors or NumPy scalars) or their text form
    (e.g. ``"tensor(0.95)"``), including scientific notation. Cells without a
    readable number become NaN. The input frame is not modified.

    Args:
        dataframe (pandas.DataFrame): pandas DataFrame with predicted labels
        threshold (float): minimum score a row needs in order to be kept

    Returns:
        DataFrame: copy of the input restricted to rows whose score is greater
        than or equal to ``threshold``; an empty input yields an empty copy.
    """
    import re  # local import so the block does not rely on the file's import cell

    # First numeric literal in a text, with optional sign, fraction and exponent.
    number_pattern = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")

    def to_float(value):
        # Scalar wrappers (0-d tensors, NumPy scalars) expose their value via .item().
        if hasattr(value, "item"):
            value = value.item()
        try:
            return float(value)
        except (TypeError, ValueError):
            # Fall back to the first number embedded in the text representation.
            match = number_pattern.search(str(value))
            return float(match.group(0)) if match else float("nan")

    # Work on a copy so the caller's DataFrame stays untouched.
    df = dataframe.copy()
    if df.empty:
        return df

    colnames = ['score', 'xmin', 'ymin', 'xmax', 'ymax']
    for header in colnames:
        df[header] = df[header].map(to_float).astype(float)

    # Keep only rows where the score reaches the threshold.
    return df.loc[df['score'] >= threshold]

In [None]:
# Keep only the predictions that reach the default score threshold (0.8).
df = get_clean_dataframe(df)

In [None]:
# Write the thresholded predictions to 'predicted.csv'; with pandas' default
# settings the DataFrame index is written as the first column.
df.to_csv('predicted.csv') # Save the clean, thresholded DataFrame