Package installs needed to run this notebook in Google Colab (or in a local Jupyter notebook)

In [None]:
!pip install torch torchvision matplotlib


First approach: Faster R-CNN with a ResNet-50 FPN backbone

In [None]:
from google.colab import drive
import os
import cv2
from pathlib import Path

# Mount Google Drive so the input images can be read and the results persisted.
drive.mount('/content/drive')

# Replace these with the real Google Drive locations: the folder holding the
# input images and the folder that receives the Faster R-CNN results.
image_folder = '/content/drive/My Drive/all images'
output_folder = '/content/drive/My Drive/Faster_R-CNN'

# Create the results folder (and any missing parents); no error if it already exists.
Path(output_folder).mkdir(parents=True, exist_ok=True)

In [None]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image, ImageDraw
import torchvision.transforms as T
import matplotlib.pyplot as plt
import os

# Load Faster R-CNN (ResNet-50 FPN) with its default pretrained weights.
# `pretrained=True` is deprecated in torchvision >= 0.13; `weights="DEFAULT"`
# is the supported spelling and selects the same pretrained checkpoint.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
# Switch to evaluation mode: the model is used for inference only.
model.eval()

def transform_image(image_path):
    """Load an image file as an RGB tensor with a leading dimension of size 1.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The ``T.ToTensor()`` conversion of the RGB image, with an extra
        dimension inserted at position 0.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    to_tensor = T.ToTensor()
    image_tensor = to_tensor(rgb_image)
    return image_tensor.unsqueeze(0)

def detect_objects(image_path):
    """Run the module-level ``model`` on a single image file.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        The first element of the model output for the one-image input built
        by ``transform_image``.
    """
    batch = transform_image(image_path)
    # Inference only: no gradients need to be tracked for the forward pass.
    with torch.no_grad():
        outputs = model(batch)
    return outputs[0]

def visualize_and_save_detections(image_path, predictions, output_image_path, output_txt_path, threshold=0.5):
    """Draw the confident detections on an image and record them in a text file.

    Args:
        image_path: Path of the image the predictions were computed for.
        predictions: Mapping with per-detection 'boxes' (x_min, y_min, x_max, y_max),
            'labels' and 'scores' entries, e.g. the result of ``detect_objects``.
        output_image_path: Destination of the annotated image.
        output_txt_path: Destination of the text report; one line per kept
            detection in the form ``label score x_min y_min x_max y_max``.
        threshold: Only detections whose score is strictly greater than this
            value are drawn and written.
    """
    # Convert to RGB (as transform_image does for the model input) so the
    # annotations are drawn on the same representation the model saw and the
    # "red" colour is not remapped by a grayscale or palette image mode.
    image = Image.open(image_path).convert('RGB')
    draw = ImageDraw.Draw(image)

    # .tolist() yields plain Python numbers, so PIL and the format specs below
    # never have to deal with 0-dim tensors.
    boxes = predictions['boxes'].tolist()
    labels = predictions['labels'].tolist()
    scores = predictions['scores'].tolist()

    with open(output_txt_path, 'w') as f:
        for (x_min, y_min, x_max, y_max), label, score in zip(boxes, labels, scores):
            if score > threshold:
                draw.rectangle(((x_min, y_min), (x_max, y_max)), outline="red", width=2)
                # Place the caption just above the top-left corner of the box.
                draw.text((x_min, y_min - 10), f'Label: {label}, Score: {score:.2f}', fill="red")
                f.write(f'{label} {score:.2f} {x_min:.0f} {y_min:.0f} {x_max:.0f} {y_max:.0f}\n')

    image.save(output_image_path)

# Run Faster R-CNN on every JPEG/PNG in image_folder and store, per image, an
# annotated copy plus a text report in output_folder.
# os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
for image_filename in sorted(os.listdir(image_folder)):
    # Case-insensitive match so files such as 'IMG_001.JPG' are not silently skipped.
    if not image_filename.lower().endswith(('.jpg', '.png')):
        continue

    image_path = os.path.join(image_folder, image_filename)
    # splitext strips only the final extension, so 'scan.v2.png' keeps the stem
    # 'scan.v2' instead of being truncated to 'scan' (which could collide with
    # the report of another image).
    image_stem = os.path.splitext(image_filename)[0]
    output_image_path = os.path.join(output_folder, f"output_{image_filename}")
    output_txt_path = os.path.join(output_folder, f"output_{image_stem}.txt")

    predictions = detect_objects(image_path)
    visualize_and_save_detections(image_path, predictions, output_image_path, output_txt_path)


Mask R-CNN

In [4]:
# Google Drive folder that receives the Mask R-CNN results.
output_folder2 = '/content/drive/My Drive/Mask_R-CNN'

# Create it (and any missing parents); no error if it already exists.
Path(output_folder2).mkdir(parents=True, exist_ok=True)

In [None]:
from torchvision.models.detection import maskrcnn_resnet50_fpn
import numpy as np

# Load Mask R-CNN (ResNet-50 FPN) with its default pretrained weights.
# `pretrained=True` is deprecated in torchvision >= 0.13; `weights="DEFAULT"`
# is the supported spelling and selects the same pretrained checkpoint.
# NOTE: this rebinds the module-level name `model`, so every function that
# reads `model` uses Mask R-CNN once this cell has run.
model = maskrcnn_resnet50_fpn(weights="DEFAULT")
# Switch to evaluation mode: the model is used for inference only.
model.eval()

def detect_and_segment_objects(image_path):
    """Run the module-level ``model`` on a single image file.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        The first element of the model output for the one-image input built
        by ``transform_image``.
    """
    # Inference only: gradient tracking is switched off for the forward pass.
    with torch.no_grad():
        first_result = model(transform_image(image_path))[0]
    return first_result

def visualize_and_save_detections(image_path, predictions, output_image_path, output_txt_path, threshold=0.5):
    """Overlay masks, boxes and captions of the confident detections and record them.

    NOTE: this definition replaces the box-only function of the same name
    defined earlier in the notebook.

    Args:
        image_path: Path of the image the predictions were computed for.
        predictions: Mapping with per-detection 'boxes' (x_min, y_min, x_max, y_max),
            'labels', 'scores' and 'masks' entries, e.g. the result of
            ``detect_and_segment_objects``.
        output_image_path: Destination of the annotated image.
        output_txt_path: Destination of the text report; one line per kept
            detection in the form ``label score x_min y_min x_max y_max``.
        threshold: Only detections whose score is strictly greater than this
            value are drawn and written.
    """
    # .tolist() yields plain Python numbers for the comparisons, PIL calls and
    # format specs below.
    boxes = predictions['boxes'].tolist()
    labels = predictions['labels'].tolist()
    scores = predictions['scores'].tolist()
    kept = [i for i, score in enumerate(scores) if score > threshold]

    # Pass 1: paint every kept mask onto one pixel array. Doing this before any
    # drawing avoids the stale-ImageDraw problem: previously the image object
    # was replaced after each mask while `draw` kept pointing at the first
    # image, so boxes and captions of all but the first detection were lost.
    pixels = np.array(Image.open(image_path).convert('RGB'))
    for i in kept:
        # Entry [i, 0] is the mask of detection i; values above 0.5 count as
        # part of the object.
        mask = predictions['masks'][i, 0].cpu().numpy() > 0.5
        pixels[mask] = (255, 0, 0)

    # Pass 2: draw boxes and captions on top of the masks and write the report.
    image = Image.fromarray(pixels)
    draw = ImageDraw.Draw(image)

    with open(output_txt_path, 'w') as f:
        for i in kept:
            x_min, y_min, x_max, y_max = boxes[i]
            label = labels[i]
            score = scores[i]

            draw.rectangle(((x_min, y_min), (x_max, y_max)), outline="red", width=2)
            # Place the caption just above the top-left corner of the box.
            draw.text((x_min, y_min - 10), f'Label: {label}, Score: {score:.2f}', fill="red")

            f.write(f'{label} {score:.2f} {x_min:.0f} {y_min:.0f} {x_max:.0f} {y_max:.0f}\n')

    image.save(output_image_path)

# Run Mask R-CNN on every JPEG/PNG in image_folder and store, per image, an
# annotated copy plus a text report in output_folder2.
# os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
for image_filename in sorted(os.listdir(image_folder)):
    # Case-insensitive match so files such as 'IMG_001.JPG' are not silently skipped.
    if not image_filename.lower().endswith(('.jpg', '.png')):
        continue

    image_path = os.path.join(image_folder, image_filename)
    # splitext strips only the final extension, so 'scan.v2.png' keeps the stem
    # 'scan.v2' instead of being truncated to 'scan' (which could collide with
    # the report of another image).
    image_stem = os.path.splitext(image_filename)[0]
    output_image_path = os.path.join(output_folder2, f"output_{image_filename}")
    output_txt_path = os.path.join(output_folder2, f"output_{image_stem}.txt")

    predictions = detect_and_segment_objects(image_path)

    visualize_and_save_detections(image_path, predictions, output_image_path, output_txt_path)
