In [None]:
# Cell 1: Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import torch
import torchvision
from torchvision import models, transforms
from torchvision.models.detection import maskrcnn_resnet50_fpn
from PIL import Image
import os

# Probe once for CUDA support; the flag drives the report printed below.
cuda_available = torch.cuda.is_available()

print(f"CUDA Available: {cuda_available}")

if not cuda_available:
    print("CUDA is not available. PyTorch will use the CPU.")
else:
    # One line per device that torch.cuda reports.
    for gpu_index in range(torch.cuda.device_count()):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

In [None]:
import torchvision

# Choose the compute device: CUDA when PyTorch can use it, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build Mask R-CNN with torchvision's DEFAULT weights and move it to that device.
pretrained_weights = torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=pretrained_weights)
model.to(device)

# Put the model in evaluation mode; as the last expression of the cell,
# the returned module is also what the notebook displays.
model.eval()


In [None]:
# Cell 3: Define the COCO labels
# A name's position in this list is the integer id used to look it up
# (detect_objects indexes the list with the model's predicted label).
# "N/A" entries are placeholders that keep the later names at their positions.
coco_labels = [
    "__background__",  # 0
    "person",  # 1
    "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",  # 2-9
    "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter", "bench",  # 10-15
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",  # 16-25
    "N/A", "backpack", "umbrella", "N/A", "N/A", "handbag", "tie", "suitcase",  # 26-33
    "frisbee", "skis", "snowboard", "sports ball", "kite",  # 34-38
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",  # 39-43
    "bottle", "N/A", "wine glass", "cup", "fork", "knife", "spoon", "bowl",  # 44-51
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",  # 52-61
    "chair", "couch", "potted plant", "bed", "N/A", "dining table", "N/A", "N/A", "toilet", "N/A",  # 62-71
    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",  # 72-77
    "microwave", "oven", "toaster", "sink", "refrigerator", "N/A",  # 78-83
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",  # 84-90
]

# Label names from coco_labels that this notebook groups as vehicles.
vehicle_labels = ["car", "motorcycle", "bus", "truck"]

In [None]:
# Cell 4: Define the transformation
# Single-step pipeline: ToTensor is the only transform applied to an input image.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Cell 5: Detect objects in an image
def detect_objects(image, score_threshold=0.5):
    """Run the module-level ``model`` on one image and keep confident detections.

    Args:
        image: Input accepted by the module-level ``transform`` (the notebook
            passes a PIL image).
        score_threshold: Minimum score a detection needs to be returned.
            Defaults to 0.5, the cut-off that was previously hard-coded.

    Returns:
        A list of ``(box, label_name, score, mask)`` tuples in the order the
        model produced them. ``box``, ``score`` and ``mask`` are NumPy values
        taken from the model output; ``label_name`` is the ``coco_labels``
        entry at the predicted label id.
    """
    # Add a leading batch dimension and move the tensor to the model's device.
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        prediction = model(image_tensor)[0]

    boxes = prediction['boxes'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()
    scores = prediction['scores'].cpu().numpy()
    masks = prediction['masks'].cpu().numpy()

    return [
        (box, coco_labels[label], score, mask)
        for box, label, score, mask in zip(boxes, labels, scores, masks)
        if score >= score_threshold
    ]

In [None]:
def adjust_coordinates(box, original_size, preprocessed_size):
    """Rescale a box from preprocessed-image coordinates to original-image coordinates.

    Each axis is scaled independently by ``original / preprocessed`` and the
    result is truncated with ``int``.

    Args:
        box: ``(xmin, ymin, xmax, ymax)`` in the preprocessed image.
        original_size: ``(height, width)`` of the original image.
        preprocessed_size: ``(height, width)`` of the preprocessed image.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` as integers in the original image.
    """
    orig_h, orig_w = original_size
    prep_h, prep_w = preprocessed_size

    vertical_ratio = orig_h / prep_h
    horizontal_ratio = orig_w / prep_w

    left, top, right, bottom = box
    return (
        int(left * horizontal_ratio),
        int(top * vertical_ratio),
        int(right * horizontal_ratio),
        int(bottom * vertical_ratio),
    )

def visualize_detections(image_path, detected_objects, scale_x, scale_y, confidence_threshold=0.7):
    """Draw boxes and captions for confident detections on the image at ``image_path``.

    Box coordinates are divided by ``scale_x`` / ``scale_y`` to map them back
    onto the image read from disk before drawing.

    Args:
        image_path: Path of the image to load with OpenCV and draw on.
        detected_objects: Iterable of ``(box, label, score, mask)`` tuples;
            ``box`` is ``(xmin, ymin, xmax, ymax)``. The mask is ignored.
        scale_x: Horizontal factor the box coordinates were scaled by.
        scale_y: Vertical factor the box coordinates were scaled by.
        confidence_threshold: Detections scoring below this are not drawn.

    Returns:
        A copy of the loaded image with rectangles and ``"label: score"``
        captions drawn in green.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image (missing or undecodable): {image_path}")
    image_with_detections = image.copy()

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    line_thickness = 2
    box_color = (0, 255, 0)

    for box, label, score, _ in detected_objects:
        if score < confidence_threshold:
            continue

        xmin, ymin, xmax, ymax = box

        xmin_adjusted = int(xmin / scale_x)
        ymin_adjusted = int(ymin / scale_y)
        xmax_adjusted = int(xmax / scale_x)
        ymax_adjusted = int(ymax / scale_y)

        print(f"Original Box Coordinates: ({xmin}, {ymin}), ({xmax}, {ymax})")
        print(f"Adjusted Box Coordinates: ({xmin_adjusted}, {ymin_adjusted}), ({xmax_adjusted}, {ymax_adjusted})")

        cv2.rectangle(image_with_detections, (xmin_adjusted, ymin_adjusted),
                      (xmax_adjusted, ymax_adjusted), box_color, line_thickness)

        caption = f"{label}: {score:.2f}"
        # putText anchors the text at its bottom-left corner, so a box touching
        # the top edge would push the caption above the image. Keep the
        # baseline at least one text-height below the top.
        text_size, _baseline = cv2.getTextSize(caption, font, font_scale, line_thickness)
        caption_y = max(ymin_adjusted - 10, text_size[1])
        cv2.putText(image_with_detections, caption, (xmin_adjusted, caption_y),
                    font, font_scale, box_color, line_thickness)

    return image_with_detections

In [None]:
def preprocess_image(image_path, target_size=(800, 800)):
    """Load an image, shrink/grow it to fit ``target_size``, and zero-pad the rest.

    The image is read with OpenCV, converted from BGR to RGB, resized by one
    uniform factor so it fits inside ``target_size``, and copied into the
    top-left corner of a zero-filled canvas.

    Args:
        image_path: Path of the image to load.
        target_size: ``(height, width)`` of the padded output.

    Returns:
        ``(padded_image, scale_x, scale_y)``. ``padded_image`` is a ``uint8``
        array of shape ``(target_size[0], target_size[1], 3)``; both scale
        values are the same uniform resize factor.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image (missing or undecodable): {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    target_height, target_width = target_size[0], target_size[1]
    h, w, _ = image.shape
    scale = min(target_height / h, target_width / w)

    # int() truncation can reach 0 for extreme aspect ratios, and a zero-sized
    # target makes cv2.resize fail, so keep each side at least one pixel.
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))

    resized_image = cv2.resize(image, new_size)

    padded_image = np.zeros((target_height, target_width, 3), dtype=np.uint8)
    padded_image[:resized_image.shape[0], :resized_image.shape[1], :] = resized_image

    return padded_image, scale, scale

In [None]:
def crop_and_upscale_object(image, box, upscale_factor=4):
    """Cut ``box`` out of ``image`` and enlarge the crop with bicubic interpolation.

    Args:
        image: Array indexed as ``image[row, column, ...]``.
        box: ``(xmin, ymin, xmax, ymax)`` in pixel coordinates of ``image``.
            Values are converted with ``int`` and clamped to the image bounds.
        upscale_factor: Factor applied to both axes of the crop.

    Returns:
        The crop resized by ``upscale_factor`` in x and y.

    Raises:
        ValueError: If the clamped box has no area.
    """
    image_height, image_width = image.shape[:2]
    xmin, ymin, xmax, ymax = (int(value) for value in box)

    # Clamp to the image: a negative index would otherwise wrap around in the
    # slice below and silently select the wrong region.
    xmin = min(max(xmin, 0), image_width)
    xmax = min(max(xmax, 0), image_width)
    ymin = min(max(ymin, 0), image_height)
    ymax = min(max(ymax, 0), image_height)

    if xmax <= xmin or ymax <= ymin:
        # An empty crop would only fail later inside cv2.resize with an opaque error.
        raise ValueError(f"Box {tuple(box)} has no area inside an image of size {image_width}x{image_height}")

    cropped_object = image[ymin:ymax, xmin:xmax]

    upscaled_object = cv2.resize(cropped_object, None, fx=upscale_factor, fy=upscale_factor,
                                 interpolation=cv2.INTER_CUBIC)

    return upscaled_object

# Output folder for the upscaled crops written by the processing loop below.
os.makedirs("cropped-objects", exist_ok=True)

import glob

dataset_path = "dataset/raw-images"
# Seed for picking the test images; set to None to draw a different sample each run.
random_seed = 42
num_test_images = 10

# Collect .jpg/.png files from every sub-folder whose name contains "rgb".
rgb_image_paths = []
for subfolder in sorted(os.listdir(dataset_path)):
    subfolder_path = os.path.join(dataset_path, subfolder)
    if "rgb" in subfolder.lower() and os.path.isdir(subfolder_path):
        for pattern in ("*.jpg", "*.png"):
            rgb_image_paths.extend(glob.glob(os.path.join(subfolder_path, pattern)))

# os.listdir and glob return entries in arbitrary order, so sort first; a seeded
# shuffle of the sorted list then selects the same images on every run.
rgb_image_paths.sort()
np.random.default_rng(random_seed).shuffle(rgb_image_paths)

test_image_paths = rgb_image_paths[:num_test_images]

# Detections below this score are neither cropped nor saved.
crop_confidence_threshold = 0.7

for image_path in test_image_paths:
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None for unreadable files; skip rather than crash mid-run.
        print(f"Skipping unreadable image: {image_path}")
        continue

    preprocessed_image, scale_x, scale_y = preprocess_image(image_path)
    print("Original Image Shape:", original_image.shape)
    print("Preprocessed Image Shape:", preprocessed_image.shape)
    print("Scaling Factors: x =", scale_x, "y =", scale_y)

    pil_image = Image.fromarray(preprocessed_image)

    detected_objects = detect_objects(pil_image)
    print("Detected Objects:")
    for box, label, score, _ in detected_objects:
        # Print a one-line summary; the mask array is left out to keep the output readable.
        print(f"  {label}: score={score:.2f}, box={[round(float(v), 1) for v in box]}")

    image_height, image_width = original_image.shape[:2]
    image_stem = os.path.splitext(os.path.basename(image_path))[0]

    for i, (box, label, score, _) in enumerate(detected_objects):
        if score < crop_confidence_threshold:
            continue

        xmin, ymin, xmax, ymax = box
        # Map the box back to original-image pixels and clamp it to the image,
        # since a detection can extend into the padded area of the model input.
        xmin_adjusted = min(max(int(xmin / scale_x), 0), image_width)
        ymin_adjusted = min(max(int(ymin / scale_y), 0), image_height)
        xmax_adjusted = min(max(int(xmax / scale_x), 0), image_width)
        ymax_adjusted = min(max(int(ymax / scale_y), 0), image_height)

        if xmax_adjusted <= xmin_adjusted or ymax_adjusted <= ymin_adjusted:
            # A zero-area crop cannot be resized or written; skip it.
            continue

        adjusted_box = (xmin_adjusted, ymin_adjusted, xmax_adjusted, ymax_adjusted)
        upscaled_object = crop_and_upscale_object(original_image, adjusted_box, upscale_factor=4)

        object_path = os.path.join("cropped-objects", f"{image_stem}_{i}.jpg")
        cv2.imwrite(object_path, upscaled_object)

    image_with_detections = visualize_detections(image_path, detected_objects, scale_x, scale_y)

    print("Image with Detections Shape:", image_with_detections.shape)

    # Show the result inline with matplotlib: cv2.imshow/waitKey(0) opens a native
    # window and blocks the kernel, and fails where no display is available.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(cv2.cvtColor(image_with_detections, cv2.COLOR_BGR2RGB))
    ax.set_title(os.path.basename(image_path))
    ax.axis("off")
    plt.show()
    plt.close(fig)