In [None]:
!pip install torch
!pip install -qr https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Mount Google Drive at /content/drive (Colab only) so the project files
# stored there can be accessed through regular file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import math
import cv2
import torch
import numpy as np
from pathlib import Path
from matplotlib import pyplot as plt

In [None]:
# Point root_path at the location where the project directory was saved.
root_path = "/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project"
data_path = os.path.join(root_path, "cs4243_smallest")

# Each sub-directory of data_path is treated as one class label.
class_labels = list(
    filter(
        lambda entry: os.path.isdir(os.path.join(data_path, entry)),
        os.listdir(data_path),
    )
)
# Uncomment to clean a specific folder
# class_labels = ["normal"]

# Root directory that receives the cropped images (one sub-folder per class).
clean_data_path = os.path.join(root_path, "image_data_cleaned")

In [None]:
# Load the pretrained YOLOv5-small detector through torch.hub.
# The first call downloads the repository code and the yolov5s weights.
model = torch.hub.load(
    repo_or_dir="ultralytics/yolov5",
    model="yolov5s",
    pretrained=True,
)

  "You are about to download and run code from an untrusted repository. In a future release, this won't "
Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2022-10-4 Python-3.7.14 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5s.pt to yolov5s.pt...


  0%|          | 0.00/14.1M [00:00<?, ?B/s]


Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [None]:
def save_image(image, image_name, class_label):
    """Write `image` to `<clean_data_path>/<class_label>/<image_name>`.

    The target directory is not created here; it must already exist.

    Args:
        image: pixel array to be written by ``cv2.imwrite``.
        image_name: file name (including extension) of the output image.
        class_label: name of the class sub-directory under ``clean_data_path``.

    Returns:
        The status returned by ``cv2.imwrite``: truthy if the file was
        written, falsy otherwise.
    """
    save_path = os.path.join(clean_data_path, class_label, image_name)
    # cv2.imwrite reports a failed write through its return value; surface it
    # instead of discarding it so missing outputs do not go unnoticed.
    written = cv2.imwrite(save_path, image)
    if not written:
        print(f"Warning: could not write image to {save_path}")
    return written

# crop by expanding bounding box by 10% of image size
crop_expansion_factor = 0.1


def _expanded_crop_bounds(xmin, ymin, xmax, ymax, width, height, factor):
    """Pad a box by `factor` of the image size per side and clamp it to the image."""
    pad_x = width * factor
    pad_y = height * factor
    left = max(0, int(xmin - pad_x))
    top = max(0, int(ymin - pad_y))
    right = min(width, int(xmax + pad_x))
    bottom = min(height, int(ymax + pad_y))
    return left, top, right, bottom


def detect_and_crop_person(image_names, class_label):
    """Crop each image of a batch to its detected person(s) and save the crop.

    The whole batch is passed to the global ``model`` in one call. For every
    image with at least one detection named "person", the smallest box that
    encloses all person detections is padded by ``crop_expansion_factor``
    times the image width/height on each side, clamped to the image, cropped
    and handed to ``save_image``.

    Args:
        image_names: file names located in ``<data_path>/<class_label>``.
        class_label: class sub-directory the images are read from; also
            passed on to ``save_image``.

    Returns:
        int: number of images in the batch for which no crop was saved
        (no person detected, or the image could not be read back).
    """
    if not image_names:
        # Nothing to detect; avoid running inference on an empty batch.
        return 0

    image_paths = [os.path.join(data_path, class_label, name) for name in image_names]
    results = model(image_paths)
    detections = results.pandas().xyxy
    success = 0
    for image_name, image_path, df in zip(image_names, image_paths, detections):
        people = df[df["name"] == "person"]
        if people.empty:
            continue
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None when it cannot decode the file;
            # count the image as dropped instead of crashing on `.shape`.
            continue
        height, width = image.shape[:2]
        # Union of all person boxes: aggregate only the coordinate columns.
        xmin, ymin, xmax, ymax = _expanded_crop_bounds(
            people["xmin"].min(),
            people["ymin"].min(),
            people["xmax"].max(),
            people["ymax"].max(),
            width,
            height,
            crop_expansion_factor,
        )
        cropped = image[ymin:ymax, xmin:xmax]
        save_image(cropped, image_name, class_label)
        success += 1
    # Return number of dropped examples
    return len(image_names) - success

In [None]:
# larger is faster, but uses more memory
batch_size = 128

# For every class folder: create the output folder, list the input files,
# and run person detection + cropping over them in batches.
for label in class_labels:
    print("Cleaning images from", label)
    input_path = os.path.join(data_path, label)
    output_path = os.path.join(clean_data_path, label)
    Path(output_path).mkdir(parents=True, exist_ok=True)
    # os.listdir returns entries in arbitrary order; sort so the batches and
    # the progress log are the same on every run.
    image_names = sorted(
        name for name in os.listdir(input_path)
        if os.path.isfile(os.path.join(input_path, name))
    )
    image_num = len(image_names)
    print("Found", image_num, "images to be processed in", math.ceil(image_num / batch_size), "steps")
    image_name_batches = [image_names[i:i+batch_size] for i in range(0, image_num, batch_size)]
    total_processed = 0
    total_dropped = 0
    for i, name_batch in enumerate(image_name_batches):
        total_dropped += detect_and_crop_person(name_batch, label)
        # Count the images actually in this batch: the final batch can be
        # shorter than batch_size, so adding batch_size would overshoot.
        total_processed += len(name_batch)
        print(f"Step {i} | Processed: {total_processed} | Dropped: {total_dropped}")

Cleaning images from normal
Found 1857 images to be processed in 15 steps
Step 0 | Processed: 128 | Dropped: 2
Step 1 | Processed: 256 | Dropped: 5
Step 2 | Processed: 384 | Dropped: 6
Step 3 | Processed: 512 | Dropped: 7
Step 4 | Processed: 640 | Dropped: 9
Step 5 | Processed: 768 | Dropped: 10
Step 6 | Processed: 896 | Dropped: 13
Step 7 | Processed: 1024 | Dropped: 14
Step 8 | Processed: 1152 | Dropped: 15
Step 9 | Processed: 1280 | Dropped: 16
Step 10 | Processed: 1408 | Dropped: 16
Step 11 | Processed: 1536 | Dropped: 18
Step 12 | Processed: 1664 | Dropped: 20
Step 13 | Processed: 1792 | Dropped: 22
Step 14 | Processed: 1920 | Dropped: 24
