In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Load Images: Load all images from the specified folder.**

In [1]:
import os
from PIL import Image
import random
import itertools
from collections import defaultdict
from typing import List, Tuple, Dict, Literal
import torch
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from pathlib import Path


In [3]:
def load_images_from_folder(folder_path):
    images = []
    for filename in os.listdir(folder_path):
        if filename.endswith(('.png', '.jpg', '.jpeg')):
            img_path = os.path.join(folder_path, filename)
            img = Image.open(img_path).convert("RGBA")
            images.append((filename, img))
    return images


image_folder = "/content/drive/MyDrive/Week12/Asserts/fef95c5e1ee5bc235b56d7c508d3bcd0"
images = load_images_from_folder(image_folder)
images

[('endframe_1.jpg', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('_preview.png', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('landing_2.jpg', <PIL.Image.Image image mode=RGBA size=600x385>),
 ('advertised_item.png', <PIL.Image.Image image mode=RGBA size=224x119>),
 ('cta.png', <PIL.Image.Image image mode=RGBA size=300x64>),
 ('endframe_2.png', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('endframe_3.png', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('endframe_5.png', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('endframe_4.png', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('endframe_6.png', <PIL.Image.Image image mode=RGBA size=600x900>),
 ('engagement_animation.png', <PIL.Image.Image image mode=RGBA size=90x108>),
 ('engagement_instruction.png', <PIL.Image.Image image mode=RGBA size=466x77>),
 ('gametext_1.png', <PIL.Image.Image image mode=RGBA size=336x61>),
 ('gametext_2.png', <PIL.Image.Image image mode=RGBA size=336x61>),
 ('landing_1.png', <PI

**Object detection**

In [4]:
# Object Detection model
def load_detection_model():
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()
    return model

def detect_objects(image: Image.Image, model) -> List[Dict[str, Tuple]]:
    transform = T.Compose([T.ToTensor()])
    img_tensor = transform(image)
    predictions = model([img_tensor])[0]
    return predictions

def label_objects(predictions) -> Dict[str, Tuple]:
    # Assuming we have a function that maps detection labels to our categories
    labels = predictions['labels']
    boxes = predictions['boxes']
    labeled_boxes = {}
    for label, box in zip(labels, boxes):
        category = map_label_to_category(label)
        labeled_boxes[category] = box
    return labeled_boxes

In [5]:
"""def detect_objects(image):
    # Placeholder for object detection logic
    detected_objects = ["logo", "text", "background_image", "CTA_button", "end_frame"]
    return detected_objects

# Detect objects in each image
image_objects = [(filename, detect_objects(img)) for filename, img in images]

# Assuming images is a list of tuples (filename, img)
image_objects = [(filename, detect_objects(img, model)) for filename, img in images]
"""


'def detect_objects(image):\n    # Placeholder for object detection logic\n    detected_objects = ["logo", "text", "background_image", "CTA_button", "end_frame"]\n    return detected_objects\n\n# Detect objects in each image\nimage_objects = [(filename, detect_objects(img)) for filename, img in images]\n\n# Assuming images is a list of tuples (filename, img)\nimage_objects = [(filename, detect_objects(img, model)) for filename, img in images]\n'

In [6]:
def map_label_to_category(label) -> str:
    # Map object detection label to our categories
    label_map = {1: "Logo", 2: "CTA Button", 3: "Icon", 4: "Product Image", 5: "Text Elements", 6:"Background", 7:"End frame"}
    return label_map.get(label.item(), "Unknown")

In [7]:
# Load detection model
model = load_detection_model()

# Detect objects and label them
labeled_images = []
for filename, image in images:
    predictions = detect_objects(image, model)
    labeled_boxes = label_objects(predictions)
    labeled_images.append((filename, image, labeled_boxes))

print("Labeled images: ", labeled_images)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 107MB/s]


RuntimeError: The size of tensor a (4) must match the size of tensor b (3) at non-singleton dimension 0

In [9]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.58-py3-none-any.whl (802 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.7/802.7 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-c

**image detection using yolo8**

In [10]:
from ultralytics import YOLO
model = YOLO("yolov8m.pt")
results = model.predict(image_folder)


Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:00<00:00, 233MB/s]




errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/17 /content/drive/MyDrive/Week12/Asserts/fef95c5e1ee5bc235b56d7c508d3bcd0/_preview.png: 640x448 (no detections), 1384.9ms
image 2/17 /content/drive/MyDrive/Week12/Asserts/fef95c5e1ee5bc235b56d7c508d3bcd0/advertised_item.png: 352x640 1 car, 840.5ms
image 3/17 /content/drive/MyDrive/Week12/Asserts/fef95c5e1ee5bc235b56d7c508d3bcd0/cta.png: 160x640 (no detections), 410.3ms
image 4/17 /content/drive/MyDrive/Week12/Asserts/fef95c5e1ee5bc235b56d7c508d3bcd0/endframe_1.jpg: 640x448 1 car, 1066.2ms
image 5/17 /content/drive/MyDrive/Week12/Asserts/fef95c5e1ee5bc235b56d