In [7]:
from pycocotools.coco import COCO
from PIL import Image, ImageDraw
import os
# Path to the COCO annotations file; point it at your own export if needed.
annotations_file_path = "./inference_coco/result.json"
coco = COCO(annotations_file_path)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [47]:
from utils.image_utils import extract_bndbox_values
from PIL import Image
import numpy as np
import cv2
import torch
from torchvision import datasets, models, transforms
from torchvision import transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import time
import os
from utils.image_utils import mAlexNet
from pycocotools.coco import COCO
from PIL import Image, ImageDraw
import os

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = "cpu"

print(f"Using {device}")

# Network selector: "alex" or "m_alex" (the string is replaced by the built
# network further down in this cell).
model = "alex"

# Input, checkpoint and output locations.
image_dir = "./inference/images"
annotation_dir = "./inference/Annotations"
model_dir = "./models/final_alex_net_both.pth"
predicted_dir = "./predicted_images"

# Make sure the output folder exists before anything tries to write to it.
os.makedirs(predicted_dir, exist_ok=True)


# Preprocessing pipeline applied to a patch before it reaches the network.
_normalize_mean = [0.485, 0.456, 0.406]  # per-channel mean given to Normalize
_normalize_std = [0.229, 0.224, 0.225]   # per-channel std given to Normalize

_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_normalize_mean, _normalize_std),
]
transform = transforms.Compose(_preprocessing_steps)


# Build the architecture named by the `model` selector string, then rebind
# `model` to the network instance itself.
if model == "m_alex":
    model = mAlexNet()
elif model == "alex":
    # weights=None: the strict load_state_dict() below overwrites every
    # parameter with the checkpoint, so fetching the ImageNet weights first
    # would only add a needless download (and a network dependency).
    model = models.alexnet(weights=None)
    # Replace the stock classifier with a head that ends in a single logit.
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.5, inplace=False),
        nn.Linear(in_features=9216, out_features=256, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5, inplace=False),
        nn.Linear(in_features=256, out_features=128, bias=True),
        nn.ReLU(inplace=True),
        nn.Linear(in_features=128, out_features=1, bias=True),
    )
else:
    # ValueError is a subclass of Exception, so existing handlers still match.
    raise ValueError("Not a valid model type")


model = model.to(device)

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
model.load_state_dict(
    torch.load(model_dir, map_location=torch.device(device))
)
# Switch to inference mode; as the cell's last expression this also displays
# the network summary.
model.eval()

Using cuda


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=256, bias=True)
  

In [48]:

def predict_patch(patch,transform):
    """Classify a single patch with the notebook-level ``model``.

    Args:
        patch: The input handed to ``transform``.
        transform: Callable that turns ``patch`` into a tensor; a leading
            dimension is added to its result before the forward pass.

    Returns:
        The first entry of the thresholded prediction tensor: ``1.0`` where
        the sigmoid of the model output exceeds 0.5, ``0.0`` otherwise.

    Relies on the globals ``model`` and ``device`` being defined beforehand.
    """
    batch = transform(patch).unsqueeze(0).to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(batch)
        busy_flags = (torch.sigmoid(logits) > 0.5).float()

    return busy_flags[0]

In [57]:
# Folder that holds the images referenced by the COCO file.
images_dir = "./inference_coco"
coco_images = coco.loadImgs(coco.getImgIds())

# Map each COCO image id to the path of its image file.
id_paths = {
    image["id"]: os.path.join(images_dir, image["file_name"])
    for image in coco_images
}

for img_id, img_path in id_paths.items():
    # Load the whole image; the context manager closes the file handle once
    # the pixel data has been converted.
    with Image.open(img_path) as source:
        img = source.convert("RGB")

    # Outlines are drawn on a copy so that `img` keeps the untouched pixels:
    # patches are cut from `img`, and outlines drawn for earlier annotations
    # must not end up inside the patches of later ones.
    annotated = img.copy()
    draw = ImageDraw.Draw(annotated)

    # Load annotations for the current image
    annotations = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

    for ann in annotations:
        polygon = ann["segmentation"][0]

        # A fresh mask per annotation: reusing one mask for the whole image
        # would accumulate polygons, so every patch after the first would
        # also contain all previously processed polygons.
        mask = Image.new("1", img.size, 0)
        ImageDraw.Draw(mask).polygon(polygon, outline=1, fill=1)

        # Bounding box of the masked region; None when the mask is empty.
        bbox = mask.getbbox()
        if bbox is None:
            # Nothing of this polygon is inside the image, so there is no
            # patch to classify and nothing visible to outline.
            continue

        # Keep only the pixels inside the polygon, then crop to its box.
        result = Image.new("RGB", img.size)
        result.paste(img, mask=mask)
        cropped_patch = result.crop(bbox)

        is_busy = predict_patch(patch=cropped_patch, transform=transform)
        # Red outline when the prediction is 1, green otherwise.
        color = (255, 0, 0) if is_busy.item() == 1 else (0, 255, 0)
        draw.polygon(polygon, outline=color, width=2)

    annotated.save(f"output_{img_id}.png")
        
    

In [5]:
# Replace 'output_dir' with the directory where you want to save the patches
output_dir = 'output_patches'
os.makedirs(output_dir, exist_ok=True)

# Walk the dataset image by image so that every source file is opened and
# decoded once, rather than once per annotation.
for image_info in coco.loadImgs(coco.getImgIds()):
    ann_ids = coco.getAnnIds(imgIds=image_info['id'])
    if not ann_ids:
        # No annotations means no patches; skip without opening the file.
        continue

    image_path = os.path.join('./inference_coco', image_info['file_name'])

    # The context manager closes the file handle once all patches are cut.
    with Image.open(image_path) as img:
        for ann in coco.loadAnns(ann_ids):
            # Create a binary mask from the polygon
            mask = Image.new('1', img.size, 0)
            ImageDraw.Draw(mask).polygon(ann['segmentation'][0], outline=1, fill=1)

            # Apply the mask to the image: pixels outside the polygon stay black
            result = Image.new('RGB', img.size)
            result.paste(img, mask=mask)

            # Crop the masked image to the bounding box of the polygon
            bbox = mask.getbbox()
            cropped_patch = result.crop(bbox)

            # Save the patch, named after the annotation id
            patch_filename = f"{ann['id']}_patch.jpg"
            patch_path = os.path.join(output_dir, patch_filename)
            cropped_patch.save(patch_path)
