In [22]:
import cv2
import numpy as np

In [23]:
from torchvision.io.image import decode_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

img = decode_image(r"C:\Users\Lucas\Documents\GitHub\OrangeDetect\2m9ewge9sd681.jpg")

# Step 1: Initialize model with the best available weights
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
model.eval()

# Step 2: Initialize the inference transforms
preprocess = weights.transforms()

# Step 3: Apply inference preprocessing transforms
batch = [preprocess(img)]

# Step 4: Use the model and visualize the prediction
prediction = model(batch)[0]
labels = [weights.meta["categories"][i] for i in prediction["labels"]]
box = draw_bounding_boxes(img, boxes=prediction["boxes"],
                          labels=labels,
                          colors="red",
                          width=4, font_size=30)
im = to_pil_image(box.detach())
im.show()



In [24]:
prediction

{'boxes': tensor([[ 925.4276, 1426.9391, 2195.3459, 2674.1357],
         [1089.1602,   73.4330, 2286.5332, 1289.4617],
         [2236.9993, 1299.5852, 2880.9663, 2275.0403],
         [   0.0000, 1504.4923,  997.4054, 2585.3723],
         [2029.5212,  223.2374, 2874.3406, 1336.7885],
         [ 403.0679,  582.5072, 1597.8832, 1434.3579],
         [ 315.8342, 1106.6494, 1402.6467, 1748.0195]],
        grad_fn=<StackBackward0>),
 'labels': tensor([55, 55, 55, 55, 55, 55, 55]),
 'scores': tensor([0.9985, 0.9955, 0.9927, 0.9902, 0.9869, 0.9856, 0.9695],
        grad_fn=<IndexBackward0>)}

O intuito aqui é extrairmos as laranjas segmentadas pelas bounding boxes que o modelo preveu, assim mandando as laranjas para o modelo classificador.

In [25]:
# Acessando as labels
# O código acima em questão é da própria documentação do Pytorch. Isso vai ser a base para nosso código
labels

['orange', 'orange', 'orange', 'orange', 'orange', 'orange', 'orange']

In [26]:
# Acessando o output
# O que nos interessa mesmo é apenas as boxes. O resto é irrelevante
prediction

{'boxes': tensor([[ 925.4276, 1426.9391, 2195.3459, 2674.1357],
         [1089.1602,   73.4330, 2286.5332, 1289.4617],
         [2236.9993, 1299.5852, 2880.9663, 2275.0403],
         [   0.0000, 1504.4923,  997.4054, 2585.3723],
         [2029.5212,  223.2374, 2874.3406, 1336.7885],
         [ 403.0679,  582.5072, 1597.8832, 1434.3579],
         [ 315.8342, 1106.6494, 1402.6467, 1748.0195]],
        grad_fn=<StackBackward0>),
 'labels': tensor([55, 55, 55, 55, 55, 55, 55]),
 'scores': tensor([0.9985, 0.9955, 0.9927, 0.9902, 0.9869, 0.9856, 0.9695],
        grad_fn=<IndexBackward0>)}

In [27]:
def extract_segment_torch(img, bbox):
    """
    Recorta um segmento da imagem com base na bounding box fornecida.

    img: Tensor da imagem (C, H, W) ou (H, W, C)
    bbox: Tensor com [x1, y1, x2, y2]
    """
    # Garantir que a bounding box é do tipo inteiro
    bbox = bbox.long()
    x1, y1, x2, y2 = bbox

    # Se a imagem estiver no formato (C, H, W), converte pra (H, W, C)
    if img.dim() == 3 and img.shape[0] <= 4:  # assume (C, H, W)
        img = img.permute(1, 2, 0)

    # Recorta o segmento
    segmento = img[y1:y2, x1:x2]

    return segmento

    
# Código gerado por IA

In [28]:
for box in prediction['boxes']:
    imgs = extract_segment_torch(img, box)

In [29]:
imgs = extract_segment_torch(img, prediction['boxes'][0])

In [30]:
imgs = imgs.permute(2, 0, 1)

In [31]:
imgs = to_pil_image(imgs.detach())

imgs.show()

In [32]:
for box in prediction['boxes']:
    imgs = extract_segment_torch(img, box)
    imgs = imgs.permute(2, 0, 1)
    imgs = to_pil_image(imgs.detach())
    imgs.show()

In [33]:
import sys
import os

# Adiciona a raiz do projeto no path
sys.path.append(os.path.abspath('..'))  # sobe uma pasta

from src import Detect


Conseguimos extrair o objeto segmentando da imagem!

In [34]:
# Vamos testar usando o classe Detect 

import os

model_path = os.getenv('MODELS_FOLDER')

for box in prediction['boxes']: 
    imgs = extract_segment_torch(img, box)
    imgs= np.array(imgs)
    orange = Detect(model_path, imgs)
    print(orange.pred())

  imgs= np.array(imgs)


tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(0)
