In [11]:
import torchvision
import torch.nn as nn
import torch
import json
from PIL import Image
from torchvision.transforms import Resize
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import torchvision.transforms as T
from tqdm.notebook import tqdm
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights

In [12]:
class TrainingDataset(torch.utils.data.Dataset):
    def __init__(self, data, validation=False):
        self.images = []
        self.boxes = []


        x_final, y_final = 600, 1200
        transform = Resize((y_final, x_final))
        to_tensor = ToTensor()
        for item in data:
            file_name = item['data']['image'].split('-')[-1]

            image_path = file_name  # Zmień odpowiednio ścieżkę do katalogu ze zdjęciami
            image = Image.open(image_path)
            image = to_tensor(transform(image))*255
            image = image.type(torch.uint8)
            self.images.append(image)
            points = item['annotations'][0]['result'][0]['value']['points']

            xmin = min(point[0] for point in points)/100*x_final
            ymin = min(point[1] for point in points)/100*y_final
            xmax = max(point[0] for point in points)/100*x_final
            ymax = max(point[1] for point in points)/100*y_final
            box = torch.tensor([[xmin, ymin, xmax, ymax]])
            self.boxes.append(box)

        self.labels = torch.ones((len(data), 1), dtype=torch.int64)*84


        transforms = [T.ElasticTransform(alpha=45.0), #T.Grayscale(),
                      T.ColorJitter(brightness=.5, hue=.3),
                      T.RandomInvert(), T.RandomPosterize(bits=2),
                      T.RandomSolarize(threshold=0.5),
                      T.RandomAdjustSharpness(sharpness_factor=2),
                      T.RandomAutocontrast(),
                      T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
                      ]
        self.transforms = T.Compose([*[T.RandomApply([transform], p=0.5) for transform in transforms]])
        if validation:
            self.transforms = None
        self.transforms = None
        self.normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        self.normalize = lambda x: x

    # def crop_img_with_box(self, img, box):
    #     # TODO: crop image with box
    #     return img, box

    def __getitem__(self, idx):
        out_img = self.images[idx]
        if self.transforms is not None:
            out_img = self.transforms(out_img)

        out_box = self.boxes[idx]
        # out_img, out_box = self.crop_img_with_box(out_img, out_box)
        return self.normalize(out_img/255), out_box, self.labels[idx]

    def __len__(self):
        return len(self.images)


In [13]:
with open('labelj.json') as f:
    data = json.load(f)


In [14]:
model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, box_score_thresh=0.5)
weights_dict = dict(model.named_parameters())
for k, v in weights_dict.items():
    if "box_predictor" not in k:
        v.requires_grad = False
    else:
        v.requires_grad = True

In [15]:


#Hiperparametr
epochs = 20
#lr, momentum - hiperparametry
#można spróbować torch.optim.Adam
optimizer = torch.optim.SGD(model.parameters(), lr=0.000, momentum=0.0)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
training_dataset = TrainingDataset(data)
#batch_size = 2 - hiperparametr
training_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=2, shuffle=True)
val_dataset = TrainingDataset(data, validation=True)
val_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=2, shuffle=True)

for epoch in tqdm(list(range(epochs))):
    model.train()
    for images, boxes, labels in training_dataloader:
        images = list(image.to(device) for image in images)
        targets = []
        for i in range(len(images)):
            d = {}
            d['boxes'] = boxes[i].to(device)
            d['labels'] = labels[i].to(device)
            targets.append(d)
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
        optimizer.zero_grad()
        print(losses)
    model.eval()
    with torch.no_grad():
        for images, boxes, labels in val_dataloader:
            images = list(image.to(device) for image in images)

            preds = model(images)

#Zapisanie modelu
torch.save(model.state_dict(), "model.pth")


  0%|          | 0/20 [00:00<?, ?it/s]

tensor(0.3632, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3170, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.4183, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2971, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2834, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2397, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3400, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3819, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2279, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.4239, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2546, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3267, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.3613, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2927, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2996, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.4257, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.2217, device='cuda:0', grad_fn=<AddBackward0>)
tensor(0.4850, device='cuda:0', grad_fn=<AddBack

Tutaj są predykcje dla wytrenowanego modelu

In [19]:
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

import torch

model.to("cpu")
model.eval()
for img, boxes, labels in val_dataset:
    preds = model([img])
    img = img*255
    img = img.type(torch.uint8)
    box_im = draw_bounding_boxes(img, boxes=boxes,

                              colors="red",
                              width=4)
    # boxes = [box for box, label, score in el.items() if label == 84 for el in preds]
    boxes = []
    for el in preds:
        for box, label, score in zip(el['boxes'], el['labels'], el['scores']):
            if label == 84:
                boxes.append(box)
    # if boxes:
    #     boxes = torch.stack(boxes)
    #     box_im = draw_bounding_boxes(box_im, boxes=boxes,

    #                           colors="green",
    #                           width=4)
    im = to_pil_image(box_im.detach())
    im.show()


KeyboardInterrupt: 

Tutaj są predykcje dla modelu bez zmian

In [27]:
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, box_score_thresh=0.5)
model.to("cpu")
model.eval()
for img, boxes, labels in val_dataset:
    preds = model([img])
    img = img*255
    img = img.type(torch.uint8)
    box_im = draw_bounding_boxes(img, boxes=boxes,

                              colors="red",
                              width=4)
    # boxes = [box for box, label, score in el.items() if label == 84 for el in preds]
    boxes = []
    for el in preds:
        for box, label, score in zip(el['boxes'], el['labels'], el['scores']):
            if label == 84:
                boxes.append(box)
    if boxes:
        boxes = torch.stack(boxes)
        box_im = draw_bounding_boxes(box_im, boxes=boxes,

                              colors="green",
                              width=4)
    im = to_pil_image(box_im.detach())
    im.show()


KeyboardInterrupt: 