In [19]:
from sklearn.model_selection import train_test_split

import cv2
import os
import random

import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import os
import cv2
import matplotlib.pyplot as plt
import shutil


from ultralytics import YOLO
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [2]:
def load_class_names(file_path):
    """Read class names from a text file containing one name per line.

    Args:
        file_path: Path to the text file.

    Returns:
        list[str]: One entry per line of the file, in file order, with
        surrounding whitespace removed (a blank line yields ``''``).
    """
    names = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            names.append(raw_line.strip())
    return names

def load_image(image_path):
    """Load an image from disk and return it in RGB channel order.

    Args:
        image_path: Path to the image file.

    Returns:
        The array decoded by ``cv2.imread`` with its channels converted
        from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file (it is missing
            or cannot be decoded).
    """
    image_bgr = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error would surface later as an opaque cvtColor assertion.
    if image_bgr is None:
        raise FileNotFoundError(f'Could not read image (missing or unreadable): {image_path}')
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

def load_labels(label_path):
    """Read a label file and split every line into whitespace-separated tokens.

    Args:
        label_path: Path to the label text file.

    Returns:
        list[list[str]]: One list of string tokens per line of the file, in
        file order (a blank line yields an empty list).
    """
    rows = []
    with open(label_path, 'r') as handle:
        for raw_line in handle:
            rows.append(raw_line.strip().split())
    return rows

def plot_image_with_labels(image, labels, class_names, image_id, show_class_names=False):
    """Show an image with its bounding boxes drawn as red rectangles.

    Args:
        image: Image array whose first two dimensions are (height, width).
        labels: Iterable of 5-value rows
            ``(class_id, x_center, y_center, width, height)``; the box values
            are fractions of the image size and are scaled to pixels here.
        class_names: Container indexed by integer class id (list or dict).
            Only consulted when ``show_class_names`` is True.
        image_id: Value shown in the figure title.
        show_class_names: When True, also draw the class name above each box.
            Defaults to False, which matches the previous output (boxes only).
    """
    plt.imshow(image)
    plt.title(f'Image ID: {image_id}')
    # shape[:2] also works for 2-D (grayscale) arrays, unlike a 3-way unpack.
    height, width = image.shape[:2]
    ax = plt.gca()
    for label in labels:
        class_id, x_center, y_center, bbox_width, bbox_height = map(float, label)
        x_center *= width
        y_center *= height
        bbox_width *= width
        bbox_height *= height
        # Rectangle is anchored at its top-left corner, not at its centre.
        x_min = x_center - bbox_width / 2
        y_min = y_center - bbox_height / 2
        rect = plt.Rectangle((x_min, y_min), bbox_width, bbox_height, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

        # Add text with the class name. The lookup is done only on request:
        # previously it ran unconditionally although its result was unused,
        # so an unsuitable class_names container raised for no benefit.
        if show_class_names:
            class_name = class_names[int(class_id)]
            ax.text(x_min, y_min - 10, class_name, color='red', fontsize=12,
                    bbox=dict(facecolor='white', alpha=0.5))

    plt.show()

In [34]:
def inferance(yolo, image_path, label_path, conf=0.5, device='cuda'):
    """Run a YOLO model on one image and plot ground truth and predictions.

    Two figures are shown: the image with the boxes from ``label_path``
    (titled 'True') and the image with the predicted boxes (titled 'Predict').

    Args:
        yolo: Callable ultralytics model; called as ``yolo(image, ...)``.
        image_path: Path to the image file.
        label_path: Path to the ground-truth label file for that image.
        conf: Confidence threshold forwarded to the model (default 0.5, the
            previously hard-coded value).
        device: Device forwarded to the model (default 'cuda', the previously
            hard-coded value).
    """
    img = load_image(image_path)
    labels = load_labels(label_path)

    # load_image returns RGB, but ultralytics interprets numpy inputs as BGR;
    # convert back so the model does not see swapped colour channels.
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    pred = yolo(img_bgr, conf=conf, device=device, show=False, save=True)
    result = pred[0]

    pred_boxes = result.boxes.xywhn.cpu().numpy()
    pred_cls = result.boxes.cls.cpu().numpy().reshape(-1, 1)
    # Same row layout as the label file: class id followed by normalised xywh.
    pred_labels = np.hstack([pred_cls, pred_boxes])

    # Class-id -> name mapping reported by the model; previously the predicted
    # boxes array was passed in its place by mistake.
    class_names = result.names

    plot_image_with_labels(img, labels, class_names, image_id='True')
    plot_image_with_labels(img, pred_labels, class_names, image_id='Predict')

In [3]:
# Sample image from the validation split and the label file with the same stem,
# kept for a manual inference check.
image_path = '/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/val/images/1 (11).jpg'
label_path = '/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/val/labels/1 (11).txt'

# Disabled manual check; NOTE(review): `yolo` is not defined in the visible cells.
# inferance(yolo, image_path, label_path)

In [8]:
import numpy as np

# Inspect the first column (the class ids, still stored as strings) of one test label file.
np.array(load_labels('/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/test/labels/9 (73).txt'))[:, 0]

array(['2'], dtype='<U8')

In [225]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
import cv2

def load_labels(label_path):
    """Read a label file into a 2-D array of string tokens.

    Each non-blank line is split on whitespace and becomes one row. Blank
    lines are skipped: they would otherwise produce rows of a different
    length, which breaks ``np.array`` and any later column slicing.

    Args:
        label_path: Path to the label text file.

    Returns:
        np.ndarray: String array of shape ``(num_rows, num_tokens)`` when all
        rows have the same number of tokens; an empty array (``len == 0``)
        when the file has no non-blank lines.
    """
    with open(label_path, 'r') as file:
        # str.split() with no arguments already ignores surrounding whitespace.
        rows = [line.split() for line in file]
    rows = [row for row in rows if row]
    return np.array(rows)

class CustomData(Dataset):
    """Object-detection dataset pairing images with per-image label files.

    Every label file is expected to hold rows of
    ``class_id x_center y_center width height`` with the box values given as
    fractions of the image size. Samples are returned in the form torchvision
    detection models consume: a float CHW image tensor and a target dict with
    absolute ``(x_min, y_min, x_max, y_max)`` pixel boxes.

    Args:
        image_dir: Directory containing the image files.
        bbox_dir: Directory containing the label files.
        img_size: Size every image is resized to. Either an int (square) or a
            ``(width, height)`` pair passed to ``cv2.resize``; ``None`` falls
            back to ``(640, 640)``.
        labels_dict: Optional mapping from the class id found in the label
            file to the label given to the model. When it is not a non-empty
            dict, labels are ``class_id + 1`` because torchvision detection
            models reserve label 0 for the background.
        transform: Optional callable invoked as
            ``transform(image=..., bboxes=..., labels=...)`` that returns a
            dict with the same keys. For Albumentations this means a
            ``Compose`` built with
            ``BboxParams(format='pascal_voc', label_fields=['labels'])``.

    Attributes:
        images: Sorted image file names that have a matching label file.
        bboxes: Label file names, aligned index-by-index with ``images``.
    """

    def __init__(self, image_dir, bbox_dir, img_size, labels_dict, transform=None):
        super().__init__()
        self.image_dir = image_dir
        self.bbox_dir = bbox_dir
        self.img_size = img_size
        self.labels_dict = labels_dict
        self.transform = transform

        # os.listdir gives no ordering guarantee, so pairing two listings by
        # position can attach labels to the wrong image. Pair by file stem.
        label_by_stem = {
            os.path.splitext(name)[0]: name
            for name in os.listdir(self.bbox_dir)
        }
        self.images = []
        self.bboxes = []
        for image_name in sorted(os.listdir(self.image_dir)):
            label_name = label_by_stem.get(os.path.splitext(image_name)[0])
            if label_name is not None:
                self.images.append(image_name)
                self.bboxes.append(label_name)

    def __len__(self):
        """Return the number of (image, label file) pairs."""
        return len(self.images)

    def _resize_target(self):
        """Return the ``(width, height)`` tuple handed to ``cv2.resize``."""
        if self.img_size is None:
            return (640, 640)
        if isinstance(self.img_size, int):
            return (self.img_size, self.img_size)
        return tuple(self.img_size)

    def _model_labels(self, class_ids):
        """Map class ids from the label file to the labels given to the model."""
        if isinstance(self.labels_dict, dict) and self.labels_dict:
            return np.array([self.labels_dict[int(c)] for c in class_ids], dtype=np.int64)
        # Shift by one so that class id 0 does not collide with background.
        return class_ids + 1

    def __getitem__(self, index):
        """Return ``(image, target)`` for one sample.

        Returns:
            tuple: ``image`` is a float tensor of shape (C, H, W); integer
            images are scaled to the 0-1 range. ``target`` is a dict with
            ``boxes`` (float32, N x 4, pixel xyxy), ``labels`` (int64, N),
            ``image_id`` (int64, 1) and ``area`` (float32, N). When the sample
            has no usable boxes, ``target`` is an empty list so that
            ``collate_fn`` can drop the sample.

        Raises:
            FileNotFoundError: If the image cannot be read.
            ValueError: If the label file rows do not have at least 5 columns.
        """
        image_path = os.path.join(self.image_dir, self.images[index])
        bbox_path = os.path.join(self.bbox_dir, self.bboxes[index])

        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), self._resize_target())

        labels_bboxes = np.asarray(load_labels(bbox_path))
        if labels_bboxes.size == 0:
            return image, []
        if labels_bboxes.ndim != 2 or labels_bboxes.shape[1] < 5:
            raise ValueError(f'Malformed label file: {bbox_path}')

        # Go through float first so ids written as "2.0" are accepted too.
        class_ids = labels_bboxes[:, 0].astype(np.float32).astype(np.int64)
        # float32 rather than float16: half precision is too coarse for coordinates.
        xywh = labels_bboxes[:, 1:5].astype(np.float32)

        # Boxes are stored as fractions of the image size, so they can be
        # scaled with the size of the already-resized image.
        height, width = image.shape[:2]
        half_w = xywh[:, 2] / 2
        half_h = xywh[:, 3] / 2
        bboxes = np.stack([
            (xywh[:, 0] - half_w) * width,
            (xywh[:, 1] - half_h) * height,
            (xywh[:, 0] + half_w) * width,
            (xywh[:, 1] + half_h) * height,
        ], axis=1)
        bboxes[:, [0, 2]] = np.clip(bboxes[:, [0, 2]], 0, width)
        bboxes[:, [1, 3]] = np.clip(bboxes[:, [1, 3]], 0, height)
        labels = self._model_labels(class_ids)

        if self.transform is not None:
            # Labels travel with the boxes so both stay aligned if the
            # transform drops a box.
            transformed = self.transform(image=image, bboxes=bboxes.tolist(), labels=labels.tolist())
            image = transformed['image']
            bboxes = np.asarray(transformed['bboxes'], dtype=np.float32).reshape(-1, 4)
            labels = np.asarray(transformed['labels'], dtype=np.int64)

        # Zero-width/height boxes are rejected by torchvision during training.
        keep = (bboxes[:, 2] > bboxes[:, 0]) & (bboxes[:, 3] > bboxes[:, 1])
        bboxes = bboxes[keep]
        labels = labels[keep]
        if len(bboxes) == 0:
            return image, []

        # collate_fn stacks images with torch.stack, so always hand out tensors.
        if isinstance(image, np.ndarray):
            image = torch.from_numpy(np.ascontiguousarray(image)).permute(2, 0, 1)
        if not torch.is_floating_point(image):
            image = image.float() / 255.0

        boxes_tensor = torch.as_tensor(bboxes, dtype=torch.float32)
        target = {
            'boxes': boxes_tensor,
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            # int64: int16 would overflow for datasets above 32767 samples.
            'image_id': torch.tensor([index], dtype=torch.int64),
            'area': (boxes_tensor[:, 3] - boxes_tensor[:, 1]) * (boxes_tensor[:, 2] - boxes_tensor[:, 0]),
        }
        return image, target

In [247]:
# def get_train_transforms():
#     return A.Compose([
#         A.Resize(640, 640),
#         A.SomeOf([
#             A.Blur(p=1, blur_limit=3),
#             A.Flip(p=1, always_apply=True),
#             A.GaussNoise(p=1, always_apply=True),
#             A.GridDistortion(p=1, always_apply=True),
#             A.Rotate((-45, 45), p=1, always_apply=True),
#             A.Transpose(p=1, always_apply=True),
#             A.RandomBrightnessContrast((-0.1, 0.1), (-0.1, 0.1), p=1, always_apply=True)
#         ], p=0.9, n=1),
#         ToTensorV2()
#     ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

# class CustomData(Dataset):
#     def __init__(self, image_dir, bbox_dir, img_size, labels_dict, transform=None):
#         super(CustomData).__init__()
#         self.image_dir = image_dir
#         self.bbox_dir = bbox_dir
#         self.img_size = img_size
#         self.labels_dict = labels_dict
#         self.transform = transform
#         self.images = os.listdir(self.image_dir)
#         self.bboxes = os.listdir(self.bbox_dir)

#     def __len__(self):
#         return len(self.bboxes)

#     def __getitem__(self, index):
#         image_name = self.images[index]
#         bbox_name = self.bboxes[index]
#         image_path = os.path.join(self.image_dir, image_name)
#         bbox_path = os.path.join(self.bbox_dir, bbox_name)
#         image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
#         labels_bboxes = load_labels(bbox_path)

#         if labels_bboxes is None or len(labels_bboxes) == 0:
#             return image, []

#         labels = labels_bboxes[:, 0].astype(np.int16)
#         bboxes = labels_bboxes[:, 1:].astype(np.float32)

#         if self.transform:
#             transformed = self.transform(image=image, bboxes=bboxes, labels=labels)
#             image = transformed['image']
#             bboxes = transformed['bboxes']
#             labels = transformed['labels']

#         area = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
#         area = torch.tensor(area, dtype=torch.float32)
#         labels = torch.tensor(labels, dtype=torch.int64)

#         target = dict()
#         target['boxes'] = torch.tensor(bboxes, dtype=torch.float32)
#         target['labels'] = labels
#         target['image_id'] = torch.tensor([index], dtype=torch.int64)
#         target['area'] = area

#         return image, target

def collate_fn(batch):
    """Collate detection samples, dropping those that have an empty target.

    Args:
        batch: Sequence of ``(image, target)`` pairs.

    Returns:
        tuple: ``(images, targets)`` where ``images`` is the kept images
        stacked with ``torch.stack`` and ``targets`` is a tuple of the kept
        targets. If no sample is kept, a pair of empty tensors is returned.
    """
    kept = [sample for sample in batch if len(sample[1]) > 0]
    if len(kept) == 0:
        return torch.empty(0), torch.empty(0)
    images, targets = zip(*kept)
    return torch.stack(images), targets



In [248]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN_ResNet50_FPN_V2_Weights, FasterRCNN_MobileNet_V3_Large_FPN_Weights
import torch


# Device the model is placed on: the GPU when available, otherwise the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

def get_model(num_classes = 6):
    """Build a COCO-pretrained Faster R-CNN (ResNet-50 FPN v2) with a new box head.

    Args:
        num_classes: Number of outputs of the new box predictor. The count
            also has to account for the background class (e.g. 4 = 3 + 1);
            the default of 6 presumably means 5 object classes plus
            background -- TODO confirm against the dataset.

    Returns:
        The model with its box predictor replaced, placed on ``DEVICE``.
    """
    # box_detections_per_img=50
    model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # Move the whole model, not only the new head: a head on the GPU with the
    # backbone left on the CPU fails with a device mismatch on the first forward.
    return model.to(DEVICE)

In [249]:
# import clearml
import torch
from torch.utils.data import DataLoader
from torch import optim

def train(train_loader, val_loader, epochs):
    """Train the detection model and record the mean training loss per epoch.

    Args:
        train_loader: Iterable yielding ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts (one per image).
        val_loader: Accepted for interface compatibility; not used by this
            function.
        epochs: Number of passes over ``train_loader``.

    Returns:
        tuple: ``(model, total_train_loss)`` where ``total_train_loss`` holds
        one mean loss value per epoch (NaN for an epoch without any batch).
    """
    # Resolve the device locally instead of relying on a notebook global that
    # is only defined in a later cell.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = get_model(num_classes=6).to(device)
    params = [param for param in model.parameters() if param.requires_grad]
    optimizer = optim.SGD(params,  lr=0.005, momentum=0.9, weight_decay=0.0005)

    # torchvision detection models need float boxes and int64 labels; casting
    # every target entry to int32 truncates the boxes and is rejected.
    target_dtypes = {'boxes': torch.float32, 'labels': torch.int64}

    def to_device(key, value):
        tensor = torch.as_tensor(value)
        return tensor.to(device=device, dtype=target_dtypes.get(key, tensor.dtype))

    total_train_loss = []
    for epoch in range(epochs):
        print(f'Epoch: {epoch + 1}')
        train_loss = []
        model.train()
        for images, targets in train_loader:
            # collate_fn returns empty tensors when every sample in the batch
            # was dropped; there is nothing to train on in that case.
            if len(images) == 0:
                continue
            images = [torch.as_tensor(image, dtype=torch.float32).to(device) for image in images]
            targets = [{k: to_device(k, v) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            train_loss.append(losses.item())
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

        epoch_train_loss = np.mean(train_loss) if train_loss else float('nan')
        total_train_loss.append(epoch_train_loss)
        print(f'Epoch train loss is: {epoch_train_loss}')

    return model, total_train_loss

In [250]:
# Augmentation pipeline for training. Boxes are expected in absolute
# (x_min, y_min, x_max, y_max) pixels with the class labels passed as `labels`,
# i.e. the pipeline is called as transform(image=..., bboxes=..., labels=...).
train_transform = A.Compose([
                A.SomeOf([
                    A.Blur(p=1, blur_limit=3),
                    A.Flip(p=1, ),
                    A.GaussNoise(p=1),
                    A.Transpose(p=1),
                    
                    # A.GridDistortion(p=1),
                    # A.Rotate((-45, 45), p=1, ),
                    # A.MedianBlur(p=1),
                    # A.CLAHE(p=1),
                    # A.RandomBrightnessContrast(p=1),
                    # A.RandomGamma(p=1),
                ], p=0.9, n=1),
                A.Resize(640, 640),
                # Only rescale to the 0-1 range: torchvision detection models
                # apply the ImageNet mean/std normalisation internally, so doing
                # it here as well would normalise the images twice.
                A.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0), max_pixel_value=255.0),
                ToTensorV2()
            # Without bbox_params the geometric transforms (Flip, Transpose,
            # Resize) move the pixels but not the boxes.
            ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

# Validation pipeline: no augmentation, but the same resize, value range and
# box handling as training so both splits feed the model identical inputs.
val_transform = A.Compose([
        A.Resize(640, 640),
        A.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0), max_pixel_value=255.0),
        ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

# Number of passes over the training data.
epochs = 10

In [251]:

# dataset = CustomData(image_dir, bbox_dir, img_size=640, labels_dict=labels_dict, transform=get_train_transforms())
# dataloader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)

# Select the GPU when available, otherwise fall back to the CPU.
device='cuda' if torch.cuda.is_available() else 'cpu'

# Training split; transform=None means no transform pipeline is applied to the samples.
train_dataset = CustomData(image_dir='/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/train/images', 
                              bbox_dir='/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/train/labels',
                              img_size=(640, 640), 
                              labels_dict='', 
                              transform=None)

# Validation split, built with the same settings as the training split.
val_dataset = CustomData(image_dir='/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/val/images', 
                              bbox_dir='/home/jupyter/datasphere/project/atomic_hach/hackaton/split_dataset/val/labels',
                              img_size=(640, 640), 
                              labels_dict='', 
                              transform=None)
# test_dataset = CustomData(image_dir, bbox_dir, img_size, labels_dict, transform)


# Batches of 16; only the training data is shuffled. collate_fn drops samples
# whose target is empty before the images are stacked.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)

In [None]:
# Run training for `epochs` epochs; returns the model and the list of per-epoch mean training losses.
model, total_train_loss = train(train_loader, val_loader, epochs)