In [13]:
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import os
from pycocotools.coco import COCO
from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F
%matplotlib inline

In [14]:
print(torch.__version__)

2.1.0+cu118


In [15]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")
torch.cuda.empty_cache()

Using cuda device


In [16]:
# mat = scipy.io.loadmat('cupDataset.mat')
# mat.keys()


In [17]:
file_path = 'data.txt'

with open(file_path, 'r') as file:
    data_lines = file.readlines()

data_lines[:10]

['           cupImagename                   cup       \n',
 '    ___________________________    _________________\n',
 '\n',
 "    {'cup_images\\cup(1).jpg'  }    {[147 57 67 105]}\n",
 "    {'cup_images\\cup(10).jpg' }    {[  18 61 48 99]}\n",
 "    {'cup_images\\cup(100).jpg'}    {[ 156 69 58 95]}\n",
 "    {'cup_images\\cup(101).jpg'}    {[ 171 9 53 165]}\n",
 "    {'cup_images\\cup(102).jpg'}    {[  38 88 21 67]}\n",
 "    {'cup_images\\cup(103).jpg'}    {[148 28 72 187]}\n",
 "    {'cup_images\\cup(104).jpg'}    {[  2 51 49 125]}\n"]

In [18]:
import re
import pandas as pd

def parse_line(line):
 
    match = re.match(r"\s*{'(.*?)'}\s*{\[(.*?)\]}", line)
    if match:
        image_name = match.group(1)
        cup_data = list(map(int, match.group(2).split()))
        return {'cupImagename': image_name, 'cup': cup_data}
    return None

parsed_data = [parse_line(line) for line in data_lines if line.strip() and not line.startswith('cupImagename')]

parsed_data = [data for data in parsed_data if data is not None]

data = pd.DataFrame(parsed_data)
data['cup'] = data['cup'].apply(lambda x: [float(i) for i in x])
data['cupImagename'] = data['cupImagename'].str.replace(r'cup_images\\', '', regex=True)

data.head()
data.to_excel('data.xlsx')

In [19]:
import ast

# Function to convert string representation of list to actual list


# Optionally, drop the unnamed column if it's just an index
if 'Unnamed: 0' in data.columns:
    data.drop('Unnamed: 0', axis=1, inplace=True)

# Display the modified dataframe
print(data.head())


   cupImagename                         cup
0  cup(100).jpg   [156.0, 69.0, 58.0, 95.0]
1  cup(101).jpg   [171.0, 9.0, 53.0, 165.0]
2  cup(102).jpg    [38.0, 88.0, 21.0, 67.0]
3  cup(103).jpg  [148.0, 28.0, 72.0, 187.0]
4  cup(104).jpg    [2.0, 51.0, 49.0, 125.0]


In [20]:
from torchvision.transforms import v2 as T
from torchvision import transforms
import cv2

class CupDataset(Dataset):
    def __init__(self, dataframe, root_dir, transforms_2 = None):

        self.dataframe = dataframe
        self.transforms_2 = transforms_2
        self.root_dir = root_dir


    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx, 0])
        img = cv2.imread(img_name)
        if img is None:
            raise ValueError(f"Image not found: {img_name}")
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        orig_height, orig_width = image.shape[:2]

        # Scale the image pixel values to [0, 255] if necessary
        if image.dtype == np.float32:
            image = (image * 255).astype(np.uint8)
       
        # Get dimensions of the image
        orig_height, orig_width = image.shape[:2]

        image_id = torch.tensor([idx], dtype=torch.int64)
        box = torch.tensor(self.dataframe.iloc[idx, 1], dtype=torch.float32)

        y_max, y_min, x_max, x_min = box


        scale_x = 224.0 / orig_width
        scale_y = 224.0 / orig_height   


        # Format the box for model input
        new_box = torch.tensor([x_min * scale_x, y_min * scale_y, x_max * scale_x, y_max * scale_y], dtype=torch.float32)
        labels = torch.ones((1,), dtype=torch.int64)
        area = (new_box[3] - new_box[1]) * (new_box[2] - new_box[0])


        iscrowd = torch.zeros((new_box.shape[0],), dtype=torch.int64)
        
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])
        image = transform(image)
        # image = np.array(image)
        # print(image)
        

        # Create target dictionary
        target = {}
        target["boxes"] = new_box
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms_2:
            sample = self.transforms_2(image = image,
                                     bboxes = target['boxes'],
                                     labels = labels)
            
            image = sample['image']
            target['boxes'] = torch.Tensor(sample['bboxes'])
        # if self.transform is not None:
        #     image, target = self.transform(image, target)

        return image, target




In [21]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision import transforms
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

def get_object_detection_model(num_classes):
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model


def collate_fn(batch):
    return tuple(zip(*batch))


def get_transform():
  
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

    
#cup_dataset = CupDataset(dataframe=data, root_dir='C:/Users/denis/Desktop/HNS/Git/svec-safar-HNS/Zadanie4/cup_images/', transforms_2=get_transform())

cup_dataset = CupDataset(dataframe=data, root_dir='C:/Users/denis/Desktop/HNS/Git/svec-safar-HNS/Zadanie4/cup_images/')


# Create a data loader

indices = np.random.permutation(len(cup_dataset)).tolist()

train_data = torch.utils.data.Subset(cup_dataset, indices[:])

data_loader = DataLoader(train_data, batch_size=4, shuffle=True, collate_fn=collate_fn)

In [22]:
for images, targets in data_loader:
    print(targets[0])
    break

n_batches = len(data_loader)

{'boxes': tensor([ 85., 110.,  36.,   1.]), 'labels': tensor([1]), 'image_id': tensor([29]), 'area': tensor(5341.), 'iscrowd': tensor([0, 0, 0, 0])}


In [23]:


device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2

model = get_object_detection_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)



import time
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()

# Define the lists to store the loss values for training and validation
train_loss_values = []
val_loss_values = []

def train_model(model, data_loader=None, num_epoch=10):

    for epoch in range(1, num_epoch + 1):
        print(f"Starting epoch {epoch} of {num_epoch}")

        time_start = time.time()
        loss_accum = 0.0
        # loss_mask_accum = 0.0
        loss_classifier_accum = 0.0
        
        model.train()
        
        for batch_idx, (images, targets) in enumerate(data_loader, 1):

            # Predict
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            # loss_mask = loss_dict['loss_mask'].item()
            loss_accum += loss.item()
            # loss_mask_accum += loss_mask
            loss_classifier_accum += loss_dict['loss_classifier'].item()

            if batch_idx % 500 == 0:
                print(f"    [Batch {batch_idx:3d} / {n_batches:3d}] Batch train loss: {loss.item():7.3f}.")

        lr_scheduler.step()

        # Train losses
        train_loss = loss_accum / n_batches
        
        # Store the loss value for training
        train_loss_values.append(train_loss)
        
        # train_loss_mask = loss_mask_accum / n_batches
        train_loss_classifier = loss_classifier_accum / n_batches

        elapsed = time.time() - time_start

        torch.save(model.state_dict(), f"pytorch_model-e{epoch}.bin")
        prefix = f"[Epoch {epoch:2d} / {num_epoch:2d}]"
        # print(prefix)
        # print(f"{prefix} Train mask-only loss: {train_loss_mask:7.3f}, classifier loss {train_loss_classifier:7.3f}")
        print(f"{prefix} Train loss: {train_loss:7.3f} [{elapsed:.0f} secs]", end=' | ')
        
        preds_single = []
        targets_single = []
        
    
        
        metric.update(preds_single, targets_single)
        batch_map = metric.compute()
        val_loss_values.append(batch_map)
        print(f"Val mAP: {batch_map['map']}")
        
    return model





In [24]:
num_epoch = 25
model = train_model(model, data_loader, num_epoch)

Starting epoch 1 of 25


AssertionError: Expected target boxes to be a tensor of shape [N, 4], got torch.Size([4]).