In [1]:
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F



In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")
torch.cuda.empty_cache()

Using cuda device


In [3]:
# mat = scipy.io.loadmat('cupDataset.mat')
# mat.keys()


In [4]:
file_path = 'data.txt'

with open(file_path, 'r') as file:
    data_lines = file.readlines()

data_lines[:10]

['           cupImagename                   cup       \n',
 '    ___________________________    _________________\n',
 '\n',
 "    {'cup_images\\cup(1).jpg'  }    {[147 57 67 105]}\n",
 "    {'cup_images\\cup(10).jpg' }    {[  18 61 48 99]}\n",
 "    {'cup_images\\cup(100).jpg'}    {[ 156 69 58 95]}\n",
 "    {'cup_images\\cup(101).jpg'}    {[ 171 9 53 165]}\n",
 "    {'cup_images\\cup(102).jpg'}    {[  38 88 21 67]}\n",
 "    {'cup_images\\cup(103).jpg'}    {[148 28 72 187]}\n",
 "    {'cup_images\\cup(104).jpg'}    {[  2 51 49 125]}\n"]

In [5]:
import re
import pandas as pd

def parse_line(line):
 
    match = re.match(r"\s*{'(.*?)'}\s*{\[(.*?)\]}", line)
    if match:
        image_name = match.group(1)
        cup_data = list(map(int, match.group(2).split()))
        return {'cupImagename': image_name, 'cup': cup_data}
    return None

parsed_data = [parse_line(line) for line in data_lines if line.strip() and not line.startswith('cupImagename')]

parsed_data = [data for data in parsed_data if data is not None]

df = pd.DataFrame(parsed_data)
df['cup'] = df['cup'].apply(lambda x: [float(i) for i in x])


df.head()
df.to_excel('data.xlsx')

In [6]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class CupDatasetObjectDetection(Dataset):
    def validate_and_correct_box(self, box):
        """
        Ensure that the bounding box has positive width and height.
        If not, correct the box or exclude it.
        """
        xmin, ymin, xmax, ymax = box
        return  xmin, ymin, xmax, ymax
   

    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform or transforms.ToTensor()
        self.image_paths = []
        self.bounding_boxes = []

        for index, row in dataframe.iterrows():
            img_name = row['cupImagename']
            if root_dir in img_name:
                full_path = img_name
            else:
                full_path = os.path.join(self.root_dir, img_name)
            if os.path.exists(full_path) and full_path.endswith(('.jpg', '.png', '.jpeg')):
                self.image_paths.append(full_path)
                self.bounding_boxes.append(row['cup'])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        box = self.bounding_boxes[idx]
        box = self.validate_and_correct_box(box)
        if box is None:
            return self.__getitem__((idx + 1) % len(self))
            
        box_tensor = torch.as_tensor(box, dtype=torch.float32)
        labels = torch.ones((1,), dtype=torch.int64) 
 
        target = {
            "boxes": box_tensor.unsqueeze(0), 
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": (box_tensor[3] - box_tensor[1]) * (box_tensor[2] - box_tensor[0]),
            "iscrowd": torch.zeros((1,), dtype=torch.int64)
        }

        return image, target


desired_size = (224, 224) 
transform = transforms.Compose([
    transforms.Resize(desired_size),
    transforms.ToTensor()
])

dataset = CupDatasetObjectDetection(df, root_dir='cup_images', transform=transform)


def collate_fn(batch):

    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    return images, targets

data_loader = DataLoader(dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)


In [7]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  
in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

num_epochs = 10 

for epoch in range(num_epochs):
    model.train()
    for images, targets in data_loader:
        print("Images type:", type(images))
        print("Targets type:", targets)
        if isinstance(targets, list) and all(isinstance(t, dict) for t in targets):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]


            try:
                loss_dict = model(images, targets)
               
            except AssertionError as e:
                for i, t in enumerate(targets):
                    print(f"Target {i}: {t['boxes']}")
                raise e

            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
        else:
            print("Error: targets are not in the correct format")

    print(f"Epoch {epoch} - Loss: {losses.item()}")


torch.save(model.state_dict(), 'cup_detection_model.pth')




Images type: <class 'list'>
Targets type: [{'boxes': tensor([[156.,  69.,  58.,  95.]]), 'labels': tensor([1]), 'image_id': tensor([0]), 'area': tensor(-2548.), 'iscrowd': tensor([0])}, {'boxes': tensor([[171.,   9.,  53., 165.]]), 'labels': tensor([1]), 'image_id': tensor([1]), 'area': tensor(-18408.), 'iscrowd': tensor([0])}, {'boxes': tensor([[38., 88., 21., 67.]]), 'labels': tensor([1]), 'image_id': tensor([2]), 'area': tensor(357.), 'iscrowd': tensor([0])}, {'boxes': tensor([[148.,  28.,  72., 187.]]), 'labels': tensor([1]), 'image_id': tensor([3]), 'area': tensor(-12084.), 'iscrowd': tensor([0])}]
Target 0: tensor([[156.,  69.,  58.,  95.]], device='cuda:0')
Target 1: tensor([[171.,   9.,  53., 165.]], device='cuda:0')
Target 2: tensor([[38., 88., 21., 67.]], device='cuda:0')
Target 3: tensor([[148.,  28.,  72., 187.]], device='cuda:0')


AssertionError: All bounding boxes should have positive height and width. Found invalid box [557.142822265625, 246.42857360839844, 207.14285278320312, 339.28570556640625] for target at index 0.