In [1]:
import torch
from torch import nn
from torchvision import datasets
from torchvision.transforms import ToTensor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F



In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")
torch.cuda.empty_cache()

Using cuda device


In [3]:
# mat = scipy.io.loadmat('cupDataset.mat')
# mat.keys()


In [4]:
file_path = 'data.txt'

with open(file_path, 'r') as file:
    data_lines = file.readlines()

# Display the first few lines to understand the structure of the data
data_lines[:10]

['           cupImagename                   cup       \n',
 '    ___________________________    _________________\n',
 '\n',
 "    {'cup_images\\cup(1).jpg'  }    {[147 57 67 105]}\n",
 "    {'cup_images\\cup(10).jpg' }    {[  18 61 48 99]}\n",
 "    {'cup_images\\cup(100).jpg'}    {[ 156 69 58 95]}\n",
 "    {'cup_images\\cup(101).jpg'}    {[ 171 9 53 165]}\n",
 "    {'cup_images\\cup(102).jpg'}    {[  38 88 21 67]}\n",
 "    {'cup_images\\cup(103).jpg'}    {[148 28 72 187]}\n",
 "    {'cup_images\\cup(104).jpg'}    {[  2 51 49 125]}\n"]

In [5]:
import re
import pandas as pd

# Function to parse a single line of data
def parse_line(line):
    # Using regular expression to extract the image name and the corresponding data
    match = re.match(r"\s*{'(.*?)'}\s*{\[(.*?)\]}", line)
    if match:
        image_name = match.group(1)
        cup_data = list(map(int, match.group(2).split()))
        return {'cupImagename': image_name, 'cup': cup_data}
    return None

# Parse each line and create a list of dictionaries
parsed_data = [parse_line(line) for line in data_lines if line.strip() and not line.startswith('cupImagename')]

# Remove None values (lines that did not match the pattern)
parsed_data = [data for data in parsed_data if data is not None]

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(parsed_data)
df['cup'] = df['cup'].apply(lambda x: [float(i) for i in x])

# Display the first few rows of the DataFrame



# Display the first few rows of the cleaned DataFrame
df.head()
df.to_excel('data.xlsx')

In [6]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class CupDatasetObjectDetection(Dataset):
    def validate_and_correct_box(self, box):
        """
        Ensure that the bounding box has positive width and height.
        If not, correct the box or exclude it.
        """
        xmin, ymin, xmax, ymax = box
        if xmax <= xmin or ymax <= ymin:
            # Correct the box or return None to exclude it
            return [min(xmin, xmax), min(ymin, ymax), max(xmin, xmax), max(ymin, ymax)]
        return box

    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform or transforms.ToTensor()
        self.image_paths = []
        self.bounding_boxes = []

        for index, row in dataframe.iterrows():
            img_name = row['cupImagename']
            if root_dir in img_name:
                full_path = img_name
            else:
                full_path = os.path.join(self.root_dir, img_name)
            if os.path.exists(full_path) and full_path.endswith(('.jpg', '.png', '.jpeg')):
                self.image_paths.append(full_path)
                self.bounding_boxes.append(row['cup'])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        box = self.bounding_boxes[idx]
        box = self.validate_and_correct_box(box)
        if box is None:
            # Handle the case of an invalid box, e.g., skip this item
            return self.__getitem__((idx + 1) % len(self))
            
        box_tensor = torch.as_tensor(box, dtype=torch.float32)
        labels = torch.ones((1,), dtype=torch.int64) 
 
        target = {
            "boxes": box_tensor.unsqueeze(0), 
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": (box_tensor[3] - box_tensor[1]) * (box_tensor[2] - box_tensor[0]),
            "iscrowd": torch.zeros((1,), dtype=torch.int64)
        }

        return image, target

# Define your desired size and transformations
desired_size = (224, 224)  # Example size, change according to your needs
transform = transforms.Compose([
    transforms.Resize(desired_size),
    transforms.ToTensor()
])

# Assuming 'df' is your DataFrame and 'cup_images' is the directory containing images
dataset = CupDatasetObjectDetection(df, root_dir='cup_images', transform=transform)

# Create DataLoader
def collate_fn(batch):
    """
    Custom collate function for handling batches of images and targets.
    """
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    # No need to collate images as they are already tensors
    # But ensure targets are in a list
    return images, targets

# Use the custom collate function in DataLoader
data_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)


In [7]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load a pre-trained model for fine-tuning
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  # 1 class (cup) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the head of the classifier with a new one (for our number of classes)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move model to the right device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Define optimizer and learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 10  # You can adjust this

for epoch in range(num_epochs):
    model.train()
    for images, targets in data_loader:
        print("Images type:", type(images))
        print("Targets type:", targets)
        if isinstance(targets, list) and all(isinstance(t, dict) for t in targets):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]


            try:
                loss_dict = model(images, targets)
                # ... rest of the training loop ...
            except AssertionError as e:
                for i, t in enumerate(targets):
                    print(f"Target {i}: {t['boxes']}")
                raise e

            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
        else:
            print("Error: targets are not in the correct format")

    print(f"Epoch {epoch} - Loss: {losses.item()}")

# Save the trained model
torch.save(model.state_dict(), 'cup_detection_model.pth')




Images type: <class 'list'>
Targets type: [{'boxes': tensor([[ 69.,  49., 138., 113.]]), 'labels': tensor([1]), 'image_id': tensor([104]), 'area': tensor(4416.), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 56.,  54., 165., 102.]]), 'labels': tensor([1]), 'image_id': tensor([46]), 'area': tensor(5232.), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 42.,  62., 155.,  86.]]), 'labels': tensor([1]), 'image_id': tensor([156]), 'area': tensor(2712.), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 45.,  70., 162., 109.]]), 'labels': tensor([1]), 'image_id': tensor([17]), 'area': tensor(4563.), 'iscrowd': tensor([0])}]
Images type: <class 'list'>
Targets type: [{'boxes': tensor([[ 71.,  46., 146., 133.]]), 'labels': tensor([1]), 'image_id': tensor([97]), 'area': tensor(6525.), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 33.,  58.,  58., 117.]]), 'labels': tensor([1]), 'image_id': tensor([66]), 'area': tensor(1475.), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 47.,  36., 118., 105.]]), 'labels'

AssertionError: All bounding boxes should have positive height and width. Found invalid box [157.14285278320312, 221.42857360839844, 617.8571166992188, 221.42857360839844] for target at index 3.