# Params

In [1]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_epochs = 2
batch_size = 8 


In [2]:
def custom_collate_fn(batch):
    # Separate images and targets
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    # Images are stacked as usual
    images = torch.stack(images, dim=0)

    # Targets are already in the desired format (list of dicts)
    return images, targets

# Data loading

In [3]:
import os
import numpy as np
from PIL import Image
from torchvision import transforms

# Convert YOLO format to Fast R-CNN format
def yolo_to_fast_rcnn(yolo_box, img_width, img_height):
    x_center, y_center, width, height = yolo_box
    x_min = x_center - (width / 2)
    y_min = y_center - (height / 2)
    x_max = x_center + (width / 2)
    y_max = y_center + (height / 2)
    return [x_min, y_min, x_max, y_max]

# Updated function to read label files in YOLO format
def read_yolo_label(label_path):
    with open(label_path, 'r') as file:
        lines = file.readlines()
        class_label = int(lines[0].split()[0])  # Extract class label from the first line
        yolo_box = [float(val) for val in lines[0].split()[1:]]  # Extract YOLO bounding box data
        return class_label, yolo_box

data_folder = 'data_zadanie4'  # Replace 'data_zadanie4' with your actual path

images_folder = os.path.join(data_folder, 'images')
labels_folder = os.path.join(data_folder, 'labels')

dataset = []

image_files = sorted(os.listdir(images_folder))
label_files = sorted(os.listdir(labels_folder))

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # Convert to RGB if grayscale
    transforms.Resize((224, 224)),  # Resize the image to 224x224
    transforms.ToTensor()  # Convert to tensor
])

# Iterate through image and label files
for img_file, label_file in zip(image_files, label_files):
    if img_file.endswith('.jpg') and label_file.endswith('.txt'):
        img_path = os.path.join(images_folder, img_file)
        label_path = os.path.join(labels_folder, label_file)

        # Load image using PIL
        image = Image.open(img_path)
        img_width, img_height = image.size

        # Convert YOLO format to Fast R-CNN format
        class_label, yolo_box = read_yolo_label(label_path)
        fast_rcnn_box = yolo_to_fast_rcnn(yolo_box, img_width, img_height)

        # Resize image to 224x224 and convert to PyTorch tensor
        resized_image = transform(image)

        # Create targets for Fast R-CNN
        target = {
            'boxes': torch.tensor([fast_rcnn_box], dtype=torch.float32),  # Convert box to tensor
            'labels': torch.tensor([class_label], dtype=torch.int64)  # Convert label to tensor
        }

        # Append resized image tensor and target dictionary to the dataset
        dataset.append((resized_image, target))


# for i in range(len(dataset)):
#     image, target = dataset[i]
#     print(f"{i}. Image shape: {image.shape}, Target information: {target}")

# Dataloaders

In [4]:
import torch
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.io.image import read_image
from torchvision.transforms.functional import to_tensor
import torch.nn as nn
from torch.utils.data import Subset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, dataset):
        self.data = dataset

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, target = self.data[idx]
        # Make sure target is a dictionary with 'boxes' and 'labels'
        return image, target


custom_dataset = CustomDataset(dataset)

# Split the indices for training and testing
train_indices = list(range(300))  # Assuming 300 samples for training
test_indices = list(range(300, 400))  # Assuming 100 samples for testing

# Create Subset datasets for training and testing
train_dataset = Subset(custom_dataset, train_indices)
test_dataset = Subset(custom_dataset, test_indices)

# # Create DataLoaders for training and testing
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

for batch in train_loader:
    images, targets = batch
    print(f"Batch Image Shape: {images.shape}")

    # Iterate over each target in the list of targets
    for target in targets:
        # Now target is a dictionary, and you can access its items
        for key, value in target.items():
            print(f"Target {key} Shape: {value.shape}")




Batch Image Shape: torch.Size([8, 3, 224, 224])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Batch Image Shape: torch.Size([8, 3, 224, 224])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Size([1])
Target boxes Shape: torch.Size([1, 4])
Target labels Shape: torch.Si

# Model

In [5]:
import torch.optim as optim
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


# Define the model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
model.train()

# Define optimizer and loss function (you may need to customize this)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
criterion = nn.CrossEntropyLoss()



In [6]:
# Inspecting the first batch from the DataLoader
# Inspecting the first batch from the DataLoader again
first_batch_images, first_batch_targets = next(iter(train_loader))
print(f"Type of first batch images: {type(first_batch_images)}")  # Should be <class 'torch.Tensor'>
print(f"Type of first batch targets: {type(first_batch_targets)}")  # Should now be <class 'list'>



Type of first batch images: <class 'torch.Tensor'>
Type of first batch targets: <class 'list'>


# Training

In [7]:
for epoch in range(num_epochs):
    running_loss = 0.0

    for images, batch_targets in train_loader:
        if not isinstance(batch_targets, list):
            print("Error: batch_targets is not a list")
            break

        optimizer.zero_grad()

        output = model(images, batch_targets)

        loss = sum(loss for loss in output.values())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{num_epochs}] - Loss: {running_loss / len(train_loader):.4f}")


In [None]:
#torch.save(model, 'modely_zadanie4/faster_rcnn_model.pth')

In [None]:
from sklearn.metrics import confusion_matrix

# Ground truth boxes and labels
ground_truth_boxes = []
ground_truth_labels = []

# Predicted boxes and labels
predicted_boxes = []
predicted_labels = []

# Iterate through the test dataset
for idx in range(len(test_dataset)):
    image, target = test_dataset[idx]
    
    # Ground truth
    ground_truth_boxes.append(target['boxes'])
    ground_truth_labels.append(target['labels'])
    
    # Predictions
    with torch.no_grad():
        output = model([image])
    
    # Assuming the model returns predicted boxes and labels
    predicted_boxes.append(output[0]['boxes'])
    predicted_labels.append(output[0]['labels'])

# Flatten the lists of boxes and labels
flat_ground_truth_boxes = [box for sublist in ground_truth_boxes for box in sublist]
flat_ground_truth_labels = [label for sublist in ground_truth_labels for label in sublist]
flat_predicted_boxes = [box for sublist in predicted_boxes for box in sublist]
flat_predicted_labels = [label for sublist in predicted_labels for label in sublist]

print(len(flat_ground_truth_labels))
print(len(flat_predicted_labels))
print(flat_ground_truth_labels)
print(flat_predicted_labels)

# Convert the box tensors and label tensors to lists or arrays
# Then compute the confusion matrix
conf_matrix = confusion_matrix(flat_ground_truth_labels, flat_predicted_labels)
print(conf_matrix)

AssertionError: targets should not be none when in training mode