# Params

In [1]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

num_epochs = 2
batch_size = 10


In [2]:
def custom_collate_fn(batch):
    # Separate images and targets
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]

    # Images are stacked as usual
    images = torch.stack(images, dim=0)

    # Targets are already in the desired format (list of dicts)
    return images, targets

# Data loading

In [3]:
import os
import numpy as np
from PIL import Image
from torchvision import transforms

# Convert YOLO format to Fast R-CNN format
def yolo_to_fast_rcnn(yolo_box, img_width, img_height):
    x_center, y_center, width, height = yolo_box
    x_min = x_center - (width / 2)
    y_min = y_center - (height / 2)
    x_max = x_center + (width / 2)
    y_max = y_center + (height / 2)
    return [x_min, y_min, x_max, y_max]

# Updated function to read label files in YOLO format
def read_yolo_label(label_path):
    with open(label_path, 'r') as file:
        lines = file.readlines()
        class_label = int(lines[0].split()[0])  # Extract class label from the first line
        yolo_box = [float(val) for val in lines[0].split()[1:]]  # Extract YOLO bounding box data
        return class_label, yolo_box

data_folder = 'data_zadanie4'  # Replace 'data_zadanie4' with your actual path

images_folder = os.path.join(data_folder, 'images')
labels_folder = os.path.join(data_folder, 'labels')

dataset = []

image_files = sorted(os.listdir(images_folder))
label_files = sorted(os.listdir(labels_folder))

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # Convert to RGB if grayscale
    transforms.Resize((224, 224)),  # Resize the image to 224x224
    transforms.ToTensor()  # Convert to tensor
])

# Iterate through image and label files
for img_file, label_file in zip(image_files, label_files):
    if img_file.endswith('.jpg') and label_file.endswith('.txt'):
        img_path = os.path.join(images_folder, img_file)
        label_path = os.path.join(labels_folder, label_file)

        # Load image using PIL
        image = Image.open(img_path)
        img_width, img_height = image.size

        # Convert YOLO format to Fast R-CNN format
        class_label, yolo_box = read_yolo_label(label_path)
        fast_rcnn_box = yolo_to_fast_rcnn(yolo_box, img_width, img_height)

        # Resize image to 224x224 and convert to PyTorch tensor
        resized_image = transform(image)

        # Create targets for Fast R-CNN
        target = {
            'boxes': torch.tensor([fast_rcnn_box], dtype=torch.float32),  # Convert box to tensor
            'labels': torch.tensor([class_label], dtype=torch.int64)  # Convert label to tensor
        }

        # Append resized image tensor and target dictionary to the dataset
        dataset.append((resized_image, target))


# Dataloaders

In [4]:
import torch
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.io.image import read_image
from torchvision.transforms.functional import to_tensor
import torch.nn as nn
from torch.utils.data import Subset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, dataset):
        self.data = dataset

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, target = self.data[idx]
        # Make sure target is a dictionary with 'boxes' and 'labels'
        return image, target


custom_dataset = CustomDataset(dataset)

# Split the indices for training and testing
train_indices = list(range(200))  # Assuming 300 samples for training
test_indices = list(range(300, 400))  # Assuming 100 samples for testing

# Create Subset datasets for training and testing
train_dataset = Subset(custom_dataset, train_indices)
test_dataset = Subset(custom_dataset, test_indices)

# # Create DataLoaders for training and testing
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)




# Model

In [5]:
import torch.optim as optim
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# Define the model
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT,trainable_backbone_layers=0)

# # Freeze all layers except the last ones
for param in model.parameters():
    param.requires_grad = False

num_classes = 2  # Change this to your specific number of classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

model.train()

# Define optimizer and loss function (you may need to customize this)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training

In [6]:
for epoch in range(num_epochs):
    running_loss = 0.0
    
    # Counter for batches processed
    batches_processed = 0
    total_batches = len(train_loader)
    
    for batch_idx, (images, batch_targets) in enumerate(train_loader):
        if not isinstance(batch_targets, list):
            print("Error: batch_targets is not a list")
            break
        
        print(images.shape)
        optimizer.zero_grad()
        output = model(images, batch_targets)

        # Individual losses
        loss_classifier = output['loss_classifier']
        loss_box_reg = output['loss_box_reg']
        loss_objectness = output['loss_objectness']
        loss_rpn_box_reg = output['loss_rpn_box_reg']

        # Calculate total loss
        total_loss = (
            loss_classifier +
            loss_box_reg +
            loss_objectness +
            loss_rpn_box_reg
        )

        total_loss.backward()
        optimizer.step()

        running_loss += total_loss.item()

        # Update progress
        batches_processed += 1
        print(f"Epoch [{epoch + 1}/{num_epochs}] - Batch [{batch_idx + 1}/{total_batches}] - Loss: {total_loss.item():.4f}", end='\r')
    
    # Print average loss for each epoch
    print(f"Epoch [{epoch + 1}/{num_epochs}] - Loss: {running_loss / len(train_loader):.4f}")



torch.Size([10, 3, 224, 224])


RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [None]:
torch.save(model, 'modely_zadanie4/faster_rcnn_model.pth')

# Testing

In [None]:
import torch
from torchvision.ops import box_iou
from tqdm import tqdm

model = torch.load('modely_zadanie4/faster_rcnn_model.pth')

def calculate_average_precision(model, test_loader, iou_threshold=0.5):
    model.eval()
    all_predictions = []
    all_targets = []

    batches_processed = 0
    total_batches = len(test_loader)
    with torch.no_grad():
        # for images, targets in tqdm(test_loader, desc='Calculating AP', unit='batch'):
        for batch_idx, (images, batch_targets) in enumerate(test_loader):
            print(f"Batch [{batch_idx + 1}/{total_batches}]", end='\r')
            print(images.shape)
            # Perform inference]
            predictions = model(images) 
            print(predictions)
          
                  
            # Store predictions
            all_predictions.extend(predictions)
            
            # Check if targets are in the correct format
            if not isinstance(batch_targets, list):
                print("Error: batch_targets is not a list")
                break
            
            # Store ground truth targets
            all_targets.extend(batch_targets)
            batches_processed += 1

    print(all_predictions)
    print(all_targets)

    # Filter predictions and targets for the cup class (class index 1)
    cup_class_index = 1
    cup_predictions = [pred for pred in all_predictions if 'labels' in pred and (pred['labels'] == cup_class_index).any()]
    cup_targets = [target for target in all_targets if 'labels' in target and (target['labels'] == cup_class_index).any()]

    print(cup_predictions)
    print(cup_targets)

    # Compute IoU between cup predictions and targets
    if len(cup_predictions) == 0 or len(cup_targets) == 0:
        return 0.0  # No predictions or targets for the cup class

    iou = box_iou(torch.stack([pred['boxes'] for pred in cup_predictions]),
                  torch.stack([target['boxes'] for target in cup_targets]))

    # Calculate true positives and false positives
    true_positives = (iou > iou_threshold).sum(dim=0).clamp(max=1)
    false_positives = 1 - true_positives

    # Calculate precision and recall
    precision = true_positives.cumsum(dim=0) / (true_positives + false_positives).cumsum(dim=0)
    recall = true_positives / true_positives.numel()

    # Calculate Average Precision
    average_precision = (precision * recall).sum()

    return average_precision.item()


ap = calculate_average_precision(model, test_loader)
print(f"Average Precision: {ap:.4f}")