In [1]:
import os
from PIL import Image
import matplotlib.pyplot as plt


# Define path to your dataset (images and labels are directly in the 'augmented' folder)
dataset_path = '../brain_tumor/data/augmented/'

# List all image files (assuming .jpg images)
image_files = [f for f in os.listdir(dataset_path) if f.endswith('.jpg')]

# Check first image and its label
image_file = image_files[0]
image_path = os.path.join(dataset_path, image_file)
label_path = os.path.join(dataset_path, image_file.replace('.jpg', '.txt'))

# Open the image
image = Image.open(image_path)

# Read the label
with open(label_path, 'r') as f:
    label_data = f.readlines()

# Show image and print label data
image.show()
print(f"Label data for {image_file}:")
for line in label_data:
    print(line.strip())


Mounted at /content/drive
Label data for 76 (9).jpg:
1 0.661385 0.353873 0.127934 0.181925


In [2]:
import os

# Path to the dataset
dataset_path = '../brain_tumor/data/augmented/'

# Iterate through all files in the directory
for filename in os.listdir(dataset_path):
    # Check if the file is an image
    if filename.endswith('.jpg'):
        # Corresponding label file for the image
        label_file = filename.replace('.jpg', '.txt')
        label_path = os.path.join(dataset_path, label_file)

        # If no corresponding label file exists, delete the image
        if not os.path.exists(label_path):
            image_path = os.path.join(dataset_path, filename)
            os.remove(image_path)
            print(f"Deleted image: {filename}")


In [3]:
# Convert YOLO format to Faster R-CNN format
def convert_yolo_to_fasterrcnn(label_data, image_width, image_height):
    boxes = []
    labels = []

    for line in label_data:
        parts = list(map(float, line.strip().split()))
        label = int(parts[0])
        if label == 0:
            continue  # Skip no-tumor images

        # Extract YOLO format: x_center, y_center, width, height
        x_center, y_center, width, height = parts[1:]

        # Convert YOLO to pixel coordinates
        xmin = (x_center - width / 2) * image_width
        xmax = (x_center + width / 2) * image_width
        ymin = (y_center - height / 2) * image_height
        ymax = (y_center + height / 2) * image_height

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label)  # 1 for tumor

    return boxes, labels


In [4]:
import os
import torch
from PIL import Image

# Function to load all images and labels, and convert YOLO to Faster R-CNN format
def load_and_convert(dataset_path):
    images = []
    targets = []

    # List all image files in the directory
    image_files = [f for f in os.listdir(dataset_path) if f.endswith('.jpg')]

    for image_file in image_files:
        # Get the corresponding label file
        label_file = image_file.replace('.jpg', '.txt')

        image_path = os.path.join(dataset_path, image_file)
        label_path = os.path.join(dataset_path, label_file)

        # Open image
        image = Image.open(image_path)
        image_width, image_height = image.size

        # Read label data
        with open(label_path, 'r') as f:
            label_data = f.readlines()

        # Convert YOLO to Faster R-CNN format
        boxes, labels = convert_yolo_to_fasterrcnn(label_data, image_width, image_height)

        # If no boxes were returned, add a "dummy" empty box and label (no tumor)
        if len(boxes) == 0:
            # Add an empty box and label
            boxes = [[0.0, 0.0, 0.0, 0.0]]  # This is an "empty" box
            labels = [0]  # Label 0 means no tumor

        # Store the image and its corresponding target (bounding boxes and labels)
        images.append(image)
        targets.append({'boxes': torch.tensor(boxes, dtype=torch.float32), 'labels': torch.tensor(labels, dtype=torch.int64)})

    return images, targets

# Load and convert all images and labels
dataset_path = '../brain_tumor/data/augmented/'
images, targets = load_and_convert(dataset_path)

# Example: Print the number of images and check a target
print(f"Number of images: {len(images)}")
print(f"Example target (bounding boxes and labels) for the first image: {targets[1]}")


Number of images: 878
Example target (bounding boxes and labels) for the first image: {'boxes': tensor([[122.5917, 125.5962, 216.9390, 207.9247]]), 'labels': tensor([1])}


In [5]:
# Example: Print the number of images and check a target
print(f"Number of images: {len(images)}")
print(f"Example target (bounding boxes and labels) for the first image: {targets[9]}")


Number of images: 878
Example target (bounding boxes and labels) for the first image: {'boxes': tensor([[0., 0., 0., 0.]]), 'labels': tensor([0])}


In [6]:
from torch.utils.data import random_split

# Define the split ratio
train_size = int(0.8 * len(images))
test_size = len(images) - train_size

# Split dataset into training and testing sets
train_images, test_images = random_split(images, [train_size, test_size])
train_targets, test_targets = random_split(targets, [train_size, test_size])

# You can optionally check the sizes of the datasets
print(f"Training dataset size: {len(train_images)}")
print(f"Testing dataset size: {len(test_images)}")


Training dataset size: 702
Testing dataset size: 176


In [7]:
import torch
from torch.utils.data import Dataset
from PIL import Image

class BrainTumorDataset(Dataset):
    def __init__(self, images, targets, transform=None):
        self.images = images
        self.targets = targets
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # Get the image and its corresponding target (boxes and labels)
        image = self.images[idx]
        target = self.targets[idx]

        # Ensure target is a dictionary with 'boxes' and 'labels'
        target = {
            'boxes': target['boxes'].float(),
    'labels': target['labels'].long()
        }

        # Apply transformations (if any)
        if self.transform:
            image = self.transform(image)

        return image, target


In [8]:
from torchvision import transforms

# Define the transformations for grayscale images
transform = transforms.Compose([
    transforms.Resize((800, 800)),  # Resize images to a fixed size (optional)
    transforms.Grayscale(num_output_channels=1),  # Ensure the image is grayscale (single channel)
    transforms.ToTensor(),  # Convert PIL image to PyTorch tensor
    transforms.Normalize(mean=[0.485], std=[0.229])  # Normalize with grayscale values
])

# Apply transformations to the images
train_dataset = BrainTumorDataset(images=train_images, targets=train_targets, transform=transform)
test_dataset = BrainTumorDataset(images=test_images, targets=test_targets, transform=transform)


In [9]:
def collate_fn(batch):
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]  # keep as list of dicts
    return torch.stack(images, dim=0), targets


In [10]:
from torch.utils.data import DataLoader

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)


In [11]:
for images_batch, targets_batch in train_loader:
    boxes_first_img = targets_batch[0]['boxes']  # Tensor [N, 4]
    labels_first_img = targets_batch[0]['labels']  # Tensor [N]


In [16]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader

# --- Model Setup ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load Faster R-CNN pre-trained on COCO
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Modify the classifier head for 2 classes (tumor + background)
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features, num_classes
)

model.to(device)

# Optimizer with a lower learning rate
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


# --- Training Loop ---
num_epochs = 10  # Set how many epochs you want to train

for epoch in range(num_epochs):
    model.train()

    for images_batch, targets_batch in train_loader:
        # Move images to device
        images_batch = list(image.to(device) for image in images_batch)

        # Clean and move targets
        cleaned_targets = []
        for i in range(len(targets_batch)):
            boxes = targets_batch[i]['boxes']
            labels = targets_batch[i]['labels']

            valid_boxes = []
            valid_labels = []

            for box, label in zip(boxes, labels):
                x_min, y_min, x_max, y_max = box
                width = x_max - x_min
                height = y_max - y_min

                if width > 0 and height > 0:
                    valid_boxes.append(box)
                    valid_labels.append(label)

            if len(valid_boxes) == 0:
                cleaned_target = {
                    'boxes': torch.empty((0, 4), dtype=torch.float32).to(device),
                    'labels': torch.empty((0,), dtype=torch.int64).to(device)
                }
            else:
                cleaned_target = {
                    'boxes': torch.stack(valid_boxes).float().to(device),
                    'labels': torch.tensor(valid_labels, dtype=torch.int64).to(device)
                }

            cleaned_targets.append(cleaned_target)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        loss_dict = model(images_batch, cleaned_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass
        losses.backward()
        optimizer.step()

        print(f"Epoch {epoch+1}, Loss: {losses.item()}")

    # Step the scheduler
    lr_scheduler.step()

print("Training complete!")


Epoch 1, Loss: 0.9835421442985535
Epoch 1, Loss: 0.9242234826087952
Epoch 1, Loss: 0.985794186592102
Epoch 1, Loss: 0.8307316899299622
Epoch 1, Loss: 0.9542766213417053
Epoch 1, Loss: 0.9417815208435059
Epoch 1, Loss: 0.8876802325248718
Epoch 1, Loss: 0.8934490084648132
Epoch 1, Loss: 0.923782229423523
Epoch 1, Loss: 0.8899625539779663
Epoch 1, Loss: 1.1418803930282593
Epoch 1, Loss: 0.9858635663986206
Epoch 1, Loss: 0.9130094647407532
Epoch 1, Loss: 0.8722971081733704
Epoch 1, Loss: 0.9042437076568604
Epoch 1, Loss: 0.892036497592926
Epoch 1, Loss: 0.8992630839347839
Epoch 1, Loss: 0.9612622261047363
Epoch 1, Loss: 0.865350067615509
Epoch 1, Loss: 1.0098448991775513
Epoch 1, Loss: 0.9217138290405273
Epoch 1, Loss: 0.8912782073020935
Epoch 1, Loss: 1.0129318237304688
Epoch 1, Loss: 0.8902310729026794
Epoch 1, Loss: 0.8787598013877869
Epoch 1, Loss: 0.8688774108886719
Epoch 1, Loss: 0.9241093993186951
Epoch 1, Loss: 0.8839316964149475
Epoch 1, Loss: 0.9152483344078064
Epoch 1, Loss: 0.9

In [18]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Initialize lists for the true and predicted labels
true_labels = []
pred_labels = []

# Loop through the test data
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    for images_batch, targets_batch in test_loader:
        images_batch = images_batch.to(device)
        targets_batch = [{k: v.to(device) for k, v in t.items()} for t in targets_batch]

        # Get the predictions from the model
        output = model(images_batch)

        for i in range(len(output)):
            # Get the predicted labels and the true labels
            pred_boxes = output[i]['boxes']
            pred_scores = output[i]['scores']
            pred_labels_img = output[i]['labels']

            true_labels_img = targets_batch[i]['labels']

            # Use only the predictions with a high enough score (e.g., score > 0.5)
            high_score_preds = pred_scores > 0.5
            pred_labels_img = pred_labels_img[high_score_preds]

            # Add the predicted labels and true labels to the respective lists
            true_labels.extend(true_labels_img.cpu().numpy())
            pred_labels.extend(pred_labels_img.cpu().numpy())

# Now calculate Precision, Recall, and F1-Score
if len(pred_labels) > 0 and len(true_labels) > 0:
    precision = precision_score(true_labels, pred_labels, pos_label=1)
    recall = recall_score(true_labels, pred_labels, pos_label=1)
    f1 = f1_score(true_labels, pred_labels, pos_label=1)

    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
else:
    print("No predictions made or all predictions were negative.")


ValueError: Found input variables with inconsistent numbers of samples: [182, 17600]