In [1]:
import os
import cv2
import torch
import torch.nn as nn
import torchvision
import pandas as pd
from torchvision import transforms
from torchvision.models import resnet50
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [2]:
IMG_WIDTH = 139
IMG_HEIGHT = 132

In [3]:
base_model = resnet50(pretrained=True)

base_model = nn.Sequential(*list(base_model.children())[:-1])



In [20]:
class CustomModel(nn.Module):
    def __init__(self, base_model, num_classes=1):
        super().__init__()
        self.base_model = base_model
        self.base_model.conv1 = nn.Conv2d(3, 256, kernel_size=(132, 139))
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(2048, 4) # Bounding box
        self.fc2 = nn.Linear(2048, num_classes) # Class probabilities
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.base_model(x)
        x = self.gap(x)
        x = x.view(x.size(0), -1)
        bbox = self.fc1(x) # Returned format: [x1, y1, x2, y2]
        bbox = torch.stack([(bbox[:, 0] + bbox[:, 2]) / 2, (bbox[:, 1] + bbox[:, 3]) / 2, bbox[:, 2] - bbox[:, 0], bbox[:, 3] - bbox[:, 1]], dim=1) # Convert to [x1, y1, w, h]
        class_prob = self.softmax(self.fc2(x))
        return bbox, class_prob
        

In [24]:
class TumorsDataset(Dataset):
    def __init__(self, images, labels_df, transform=None):
        self.images = images
        self.labels = torch.tensor(labels_df.values, dtype=torch.float32)
        self.transform = transform
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img = self.images[idx]
        if self.transform:
            img = self.transform(img)
        img = img.permute(2, 0, 1)
        label = self.labels[idx]
        return img, label

In [6]:
def read_images(imgs_path):
    images = []
    
    for img_name in os.listdir(imgs_path):
        img = cv2.imread(os.path.join(imgs_path, img_name))
        images.append(img)
        
    return images

In [7]:
def read_cleaned_labels(lbls_path, imgs_path, images):
    labels = pd.DataFrame(columns=['xmin', 'ymin', 'xmax', 'ymax', 'size'])
    
    for i, file_path in enumerate(os.listdir(imgs_path)):
        file_path = file_path.replace('.jpg', '.txt')
        file_path = os.path.join(lbls_path, file_path)
        
        if not os.path.exists(file_path):
            with open(file_path, 'w') as file:
                pass
        
        with open(file_path, 'r') as file:
            line = file.readlines()
            if len(line) == 0: # if there are no tumors in the image
                labels.loc[i] = [0, 0, 0, 0, 0]
                continue
            line = line[0].split()
            idx, x, y, width, height = list(map(float, line))
            
            xmin = (x - width / 2) * IMG_WIDTH
            ymin = (y - height / 2) * IMG_HEIGHT
            xmax = (x + width / 2) * IMG_WIDTH
            ymax = (y + height / 2) * IMG_HEIGHT
            size = (width / IMG_WIDTH) * (height / IMG_HEIGHT)
            
            labels.loc[i] = [xmin, ymin, xmax, ymax, size]
            
    return labels

In [25]:
model = CustomModel(base_model)

for param in base_model.parameters():
    param.requires_grad = False
    
loss_fn = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters())

In [9]:
train_img_path = os.path.join(os.getcwd(), 'Clean', 'train', 'images')
train_label_path = os.path.join(os.getcwd(), 'Clean', 'train', 'labels')
print(train_img_path, train_label_path, sep='\n')

/Users/benarviv/Documents/OpenU/2024א/Data Science Workshop/Data Science Project/Brain-Tumor-Detection/Clean/train/images
/Users/benarviv/Documents/OpenU/2024א/Data Science Workshop/Data Science Project/Brain-Tumor-Detection/Clean/train/labels


In [10]:
train_images = torch.tensor(read_images(train_img_path))
train_labels = read_cleaned_labels(train_label_path, train_img_path, train_images)

  train_images = torch.tensor(read_images(train_img_path))


In [26]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

train_dataset = TumorsDataset(train_images, train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [28]:
num_epochs = 8

for epoch in tqdm(range(num_epochs)):
    model.train()
    for i, (imgs, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        imgs = imgs.to(torch.float32)
        labels = labels.to(torch.long) 
        bboxes, class_probs = model(imgs)
        loss = loss_fn(bboxes)
        loss.backward()
        optimizer.step()
        
        if i % 10 == 0:
            print(f'Epoch: {epoch}, Batch: {i}, Loss: {loss.item()}')

  0%|          | 0/8 [00:04<?, ?it/s]


RuntimeError: Given groups=1, weight of size [256, 3, 132, 139], expected input[64, 2048, 1, 1] to have 3 channels, but got 2048 channels instead