# CNN Model for region-of-interest (ROI)



## Preprocessing

### Imports

In [None]:
import pandas as pd
from pathlib import Path
import shutil

In [None]:
# Folder holding the CSV metadata (dicom_info and the mass case descriptions).
meta_dir = "./roi_data/metadata"
# Root that relative image paths are appended to when source images are copied.
image_dir = "./roi_data/images"
# Output roots; each one receives "benign" and "malignant" sub-folders.
train_path = "./roi_data/roi_images/train"
test_path = "./roi_data/roi_images/test"

In [None]:
# Make sure each split has one folder per class before any image is copied;
# already-existing folders are left untouched.
Path(train_path, "benign").mkdir(parents=True, exist_ok=True)
Path(train_path, "malignant").mkdir(parents=True, exist_ok=True)
Path(test_path, "benign").mkdir(parents=True, exist_ok=True)
Path(test_path, "malignant").mkdir(parents=True, exist_ok=True)

In [None]:
# Index of every DICOM-derived image, loaded with all of its columns.
dicom_data = pd.read_csv(meta_dir + '/dicom_info.csv')

# From the case descriptions only the patient id, the pathology label and the
# ROI mask path are kept.
train_csv = pd.read_csv(meta_dir + '/mass_case_description_train_set.csv')[
    ["patient_id", "pathology", "ROI mask file path"]
]
test_csv = pd.read_csv(meta_dir + '/mass_case_description_test_set.csv')[
    ["patient_id", "pathology", "ROI mask file path"]
]

In [None]:
# Keep only the patient id, the series type and the image location.
dicom_data = dicom_data.loc[:, ["PatientID", "SeriesDescription", "image_path"]]

In [None]:
# Restrict the DICOM index to the ROI mask series.
images = dicom_data.loc[dicom_data["SeriesDescription"] == 'ROI mask images']

# An image belongs to a split when its PatientID equals the first component of
# one of that split's "ROI mask file path" entries.
train_images = images.loc[
    images["PatientID"].isin(
        train_csv['ROI mask file path'].map(lambda mask_path: mask_path.split('/')[0])
    )
]
test_images = images.loc[
    images["PatientID"].isin(
        test_csv['ROI mask file path'].map(lambda mask_path: mask_path.split('/')[0])
    )
]


In [None]:
# NOTE(review): the column-wise concat aligns rows on the index, not on a
# shared key. It presumably relies on the PatientID-sorted image rows lining
# up one-to-one with the CSV rows -- confirm against the data.

# Attach the training case description to the sorted training images, then
# discard the columns that are no longer needed.
train_images = pd.concat(
    [train_images.sort_values(by=['PatientID']).reset_index(drop=True), train_csv],
    axis=1,
).drop(columns=["patient_id", "SeriesDescription", "ROI mask file path"])

# Same treatment for the test split.
test_images = pd.concat(
    [test_images.sort_values(by=['PatientID']).reset_index(drop=True), test_csv],
    axis=1,
).drop(columns=["patient_id", "SeriesDescription", "ROI mask file path"])

In [None]:
# Strip the dataset prefix so every path is relative to the local image root.
train_images_paths = train_images["image_path"].map(
    lambda full_path: full_path.replace('CBIS-DDSM/jpeg', '')
)
test_images_paths = test_images["image_path"].map(
    lambda full_path: full_path.replace('CBIS-DDSM/jpeg', '')
)

# Lower-case the pathology values so they can serve as class folder names.
train_labels = train_images["pathology"].map(lambda pathology: pathology.lower())
test_labels = test_images["pathology"].map(lambda pathology: pathology.lower())

In [None]:
def copy_images(curr_paths, dest_paths, curr_labels, src_root=None):
    """Copy images into per-class folders under a destination root.

    Each image ends up at ``dest_paths/<label><relative path>``, where the
    label ``"benign_without_callback"`` is folded into ``"benign"``.

    Args:
        curr_paths: Iterable of relative image paths of the form
            ``"/<directory>/<file>"`` (leading slash included).
        dest_paths: Root folder that receives one sub-folder per label.
        curr_labels: Iterable of labels, paired with ``curr_paths`` by
            position.
        src_root: Folder the relative paths are resolved against. Defaults to
            the module-level ``image_dir``.

    Raises:
        ValueError: If there are fewer labels than paths.
    """
    source_root = image_dir if src_root is None else str(src_root)
    paths = list(curr_paths)
    labels = list(curr_labels)
    if len(labels) < len(paths):
        raise ValueError(
            f"got {len(paths)} paths but only {len(labels)} labels"
        )

    # Pair by position: indexing a pandas Series with the loop counter is a
    # label lookup and mispairs (or fails) when the index is not 0..n-1.
    for path, label in zip(paths, labels):
        if label == "benign_without_callback":
            label = "benign"
        class_root = dest_paths + "/" + label
        # The first component after the leading slash is the image's folder.
        dir_path = path.split("/")[1]
        Path(class_root + "/" + dir_path).mkdir(parents=True, exist_ok=True)
        shutil.copy(source_root + path, class_root + path)


In [None]:
# Populate the class folders of the training split, then of the test split.
copy_images(
    curr_paths=train_images_paths,
    dest_paths=train_path,
    curr_labels=train_labels,
)
copy_images(
    curr_paths=test_images_paths,
    dest_paths=test_path,
    curr_labels=test_labels,
)

# Model

## Image loading

### Imports

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

### Parameters

In [None]:
# Number of images per mini-batch produced by the data loaders.
batch_size = 4
# Root folders the image datasets are read from (one sub-folder per class).
train_path = "./roi_data/roi_images/train"
test_path = "./roi_data/roi_images/test"

### Load and transform the images


In [None]:
# Preprocessing applied to every image: resize to 444x222 (height x width),
# convert to a tensor, then collapse to a single grayscale channel.
transformer = transforms.Compose(
    [
        transforms.Resize(size=(444, 222)),
        transforms.ToTensor(),
        transforms.Grayscale(num_output_channels=1),
    ]
)

In [None]:
# Training images, labelled by their class sub-folder and preprocessed on load.
train_imagefolder = torchvision.datasets.ImageFolder(
    root=train_path,
    transform=transformer,
)
# Shuffled mini-batches, loaded in the main process (no worker processes).
train_loader = torch.utils.data.DataLoader(
    train_imagefolder,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Test images, labelled by their class sub-folder and preprocessed on load.
test_imagefolder = torchvision.datasets.ImageFolder(
    root=test_path,
    transform=transformer,
)
# Mini-batches in fixed dataset order, loaded in the main process.
test_loader = torch.utils.data.DataLoader(
    test_imagefolder,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

## Neural network

### Imports

In [None]:
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim

In [None]:
class Network(nn.Module):
    """Two-stage convolutional classifier for single-channel images.

    Each stage is a 5x5 convolution followed by ReLU and 2x2 max-pooling. The
    flattened feature map then goes through three fully connected layers that
    produce two output scores.

    Args:
        input_size: ``(height, width)`` of the images passed to ``forward``.
            Defaults to ``(444, 222)``, the size produced by the notebook's
            ``Resize`` transform.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution/pooling stages.
    """

    def __init__(self, input_size=(444, 222)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Derive the flattened feature count from the input size instead of
        # hard-coding it, so fc1 always matches what forward() produces.
        feat_h, feat_w = (self._feature_extent(side) for side in input_size)
        if feat_h < 1 or feat_w < 1:
            raise ValueError(f"input_size {tuple(input_size)} is too small")

        self.fc1 = nn.Linear(16 * feat_h * feat_w, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    @staticmethod
    def _feature_extent(side):
        """Return the length of one spatial side after both conv+pool stages."""
        for _ in range(2):
            # An unpadded 5x5 convolution trims 4 pixels; 2x2 pooling with
            # stride 2 halves the result, rounding down.
            side = (side - 4) // 2
        return side

    def forward(self, x):
        """Map a ``(batch, 1, height, width)`` tensor to ``(batch, 2)`` scores."""
        x = self.pool(functional.relu(self.conv1(x)))
        x = self.pool(functional.relu(self.conv2(x)))
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = functional.relu(self.fc1(x))
        x = functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
# Instantiate the classifier with its default constructor arguments.
net = Network()

In [None]:
# Training objective: cross-entropy between the network outputs and the labels.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of the network.
optimiser = optim.SGD(
    net.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [None]:
# Train for two passes over the training set, reporting the mean loss every
# `log_interval` mini-batches and once more for whatever is left at the end of
# each epoch. Without the final report, an epoch shorter than the interval
# would print nothing at all.
log_interval = 2000

for epoch in range(2):
    running_loss = 0.0
    pending_batches = 0  # mini-batches accumulated since the last report
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        # Gradients accumulate by default, so clear them before each step.
        optimiser.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        running_loss += loss.item()
        pending_batches += 1
        if pending_batches == log_interval:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_interval:.3f}')
            running_loss = 0.0
            pending_batches = 0

    # Report the trailing partial window instead of silently dropping it.
    if pending_batches:
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / pending_batches:.3f}')