## Environment Setup

In [0]:
!rm -rf data

In [0]:
%%capture
!pip install pydicom

In [0]:
# Mount Google Drive to Session
import os
from google.colab import drive
if not os.path.isdir('drive'):
  drive.mount('/content/drive')

In [0]:
# Load standard modules
import os
import shutil
import tarfile
import requests
from io import StringIO

# Load data modules
import pandas as pd
# import pydicom as dicom
import numpy as np

# Load image and plotting modules
import cv2
import plistlib
from PIL import Image
import matplotlib.pyplot as plt
from skimage.draw import polygon

%tensorflow_version 2.x
import tensorflow as tf

In [0]:
%%capture
!unzip /content/drive/'My Drive'/'Breast Cancer Detection'/breast_cancer_data.zip

## Importing and Processing Datasets for Breast Cancer Detection

Datasets involved are: 

1.   **MIAS**: 322 images for 161 cases, labels for malignant, benign, and normal cases, additionnal bounding-circle annotations for abnormalities.
2.   **INbreast:** 410 images for 115 cases (90 cases * 4 images + 25 cases * 2 images)


In [0]:
def create_dir(dir_path, verbose=True):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        if verbose:
            print(f'Created directory at {dir_path}.')
    else:
        if verbose:
            print(f'Directory exists at {dir_path}.')


def download(url, fname):
    r = requests.get(url, stream=True)
    with open(fname, 'wb') as f:
        chunk_count = 1
        chunk_size = 1024
        total_length = int(r.headers.get('content-length'))
        for chunk in r.iter_content(chunk_size=chunk_size): 
            if chunk:
                print(f'Downloading file: {chunk_count*chunk_size}/{total_length}', end='\r')
                f.write(chunk)
                f.flush()
                chunk_count += 1
        print(f'Downloaded file: {total_length}/{total_length}')


def load_inbreast_mask(mask_path, imshape=(4084, 3328)):
    """
    This function loads a osirix xml region as a binary numpy array for INBREAST
    dataset
    @mask_path : Path to the xml file
    @imshape : The shape of the image as an array e.g. [4084, 3328]
    return: numpy array where positions in the roi are assigned a value of 1.
    """

    mask = np.zeros(imshape)
    with open(mask_path, 'rb') as mask_file:
        plist_dict = plistlib.load(mask_file, fmt=plistlib.FMT_XML)['Images'][0]
        numRois = plist_dict['NumberOfROIs']
        rois = plist_dict['ROIs']
        assert len(rois) == numRois
        for roi in rois:
            numPoints = roi['NumberOfPoints']
            points = roi['Point_px']
            assert numPoints == len(points)
            points = [eval(point) for point in points]
            if len(points) <= 2:
                for point in points:
                    mask[int(point[1]), int(point[0])] = 1
            else:
                x, y = zip(*points)
                col, row = np.array(x), np.array(y) ##x coord is the column coord in an image and y is the row
                poly_x, poly_y = polygon(row, col, shape=imshape)
                mask[poly_x, poly_y] = 1
    return mask

In [0]:
root_data_dir = '/content/data'
create_dir(root_data_dir)

### MIAS Dataset Downloading and Preprocessing

In [0]:
dataset_name = 'MIAS'
home_url = 'http://peipa.essex.ac.uk/info/mias.html'
download_url = 'http://peipa.essex.ac.uk/pix/mias/all-mias.tar.gz'
filename = '/content/data/all-mias.tar.gz'

# Create dataset directory
dataset_path = os.path.join(root_data_dir, dataset_name)
create_dir(dataset_path)

# Download and extract dataset
download(download_url, filename)
shutil.unpack_archive(filename, dataset_path)
os.remove(filename)

# Convert images to JPG format
mias_images = os.path.join(dataset_path, 'images')
create_dir(mias_images)
for file in os.listdir(dataset_path):
    if file.endswith('.pgm'):
        filepath = os.path.join(dataset_path, file)
        dstpath = os.path.join(mias_images, file.replace('.pgm', '.jpg'))
        img = Image.open(filepath)
        img.convert('RGB').save(dstpath)
        os.remove(filepath)

# Convert labels to CSV format
with open(os.path.join(dataset_path, 'Info.txt')) as f:
    r = f.read()
df = pd.read_csv(StringIO(r.split('='*65)[-2].strip()), 
                 sep=' ', 
                 header=None,
                 names=['refname', 'tissue', 'abnormality', 'severity', 'x', 'y', 'radius'])
annotation_path = os.path.join(dataset_path, 'annotations.csv')
df.to_csv(annotation_path, index=False)
df.head()

# Cleanup
os.remove(os.path.join(dataset_path, 'README'))
os.remove(os.path.join(dataset_path, 'Info.txt'))
os.remove(os.path.join(dataset_path, 'Licence.txt'))

### INbreast dataset downloading and preprocessing

In [0]:
# dataset_name = 'INbreast'
# download_url = 'https://drive.google.com/file/d/19n-p9p9C0eCQA1ybm6wkMo-bbeccT_62/view?usp=sharing'
# githuburl = 'https://github.com/wentaozhu/deep-mil-for-whole-mammogram-classification/issues/12'
# archive_path = "/content/drive/My Drive/Breast Cancer Detection/INbreast Release 1.0.zip"

# # download dataset and move to data directory
# dataset_path = os.path.join(root_data_dir, dataset_name)
# create_dir(dataset_path)
# !unzip /content/drive/'My Drive'/'Breast Cancer Detection'/'INbreast Release 1.0.zip' -d /data > /log.txt

# Preprocess images
inbreast_dcm = '/data/INbreast Release 1.0/AllDICOMs'
inbreast_imgs = '/content/data/INbreast/images'
create_dir(inbreast_imgs)
for file in os.listdir(inbreast_dcm):
    if file.endswith('.dcm'):
        ds = dicom.dcmread(os.path.join(inbreast_dcm, file), force=True)
        dst_file = os.path.join(inbreast_imgs, file.replace('.dcm', '.jpg')) 
        Image.fromarray(ds.pixel_array.astype('uint8')).save(dst_file)

# Preprocess labels
df = pd.read_excel(os.path.join('/data/INbreast Release 1.0', 'INbreast.xls'))
replacing = {'Lesion Annotation Status': {'No annotation (Normal)': 'no', 'no annotation (normal)': 'no', 'Spiculated Region': 'yes', np.nan: 'yes'}}
df = df.dropna(subset=['File Name']).replace(replacing)
df['File Name'] = df['File Name'].astype(int)
df.to_csv(os.path.join(dataset_path, 'labels.csv'), index=False)
df.head()

# Preprocess annotations
annotations = [] 
masks_path = os.path.join(dataset_path, 'masks')
create_dir(masks_path)
annotation_file_ids = df[df['Lesion Annotation Status'] == 'yes']['File Name'].astype(str).to_list()
for _id in annotation_file_ids:
  # Load annotation file
  inbreast_xml = '/data/INbreast Release 1.0/AllXML'
  xml_path = [os.path.join(inbreast_xml, f) for f in os.listdir(inbreast_xml) if f.startswith(str(_id))][0]
  # Load image file
  image_path = [os.path.join(inbreast_imgs, f) for f in os.listdir(inbreast_imgs) if f.startswith(str(_id))][0]
  image = cv2.imread(image_path)
  # Create mask from annotation and save it
  mask = load_inbreast_mask(xml_path, image.shape[:2])
  cv2.imwrite(os.path.join(masks_path, str(_id) + '_mask.jpg'), mask*255)
  # Find contours
  contours, hierarchy = cv2.findContours((mask*255).astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
  for cnt in contours:
    # Approximate contour bounding box
    epsilon = 0.01*cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, epsilon, True)
    x,y,w,h = cv2.boundingRect(cnt)
    annotations.append((_id, x, y, x+w, y+h))

# Create bounding box annotation dataframe
df_annotations = pd.DataFrame.from_records(annotations, columns =['fileid', 'xmin', 'ymin', 'xmax', 'ymax'])
df_annotations.to_csv(os.path.join(dataset_path, 'annotations.csv'), index=False)

# Cleanup
!rm -rf /data/'INbreast Release 1.0'

### Finalizing

In [0]:
%%capture
!zip -r /content/drive/'My Drive'/'Breast Cancer Detection'/breast_cancer_data.zip data

In [0]:
!rm -rf data
!rm breast_cancer_data.zip

rm: cannot remove 'breast_cancer_data.zip': No such file or directory


## Set up Models: Deep Convolutional Neural Networks for breast cancer screening

In [0]:
def create_base_model(base, input_shape):

    if base == 'VGG16':
        # Create the base model from the pre-trained model VGG16
        base_model = tf.keras.applications.VGG16(input_shape=input_shape,
                                                include_top=False,
                                                weights='imagenet')
    elif base == 'InceptionV3':
        # Create the base model from the pre-trained model Inception V3
        base_model = tf.keras.applications.InceptionV3(input_shape=input_shape,
                                                      include_top=False,
                                                      weights='imagenet')
    elif base == 'ResNet50':
        # Create the base model from the pre-trained model ResNet50
        base_model = tf.keras.applications.ResNet50(input_shape=input_shape,
                                                    include_top=False,
                                                    weights='imagenet')
        
    base_model.trainable = False

    return base_model

def create_model(base_model, num_classes):

    model = tf.keras.Sequential([
      base_model,
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(256, 
                            activation='relu', 
                            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      tf.keras.layers.Dense(128, 
                            activation='relu',
                            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      tf.keras.layers.Dropout(0.5),
      tf.keras.layers.Dense(64, 
                            activation='relu',
                            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      tf.keras.layers.Dense(32, 
                            activation='relu',
                            kernel_regularizer=tf.keras.regularizers.l2(0.01)),
      tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    return model

def freeze_layers(base_model, base, finetune_blocks):

    block_mapping = {
        'VGG16': {
            0: None,
            1: 7,
            2: 11,
            3: 15,
        },

        'InceptionV3': {
            0: None,
            1: 219,
            2: 494,
            3: 15
        },

         'ResNet50': {
            0: None,
            1: 229,
            2: 249,
            3: 279
        },             
    }

    # Unfreeze all layers
    base_model.trainable = True

    # Fine-tune from this layer onwards
    fine_tune_at = block_mapping[base][finetune_blocks]

    # Freeze all the layers before the `fine_tune_at` layer
    for layer in base_model.layers[:fine_tune_at]:
      layer.trainable =  False
    
    return base_model

def load_data(train_df, valid_df, xcol, ycol, img_size, batch_size):
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.25,
        height_shift_range=0.25,
        shear_range=0.5,
        zoom_range=[0.5, 1.5],
        horizontal_flip=True,
        fill_mode='nearest')

    validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255)

    train_generator = train_datagen.flow_from_dataframe(
        dataframe = train_df,
        x_col = xcol,
        y_col = ycol,
        target_size = (img_size, img_size),
        batch_size = batch_size,
        class_mode = 'categorical')

    valid_generator = validation_datagen.flow_from_dataframe(
        dataframe = valid_df,
        x_col = xcol,
        y_col = ycol,
        target_size = (img_size, img_size),
        batch_size = batch_size,
        class_mode = 'categorical')

    return train_generator, valid_generator
  
def compile_model(model, base_lr=0.0001):
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=base_lr),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
                          monitor='val_loss',
                          factor=0.1,
                          patience=10,
                          min_delta=0.0001,
                          verbose=1,
                          min_lr=0.0000001
                          )

def train_model(params):

    batch_size = params['batch_size']
    img_size = params['img_size']
    img_shape = (img_size, img_size, 3)
    num_classes = params['num_classes']
    epochs = params['epochs']
    base_lr = params['base_lr']
    base = params['base']
    finetune_blocks = params['finetune_blocks']


    # FOR MIAS DATASET
    if params['dataset'] == 'MIAS':
        xcol = 'dstpath'
        ycol = 'severity'
        df = pd.read_csv('/content/data/MIAS/annotations0.csv')
        df_train = pd.concat([df[df['severity'] == 'B'][:-7], 
                              df[df['severity'] == 'M'][:-7]])
        df_valid = pd.concat([df[df['severity'] == 'B'][-12:], 
                              df[df['severity'] == 'M'][-12:]])
    # FOR INBREAST DATASET
    elif params['dataset'] == 'INbreast':
        xcol = 'filepath'
        ycol = 'cancer'
        df = pd.read_csv('/content/data/INbreast/annotations0.csv')
        df_train = pd.concat([df[df['cancer'] == 0][:-40], 
                              df[df['cancer'] == 1][:-200]])
        df_valid = pd.concat([df[df['cancer'] == 1][-40:], 
                              df[df['cancer'] == 0][-200:]])

    train_gen, valid_gen = load_data(df_train,
                                    df_valid,
                                    xcol,
                                    ycol,
                                    img_size,
                                    batch_size)



    base_model = create_base_model(base, img_shape)
    base_model = freeze_layers(base_model, base, finetune_blocks)
    model = create_model(base_model, num_classes)
    model = compile_model(model, base_lr)

    history = model.fit(
                    train_gen,
                    steps_per_epoch = train_gen.samples//batch_size,
                    epochs=epochs,
                    validation_data = valid_gen,
                    validation_steps = valid_gen.samples//batch_size,
                    callbacks = [reduce_lr]
                    )
    return history

def plot(hist):
    plt.plot(hist.epoch, hist.history['val_loss'])
    plt.plot(hist.epoch, hist.history['loss'])
    plt.xlabel('epochs'); plt.ylabel('loss'); plt.legend(['training loss', 'validation loss'])
    plt.show()
    plt.plot(hist.epoch, hist.history['val_accuracy'])
    plt.plot(hist.epoch, hist.history['accuracy'])
    plt.xlabel('epochs'); plt.ylabel('accuracy'); plt.legend(['training acc', 'validation acc'])
    plt.show()

## Train MIAS

### Load Dataset

In [0]:
if not os.path.isdir('/content/data/MIAS/cropped'):
    os.makedirs('/content/data/MIAS/cropped')
df = pd.read_csv('/content/data/MIAS/annotations.csv')
df['imgpath'] = '/content/data/MIAS/images/' + df['refname'] + '.jpg'
df['dstpath'] = [s.replace('/images/', '/cropped/') for s in df.imgpath.to_list()]
df_mod = df.dropna(subset=['x'])
df_mod.to_csv('/content/data/MIAS/annotations0.csv', index=False)
for row in df_mod.itertuples():
    if not pd.isna(row.radius):
        img = cv2.imread(row.imgpath)
        height, width = img.shape[:2]
        xc, yc = int(row.x), int(row.y)
        xmin = max(xc - (224/2), 0)
        xmax = min(xc + (224/2), width)
        xmin += (224/2) - (xmax - xc)
        xmax += (224/2) - (xc - xmin)
        ymin = max(yc - (224/2), 0)
        ymax = min(yc + (224/2), height)
        ymin += (224/2) - (ymax-yc)
        ymax += (224/2) - (yc - ymin)
        xmin, xmax, ymin, ymax = int(xmin), int(xmax), int(ymin), int(ymax)
        img = img[ymin:ymax, xmin:xmax]
        cv2.imwrite(row.dstpath, img)
        # x_min = int(row['x'])- int(1.2*int(row['radius']))
        # x_max = int(row['x'])+int(1.2*int(row['radius']))
        # y_min = int(row['y'])-int(1.2*int(row['radius']))
        # y_max = int(row['y'])+int(1.2*int(row['radius']))
        # xmin = min([x_min, 0])
        # xmax = max([x_max, img.shape[0]])
        # ymin = min([y_min, 0])
        # ymax = max([y_max, img.shape[1]])
        # img = img[xmin:xmax, ymin:ymax]
        # cv2.imwrite(row['imgpath'], img)


### Finetune 0

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 0,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_0 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_0 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_0 = train_model(hyperparameters)

Found 105 validated image filenames belonging to 2 classes.
Found 23 validated image filenames belonging to 2 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 00022: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 00032: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 00042: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 00052: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
E

### Finetune 1

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 1,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_1 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_1 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_1 = train_model(hyperparameters)

Found 105 validated image filenames belonging to 2 classes.
Found 23 validated image filenames belonging to 2 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 00025: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 00035: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 00045: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 00064: Redu

### Finetune 2

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 2,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_2 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_2 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_2 = train_model(hyperparameters)

Found 105 validated image filenames belonging to 2 classes.
Found 23 validated image filenames belonging to 2 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 00033: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 00048: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 00058: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch

### Finetune 3

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 3,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_3 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_3 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_3 = train_model(hyperparameters)

### Visualize

                0           1           2           3
VGG16          50           57         50          71           
ResNet50       50           57         50          57
InceptionV3    57           64         43          64

In [0]:
print('======== VGG 16 : 0 Finetuning ========')
plot(hist_vgg_0)
print('======== VGG 16 : 1 Finetuning ========')
plot(hist_vgg_1)
print('======== VGG 16 : 2 Finetuning ========')
plot(hist_vgg_2)
print('======== VGG 16 : 3 Finetuning ========')
plot(hist_vgg_3)

print('======== ResNet50 : 0 Finetuning ========')
plot(hist_r50_0)
print('======== ResNet50 : 1 Finetuning ========')
plot(hist_r50_1)
print('======== ResNet50 : 2 Finetuning ========')
plot(hist_r50_2)
print('======== ResNet50 : 3 Finetuning ========')
plot(hist_r50_3)

print('======== InceptionV3 : 0 Finetuning ========')
plot(hist_iv3_0)
print('======== InceptionV3 : 1 Finetuning ========')
plot(hist_iv3_1)
print('======== InceptionV3 : 2 Finetuning ========')
plot(hist_iv3_2)
print('======== InceptionV3 : 3 Finetuning ========')
plot(hist_iv3_3)

## TRAIN INBREAST

### Load dataset

In [0]:
cropped_dir = '/content/data/INbreast/cropped/'
if not os.path.isdir(cropped_dir):
  os.makedirs(cropped_dir)

img_paths = {}
img_root = '/content/data/INbreast/images/'
for file in os.listdir(img_root):
    img_paths[int(file.split('_')[0])] = img_root + file

df_labels = pd.read_csv('/content/data/INbreast/labels.csv')
df_annotations = pd.read_csv('/content/data/INbreast/annotations.csv')
df_annotations['dx'] = [row.xmax - row.xmin for row in df_annotations.itertuples()]
df_annotations['dy'] = [row.ymax - row.ymin for row in df_annotations.itertuples()]
df_annotations = df_annotations[df_annotations.dx > 10][df_annotations.dy > 10]
df_annotations['yc'] = [int((row.ymax + row.ymin)/2) for row in df_annotations.itertuples()]
df_annotations['xc'] = [int((row.xmax + row.xmin)/2) for row in df_annotations.itertuples()]
df_annotations['cancer'] = [int(int(df_labels[df_labels['File Name'] == row.fileid]['Bi-Rads'].iloc[0][0]) > 3) for row in df_annotations.itertuples()]
df_annotations['filepath'] = [cropped_dir + str(fileid) + '.jpg' for fileid in df_annotations.fileid.to_list()]
df_annotations['ogpath'] = [img_paths[fileid] for fileid in df_annotations.fileid.to_list()]
df_annotations = df_annotations.sample(frac=1).reset_index(drop=True)
img_shapes = {}
for row in df_annotations.itertuples():
  img_shapes[row.fileid] = Image.open(row.ogpath).size
df_annotations['W'] = [img_shapes[fid][0] for fid in df_annotations.fileid.to_list()]
df_annotations['H'] = [img_shapes[fid][1] for fid in df_annotations.fileid.to_list()]
df_annotations = df_annotations[df_annotations.ymax < df_annotations.H]
df_annotations.to_csv('/content/data/INbreast/annotations0.csv', index=False)

  


In [0]:
df_annotations = pd.read_csv('/content/data/INbreast/annotations0.csv')
for row in df_annotations.itertuples():
    img = cv2.imread(row.ogpath)
    height, width = img.shape[:2]
    xc, yc = row.xc, row.yc
    xmin = max(xc - (224/2), 0)
    xmax = min(xc + (224/2), width)
    xmin += (224/2) - (xmax-xc)
    xmax += (224/2) - (xc - xmin)
    ymin = max(yc - (224/2), 0)
    ymax = min(yc + (224/2), height)
    ymin += (224/2) - (ymax-yc)
    ymax += (224/2) - (yc - ymin)
    xmin, xmax, ymin, ymax = int(xmin), int(xmax), int(ymin), int(ymax)
    # img = img[row.xmin:row.xmax, row.ymin:row.ymax]
    img = img[ymin:ymax, xmin:xmax]
    cv2.imwrite(row.filepath, img)

### Finetune 0

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 0,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'INbreast'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_0 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_0 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_0 = train_model(hyperparameters)



KeyError: ignored

### Finetune 1

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 1,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_1 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_1 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_1 = train_model(hyperparameters)

### Finetune 2

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 2,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_2 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_2 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_2 = train_model(hyperparameters)

### Finetune 3

In [0]:
hyperparameters = {
    'img_size': 224,
    'num_classes': 2,
    'base': 'InceptionV3',
    'finetune_blocks': 3,
    'base_lr': 0.0001,
    'epochs': 90,
    'batch_size': 16,
    'dataset': 'MIAS'
}

print(' ======== VGG16 ======== ')
hyperparameters['base'] = 'VGG16'
hist_vgg_3 = train_model(hyperparameters)

print(' ======== ResNet50 ======== ')
hyperparameters['base'] = 'ResNet50'
hist_r50_3 = train_model(hyperparameters)

print(' ======== InceptionV3 ======== ')
hyperparameters['base'] = 'InceptionV3'
hist_iv3_3 = train_model(hyperparameters)

### Visualize

In [0]:
print('======== VGG 16 : 0 Finetuning ========')
plot(hist_vgg_0)
print('======== VGG 16 : 1 Finetuning ========')
plot(hist_vgg_1)
print('======== VGG 16 : 2 Finetuning ========')
plot(hist_vgg_2)
print('======== VGG 16 : 3 Finetuning ========')
plot(hist_vgg_3)

print('======== ResNet50 : 0 Finetuning ========')
plot(hist_r50_0)
print('======== ResNet50 : 1 Finetuning ========')
plot(hist_r50_1)
print('======== ResNet50 : 2 Finetuning ========')
plot(hist_r50_2)
print('======== ResNet50 : 3 Finetuning ========')
plot(hist_r50_3)

print('======== InceptionV3 : 0 Finetuning ========')
plot(hist_iv3_0)
print('======== InceptionV3 : 1 Finetuning ========')
plot(hist_iv3_1)
print('======== InceptionV3 : 2 Finetuning ========')
plot(hist_iv3_2)
print('======== InceptionV3 : 3 Finetuning ========')
plot(hist_iv3_3)

## Attempt at installing and compiling py-faster-rcnn and caffe

Note that the Makefiles at each stage have to be tweaked depending on the environment. Good luck with that.

In [1]:
!apt install caffe-cuda

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  caffe-tools-cuda cython3 fonts-lyx javascript-common libblosc1
  libcaffe-cuda1 libcublas9.1 libcudart9.1 libcurand9.1 libgflags2.2
  libgoogle-glog0v5 libjs-jquery libjs-jquery-ui libleveldb1v5 liblmdb0
  python-matplotlib-data python-tables-data python3-bs4 python3-caffe-cuda
  python3-chardet python3-cycler python3-dateutil python3-decorator
  python3-gflags python3-h5py python3-html5lib python3-ipython
  python3-ipython-genutils python3-leveldb python3-lxml python3-matplotlib
  python3-networkx python3-nose python3-numexpr python3-olefile python3-pandas
  python3-pandas-lib python3-pexpect python3-pickleshare python3-pil
  python3-pkg-resources python3-prompt-toolkit python3-protobuf
  python3-ptyprocess python3-pygments python3-pyparsing python3-pywt
  python3-scipy python3-simplegeneric python3-six python3-skimage
  python3-skimage

In [2]:
!git clone --recursive https://github.com/rbgirshick/py-faster-rcnn.git
!cd py-faster-rcnn/lib && make
!cd py-faster-rcnn/caffe-fast-rcnn && make -j8 && make pycaffe
!cd py-faster-rcnn && ./data/scripts/fetch_faster_rcnn_models.sh

Cloning into 'py-faster-rcnn'...
remote: Enumerating objects: 1544, done.[K
remote: Total 1544 (delta 0), reused 0 (delta 0), pack-reused 1544[K
Receiving objects: 100% (1544/1544), 1.36 MiB | 1.27 MiB/s, done.
Resolving deltas: 100% (899/899), done.
Submodule 'caffe-fast-rcnn' (https://github.com/rbgirshick/caffe-fast-rcnn.git) registered for path 'caffe-fast-rcnn'
Cloning into '/content/py-faster-rcnn/caffe-fast-rcnn'...
remote: Enumerating objects: 23976, done.        
remote: Total 23976 (delta 0), reused 0 (delta 0), pack-reused 23976        
Receiving objects: 100% (23976/23976), 31.49 MiB | 9.32 MiB/s, done.
Resolving deltas: 100% (15785/15785), done.
Submodule path 'caffe-fast-rcnn': checked out '0dcd397b29507b8314e252e850518c5695efbb83'
python setup.py build_ext --inplace
Traceback (most recent call last):
  File "setup.py", line 58, in <module>
    CUDA = locate_cuda()
  File "setup.py", line 53, in locate_cuda
    for k, v in cudaconfig.iteritems():
AttributeError: 'dict' 

In [8]:
!cd py-faster-rcnn/caffe-fast-rcnn && make -j8 && make pycaffe

CXX .build_release/src/caffe/proto/caffe.pb.cc
CXX src/caffe/internal_thread.cpp
CXX src/caffe/common.cpp
CXX src/caffe/solvers/rmsprop_solver.cpp
CXX src/caffe/solvers/nesterov_solver.cpp
CXX src/caffe/solvers/adam_solver.cpp
CXX src/caffe/solvers/adadelta_solver.cpp
CXX src/caffe/solver.cpp
In file included from .build_release/src/caffe/proto/caffe.pb.cc:5:0:
.build_release/src/caffe/proto/caffe.pb.h:9:10: fatal error: google/protobuf/stubs/common.h: No such file or directory
 #include <google/protobuf/stubs/common.h>
          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
compilation terminated.
Makefile:570: recipe for target '.build_release/src/caffe/proto/caffe.pb.o' failed
make: *** [.build_release/src/caffe/proto/caffe.pb.o] Error 1
make: *** Waiting for unfinished jobs....
In file included from ./include/caffe/blob.hpp:8:0,
                 from ./include/caffe/net.hpp:10,
                 from ./include/caffe/solver.hpp:7,
                 from ./include/caffe/sgd_solvers.hpp:7,
         