# Citation

@ARTICLE{Veeling2018-qh,
  title         = "Rotation Equivariant {CNNs} for Digital Pathology",
  author        = "Veeling, Bastiaan S and Linmans, Jasper and Winkens, Jim and
                   Cohen, Taco and Welling, Max",
  month         =  jun,
  year          =  2018,
  archivePrefix = "arXiv",
  primaryClass  = "cs.CV",
  eprint        = "1806.03962"
}

In [1]:
from google.colab import drive

In [2]:
# Mount Google Drive; the dataset paths used later live under this directory.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import h5py
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
import gc


In [5]:
# Function to load data in batches
def load_data_in_batches(file_path_x, file_path_y, batch_size):
    """Yield consecutive ``(x_chunk, y_chunk)`` pairs read from two HDF5 files.

    The ``'x'`` dataset is sliced once per chunk, while the ``'y'`` dataset is
    read in full up front and each label slice is flattened to 1-D.

    Args:
        file_path_x: Path of the HDF5 file containing the ``'x'`` dataset.
        file_path_y: Path of the HDF5 file containing the ``'y'`` dataset.
        batch_size: Maximum number of samples per yielded chunk; must be > 0.

    Yields:
        Tuples ``(x_chunk, y_chunk)``. Every chunk holds ``batch_size`` samples
        except possibly the last one, which holds whatever remains.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    with h5py.File(file_path_x, 'r') as data_file_x, h5py.File(file_path_y, 'r') as data_file_y:
        data_x = data_file_x['x']
        data_y = data_file_y['y'][:]
        total_samples = len(data_x)

        # Step over start offsets (rather than counting full batches with floor
        # division) so a trailing partial chunk is yielded instead of dropped.
        for start_idx in range(0, total_samples, batch_size):
            end_idx = min(start_idx + batch_size, total_samples)
            yield data_x[start_idx:end_idx], data_y[start_idx:end_idx].reshape(-1)  # Reshape labels


In [6]:
# Create the generator that yields the training split chunk by chunk.
# It can be iterated only once; re-run this cell to obtain a fresh one.
train_batches = load_data_in_batches(
    '/content/drive/My Drive/patch/camelyonpatch_level_2_split_train_x.h5',
    '/content/drive/My Drive/patch/camelyonpatch_level_2_split_train_y.h5',
    batch_size=65536,
)


In [7]:
# Small CNN binary classifier: two conv/pool stages followed by a dense head
# ending in a single sigmoid unit.
model = Sequential([
    # Declare the input with an explicit Input layer instead of the deprecated
    # `input_shape=` keyword on the first Conv2D.
    Input(shape=(96, 96, 3)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),

    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the single sigmoid output; accuracy is reported
# alongside the loss during fit/evaluate.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [9]:
# Number of passes made over each training chunk before moving to the next one.
EPOCHS_PER_CHUNK = 7

# The validation split is the same for every chunk and epoch, so read it from
# disk once here instead of re-reading it inside the training loop.
with h5py.File('/content/drive/My Drive/patch/camelyonpatch_level_2_split_valid_x.h5', 'r') as valid_x_file:
    valid_x = valid_x_file['x'][:]
with h5py.File('/content/drive/My Drive/patch/camelyonpatch_level_2_split_valid_y.h5', 'r') as valid_y_file:
    valid_y = valid_y_file['y'][:].reshape(-1)  # Reshape validation labels

# NOTE(review): training is chunk-major (all epochs on chunk 1, then chunk 2, ...),
# so later chunks may dominate the final weights. Consider epoch-major ordering.
chunks_trained = 0
for batch_x, batch_y in train_batches:
    # One fit call with several epochs replaces a manual loop of epochs=1 calls;
    # validation still runs at the end of every epoch.
    model.fit(
        batch_x,
        batch_y,
        epochs=EPOCHS_PER_CHUNK,
        batch_size=32,
        validation_data=(valid_x, valid_y),
    )
    chunks_trained += 1

    # Clear memory after each chunk, before the generator reads the next one
    del batch_x, batch_y
    gc.collect()

if chunks_trained == 0:
    # train_batches is a one-shot generator; running this cell again without
    # re-creating it would otherwise do nothing without any indication.
    print('No training chunks were consumed: train_batches is empty or already exhausted. '
          'Re-create it before re-running this cell.')

# Release the validation arrays once training is finished
del valid_x, valid_y
gc.collect()



In [11]:
# Read the test split (images and flattened labels) fully into memory
test_x_path = '/content/drive/My Drive/patch/camelyonpatch_level_2_split_test_x.h5'
test_y_path = '/content/drive/My Drive/patch/camelyonpatch_level_2_split_test_y.h5'

with h5py.File(test_x_path, 'r') as test_x_file, h5py.File(test_y_path, 'r') as test_y_file:
    test_x = test_x_file['x'][:]
    test_y = test_y_file['y'][:].reshape(-1)  # labels flattened to 1-D


In [12]:
# Score the trained model on the test split; evaluate() returns the loss
# followed by the metrics given to compile() (here: accuracy).
evaluation = model.evaluate(test_x, test_y, batch_size=32)
test_loss, test_acc = evaluation
print(f'Test accuracy: {test_acc}')

Test accuracy: 0.771728515625
