In [4]:
!gdown --fuzzy https://drive.google.com/file/d/1tM16OAlYUrimMpHsSs7GB1QN6swYuvCx/view?usp=sharing

Downloading...
From (original): https://drive.google.com/uc?id=1tM16OAlYUrimMpHsSs7GB1QN6swYuvCx
From (redirected): https://drive.google.com/uc?id=1tM16OAlYUrimMpHsSs7GB1QN6swYuvCx&confirm=t&uuid=796ed1a8-1aae-42b9-a341-4a2d07443f32
To: /content/histopathologic-cancer-detection.zip
100% 6.77G/6.77G [01:09<00:00, 97.2MB/s]


In [5]:
!unzip /content/histopathologic-cancer-detection.zip

Archive:  /content/histopathologic-cancer-detection.zip
replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [6]:
# Directory from which train_labels.csv and the train/ image folder are read.
pcam_directory = '/content/'

In [7]:
# Imported here because this cell appears before the notebook's import cell in
# file order; without it a top-to-bottom run fails with NameError on `torch`.
import torch

print(torch.cuda.is_available())

True


In [1]:
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageFilter
import tifffile as tiff
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
import torch.utils.model_zoo as model_zoo
from torchvision import transforms
from tqdm.notebook import tqdm
import logging as log
import IProgress

In [8]:
# Build {slide_id: integer label} from train_labels.csv.
label_mapping = {}
labels_csv_path = os.path.join(pcam_directory, 'train_labels.csv')
with open(labels_csv_path, 'r') as f:
    reader = csv.reader(f)
    next(reader)  # Discard the header row
    # Every remaining row must have exactly two columns: slide id, then label.
    label_mapping = dict((slide_id, int(label)) for slide_id, label in reader)

In [9]:
# File names (not full paths) of everything inside the train/ folder.
train_image_dir = os.path.join(pcam_directory, 'train')
all_fps = list(os.listdir(train_image_dir))

# Sanity check: every entry must carry the '.tif' extension.
for fp in all_fps:
    assert fp.endswith('.tif'), fp[-4:]

In [None]:
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from torch.utils.data import Dataset, DataLoader
import random

class PCamDataset(torch.utils.data.Dataset):
    """Map-style dataset over image files with optional synthetic degradation.

    Each entry of ``examples`` is an ``(image_path, label)`` pair. Indexing
    returns ``(image, label_tensor)`` where ``label_tensor`` is a one-element
    ``long`` tensor and ``image`` is whatever ``transform`` produces (or the
    PIL image itself when no transform is given).
    """

    def __init__(self, examples, transform=None):
        """
        Args:
            examples: sequence of ``(image_path, label)`` pairs.
            transform: optional callable applied to each loaded PIL image.
        """
        self.examples = examples
        self.transform = transform
        self.crop = 125
        # Created up front so update_exclusion_list() and
        # create_gaussian_kernel() never hit a missing attribute.
        self.exclude_indices = set()
        self.kernel_cache = {}

    def _load_image(self, image_fp):
        """Decode one image file and apply ``self.transform`` when it is set."""
        # The context manager closes the file handle; copy() forces decoding
        # and yields an image that stays valid after the handle is closed.
        with Image.open(image_fp) as handle:
            image = handle.copy()
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; label is a 1-element long tensor."""
        image_fp, label = self.examples[index]
        image = self._load_image(image_fp)
        return image, torch.tensor([label], dtype=torch.long)

    def __len__(self):
        """Number of examples held by the dataset."""
        return len(self.examples)

    def crop_image(self, image_data):
        """Return the central ``self.crop`` x ``self.crop`` window of an H x W (x C) array.

        When the crop is larger than the image along an axis, the whole axis
        is returned (the start offset is clamped to 0 instead of going
        negative, which would slice from the far edge).
        """
        w, h = image_data.shape[1], image_data.shape[0]
        startx = max(w // 2 - self.crop // 2, 0)
        starty = max(h // 2 - self.crop // 2, 0)
        return image_data[starty:starty + self.crop, startx:startx + self.crop]

    def update_exclusion_list(self, exclude_indices):
        """Store the indices to exclude.

        The set is only recorded on the instance; ``__getitem__`` and
        ``__len__`` do not consult it.
        """
        self.exclude_indices = set(exclude_indices)

    def degrade_all_images(self, limit=None):
        """Load, transform and degrade examples, returning the images as a list.

        Args:
            limit: if given, only the first ``limit`` examples are processed.
                The default (``None``) processes every example and keeps all
                results in memory.

        Returns:
            List of degraded images, in the order of ``self.examples``.
        """
        selected = self.examples if limit is None else self.examples[:limit]
        return [self.degrade_image(self._load_image(image_fp)) for image_fp, _ in selected]

    def create_gaussian_kernel(self, size, sigma):
        """Return a normalised ``(1, 1, size, size)`` Gaussian kernel, cached per ``(size, sigma)``."""
        key = (size, sigma)
        if key not in self.kernel_cache:
            ax = torch.linspace(-(size - 1) / 2., (size - 1) / 2., size)
            # exp(-(x^2 + y^2) / 2s^2) factorises into an outer product of two
            # 1-D Gaussians, which avoids torch.meshgrid entirely.
            profile = torch.exp(-(ax ** 2) / (2 * sigma ** 2))
            kernel = profile[:, None] * profile[None, :]
            kernel = kernel / torch.sum(kernel)
            self.kernel_cache[key] = kernel.view(1, 1, size, size)
        return self.kernel_cache[key]

    def degrade_image(self, image):
        """Randomly blur and/or add noise to one random square patch of ``image``.

        Args:
            image: floating-point tensor of shape ``(C, H, W)``. It is modified
                in place.

        Returns:
            The same tensor object that was passed in.
        """
        c, h, w = image.shape
        # Clamp so the patch always fits, even for images smaller than 32 px.
        patch_size = min(random.randint(10, 32), h, w)
        x = random.randint(0, w - patch_size)
        y = random.randint(0, h - patch_size)

        # View into the original image; writes to it modify `image`.
        region = image[:, y:y + patch_size, x:x + patch_size]
        patch = region

        # Apply Gaussian blur to the patch
        if random.random() > 0.5:
            size = random.choice([3, 5, 7])  # Kernel size
            # Quantise sigma so the kernel cache is actually reused instead of
            # growing by one entry per call.
            sigma = round(random.uniform(0.5, 1.5), 2)
            pad = size // 2
            # Reflect padding requires the pad to be smaller than the patch.
            if patch_size > pad:
                blur_kernel = (
                    self.create_gaussian_kernel(size, sigma)
                    .repeat(c, 1, 1, 1)
                    .to(device=image.device, dtype=image.dtype)
                )
                padded = F.pad(region.unsqueeze(0), (pad, pad, pad, pad), mode='reflect')
                # groups=c blurs every channel independently.
                patch = F.conv2d(padded, blur_kernel, padding=0, stride=1, groups=c).squeeze(0)

        # Add Gaussian noise
        if random.random() > 0.5:
            patch = patch + torch.randn_like(patch) * 0.05

        # Blur and noise produce new tensors, so the result must be copied
        # back explicitly into the view of `image`.
        if patch is not region:
            region.copy_(patch)
        return image

def show_dataset_images(dataset, indices, ncols=3):
    """Plot the items of ``dataset`` at ``indices`` in a grid with ``ncols`` columns.

    Args:
        dataset: any indexable container. Each item may be an
            ``(image, label)`` pair or a bare image.
        indices: iterable of positions to display.
        ncols: number of grid columns; rows are added as needed.

    Tensor images are expected in ``(C, H, W)`` layout and are converted to
    ``(H, W, C)`` arrays for Matplotlib.
    """
    indices = list(indices)
    if not indices:
        return
    ncols = max(int(ncols), 1)
    # Ceiling division: enough rows so every subplot index is valid.
    nrows = -(-len(indices) // ncols)
    plt.figure(figsize=(15, 5 * nrows))  # Same size as before for a single row
    for i, idx in enumerate(indices):
        item = dataset[idx]
        # Accept both (image, label) pairs and bare images.
        image = item[0] if isinstance(item, (tuple, list)) else item
        if isinstance(image, torch.Tensor):
            # Matplotlib needs a host-side (H, W, C) array.
            image = image.detach().cpu().permute(1, 2, 0).numpy()
        plt.subplot(nrows, ncols, i + 1)
        plt.imshow(image)
        plt.title(f"Index: {idx}")
        plt.axis('off')
    plt.show()

# Shuffle the file indices and keep the first 80% for this split.
permutation = np.random.permutation(len(all_fps))
dataset = PCamDataset(
    [
        # File names end in '.tif'; the label CSV is keyed by the bare id.
        (os.path.join(pcam_directory, 'train', all_fps[index]), label_mapping[all_fps[index][:-4]])
        for index in permutation[:int(len(permutation) * .8)]
    ],
    transform=transforms.Compose([
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Imagenet statistics
    ]),
)
# No trailing comma: a trailing comma here binds a 1-tuple, not the DataLoader.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Degrade only the handful of samples that are previewed; degrading every
# image up front would keep the whole split in memory as tensors.
PREVIEW_COUNT = min(12, len(dataset))
degraded_previews = [
    (dataset.degrade_image(image), label)
    for image, label in (dataset[i] for i in range(PREVIEW_COUNT))
]
degraded_images = [image for image, _ in degraded_previews]
# Items are passed as (image, label) pairs and laid out in a single row so
# that every subplot index is valid.
show_dataset_images(degraded_previews, range(PREVIEW_COUNT), ncols=max(PREVIEW_COUNT, 1))


In [None]:
# The code below is based on the slideflow implementation of the DeepFocus algorithm (https://github.com/jamesdolezal/slideflow/blob/master/slideflow/slide/qc/deepfocus.py)

import torch
import torch.nn as nn
import torch.nn.functional as F

class DeepFocusV3(nn.Module):
    """Five-layer CNN with a two-class softmax output.

    Args:
        filters: output channels of the five convolutional layers.
        kernel_sizes: kernel size of each convolutional layer.
        fc: widths of the two hidden fully connected layers.

    ``forward`` takes a ``(N, 3, H, W)`` batch and returns ``(N, 2)`` class
    probabilities (softmax is already applied). The layer sizes are designed
    for 64x64 inputs; other spatial sizes are accepted because the feature map
    is adaptively pooled to 8x8 before the fully connected layers.
    """

    def __init__(self, filters=(32, 32, 64, 128, 128), kernel_sizes=(5, 3, 3, 3, 3), fc=(128, 64)):
        super(DeepFocusV3, self).__init__()
        self.filters = filters
        self.kernel_sizes = kernel_sizes

        # Convolutional layers ('same' padding keeps the spatial size)
        self.conv1 = nn.Conv2d(3, filters[0], kernel_size=kernel_sizes[0], padding='same')
        self.bn1 = nn.BatchNorm2d(filters[0])

        self.conv2 = nn.Conv2d(filters[0], filters[1], kernel_size=kernel_sizes[1], padding='same')
        self.bn2 = nn.BatchNorm2d(filters[1])

        self.conv3 = nn.Conv2d(filters[1], filters[2], kernel_size=kernel_sizes[2], padding='same')
        self.bn3 = nn.BatchNorm2d(filters[2])
        # MaxPool2d only accepts integer padding. ceil_mode=True gives an
        # output of ceil(size / 2), which is what 'same' pooling would yield.
        self.pool1 = nn.MaxPool2d(2, ceil_mode=True)

        self.conv4 = nn.Conv2d(filters[2], filters[3], kernel_size=kernel_sizes[3], padding='same')
        self.bn4 = nn.BatchNorm2d(filters[3])
        self.pool2 = nn.MaxPool2d(2, ceil_mode=True)

        self.conv5 = nn.Conv2d(filters[3], filters[4], kernel_size=kernel_sizes[4], padding='same')
        self.bn5 = nn.BatchNorm2d(filters[4])
        self.pool3 = nn.MaxPool2d(2, ceil_mode=True)

        # Forces an 8x8 feature map for fc1. For 64x64 inputs the map is
        # already 8x8, so this is the identity; it has no parameters, so the
        # state_dict layout is unchanged.
        self.spatial_pool = nn.AdaptiveAvgPool2d((8, 8))

        # Fully connected layers
        self.fc1 = nn.Linear(filters[4] * 8 * 8, fc[0])
        self.bn6 = nn.BatchNorm1d(fc[0])
        self.dropout1 = nn.Dropout(0.2)

        self.fc2 = nn.Linear(fc[0], fc[1])
        self.bn7 = nn.BatchNorm1d(fc[1])
        self.dropout2 = nn.Dropout(0.2)

        self.fc3 = nn.Linear(fc[1], 2)  # Output layer for binary classification

    def forward(self, x):
        """Return ``(N, 2)`` class probabilities for a ``(N, 3, H, W)`` batch."""
        # Subtract the per-image, per-channel spatial mean
        x = x - torch.mean(x, dim=(2, 3), keepdim=True)

        # Convolutional blocks
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool1(x)

        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool2(x)

        x = F.relu(self.bn5(self.conv5(x)))
        x = self.pool3(x)

        # Normalise the spatial size, then flatten for the dense layers
        x = self.spatial_pool(x)
        x = torch.flatten(x, 1)

        # Fully connected layers
        x = F.relu(self.bn6(self.fc1(x)))
        x = self.dropout1(x)

        x = F.relu(self.bn7(self.fc2(x)))
        x = self.dropout2(x)

        # Probabilities, not logits: callers must not apply softmax again.
        x = F.softmax(self.fc3(x), dim=1)
        return x

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network, move it to the chosen device and switch it to evaluation
# mode; .to() and .eval() both return the module itself.
# NOTE(review): no weights are loaded in this cell, so the model runs with its
# initial parameters — confirm whether a checkpoint should be loaded first.
model = DeepFocusV3().to(device).eval()

# Dataset indices flagged by the most recent predict_clarity() call.
exclude_indices = []


def predict_clarity(dataloader, model, threshold=0.75):
    """Flag dataset items whose predicted 'clear' probability is below ``threshold``.

    Args:
        dataloader: DataLoader over a dataset yielding ``(image, label)`` pairs.
            Only its ``dataset``, ``batch_size`` and ``num_workers`` are used;
            the data is always re-read in dataset order with default collation,
            so the returned values are true dataset indices even when
            ``dataloader`` shuffles.
        model: module returning ``(N, 2)`` class probabilities, with column 1
            being the 'clear' class. It must already apply softmax (as
            ``DeepFocusV3.forward`` does); no second softmax is applied here,
            since softmax over two probabilities can never exceed ~0.731.
        threshold: items with a 'clear' probability strictly below this value
            are flagged.

    Returns:
        List of flagged dataset indices in ascending order. The module-level
        ``exclude_indices`` list is replaced with the same values, and the
        dataset's ``update_exclusion_list`` is called when it has one.
    """
    source = dataloader.dataset
    ordered_loader = torch.utils.data.DataLoader(
        source,
        batch_size=dataloader.batch_size or 1,
        shuffle=False,
        num_workers=dataloader.num_workers,
    )

    # Run on whatever device the model lives on (CPU for parameter-free models).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    flagged = []
    offset = 0  # dataset index of the first item in the current batch
    was_training = model.training
    model.eval()  # dropout / batch-norm must be in inference mode
    try:
        with torch.no_grad():  # No need to track gradients
            for images, _ in ordered_loader:
                probabilities = model(images.to(target_device))
                clear_probs = probabilities[:, 1].tolist()  # Index 1 for 'clear'
                flagged.extend(
                    offset + j for j, prob in enumerate(clear_probs) if prob < threshold
                )
                offset += len(clear_probs)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    # Replace (not append) so repeated calls do not accumulate duplicates.
    exclude_indices[:] = flagged
    updater = getattr(source, "update_exclusion_list", None)
    if updater is not None:
        updater(set(flagged))
    return flagged

# Score the PCam images for clarity and record which ones fall below the
# function's default threshold.
predict_clarity(dataloader=dataloader, model=model)
