In [2]:
from fastai2 import *
from fastai2.vision.all import *
from fastai2.callback.fp16 import *

import pretrainedmodels
import skimage.io

# Side length, in pixels, of each square patch cut from a slide. It is also
# passed as the per-tile size when the selected patches are glued into one image.
WINDOW_SIZE = 200
# Step, in pixels, between the top-left corners of neighbouring candidate patches.
STRIDE = 64
# Number of patches kept per slide, and the number of tile slots in the glued image.
K = 36

In [3]:
# Root folder of the competition data set.
source = Path("../input/prostate-cancer-grade-assessment")

# Folder the slides are read from.
# NOTE(review): this points at train_images, and train.csv is loaded below, so
# the notebook currently runs on training slides — presumably a dry run; switch
# both to the test set before submitting.
submission_test_path = "../input/prostate-cancer-grade-assessment/train_images/"
sample = '../input/prostate-cancer-grade-assessment/sample_submission.csv'

sub_df = pd.read_csv(sample)
# test_df = pd.read_csv(source/'test.csv')
test_df = pd.read_csv(source/'train.csv')
# Keep the first 20 rows only. The explicit copy makes test_df an independent
# frame, so the column assignment done in the prediction cell never targets a
# slice of the full table.
test_df = test_df[:20].copy()

In [4]:
def compute_statistics(image):
    """
    Compute the three numbers used to rank and filter a patch.

    Args:
        image                  numpy.array   3-D array whose last axis holds the colour channels

    Returns:
        ratio_white_pixels     float         share of pixels whose channel sum exceeds 620
        green_concentration    float         mean of every value in image[1]
        blue_concentration     float         mean of every value in image[2]

    NOTE(review): image[1] and image[2] select slices along the FIRST axis
    (rows 1 and 2 of the patch, all channels), not colour channels 1 and 2.
    The names suggest image[..., 1] / image[..., 2] was intended. The indexing
    is kept as is because the > 180 filter in select_k_best_regions, and
    presumably the trained weights, were tuned against these values — confirm
    against the training pipeline before changing it.
    """
    pixel_count = image.shape[0] * image.shape[1]

    # A pure white RGB pixel sums to 765; anything above 620 is treated as white.
    is_white = np.sum(image, axis=-1) > 620
    ratio_white_pixels = np.count_nonzero(is_white) / pixel_count

    second_slice, third_slice = image[1], image[2]
    green_concentration = np.mean(second_slice)
    blue_concentration = np.mean(third_slice)

    return ratio_white_pixels, green_concentration, blue_concentration

In [5]:
def select_k_best_regions(regions, k=16):
    """
    Keep the k candidate regions with the smallest white-pixel ratio.

    Args:
        regions -- list           list of tuples indexed as
                                  (x, y, ratio_white_pixels, green_concentration, blue_concentration)

        k -- int -- number of regions to select

    Returns:
        list of at most k tuples, ordered by ascending ratio_white_pixels.
        Regions whose green or blue concentration is not strictly above 180
        are dropped before ranking; ties keep their input order.
    """
    def _passes_colour_filter(region):
        return region[3] > 180 and region[4] > 180

    candidates = list(filter(_passes_colour_filter, regions))
    # list.sort is stable, so equal ratios stay in their original relative order.
    candidates.sort(key=lambda region: region[2])
    return candidates[:k]

In [6]:
def get_k_best_regions(coordinates, image, window_size=512):
    """
    Cut one square window out of `image` for every coordinate tuple.

    Args:
        coordinates -- sequence of tuples; element 0 is the offset along the
                       first image axis, element 1 the offset along the second
                       (any further elements are ignored)
        image       -- 3-D array to slice
        window_size -- side length of each window

    Returns:
        dict mapping the position of each tuple in `coordinates` (0, 1, ...)
        to the corresponding slice of `image`.
    """
    return {
        position: image[
            coords[0] : coords[0] + window_size,
            coords[1] : coords[1] + window_size,
            :
        ]
        for position, coords in enumerate(coordinates)
    }

In [7]:
def generate_patches(slide_path, window_size=200, stride=224, k=16):
    """
    Slide a square window over one image and keep the k best patches.

    The image is entry 1 of the skimage MultiImage opened from `slide_path`.
    Every window position is scored with compute_statistics, the candidates
    are filtered and ranked by select_k_best_regions, and the winning windows
    are cut out by get_k_best_regions.

    Args:
        slide_path  -- path of the image file to open
        window_size -- side length of each square window
        stride      -- step between consecutive window origins on both axes
        k           -- number of windows to keep

    Returns:
        (image, k_best_region_coordinates, k_best_regions), or
        (None, None, None) after printing the error when the image cannot be
        read.
    """
    try:
        slide_level = skimage.io.MultiImage(slide_path)[1]
    except Exception as e:
        print(str(e))
        return None, None, None

    image = np.array(slide_level)
    # Extent of the first and second array axes; these names follow the x/y
    # convention used for the window origins below.
    max_width, max_height = image.shape[0], image.shape[1]

    def _origins(extent):
        # Successive window origins along one axis, for as long as a full
        # window still fits inside `extent`.
        step_index = 0
        while window_size + stride * step_index <= extent:
            yield stride * step_index
            step_index += 1

    regions_container = []
    # Outer loop walks the second axis (y), inner loop the first axis (x), so
    # candidates are appended in the order the ranking's tie-breaking expects.
    for y_top_left_pixel in _origins(max_height):
        for x_top_left_pixel in _origins(max_width):
            patch = image[
                x_top_left_pixel : x_top_left_pixel + window_size,
                y_top_left_pixel : y_top_left_pixel + window_size,
                :
            ]
            regions_container.append(
                (x_top_left_pixel, y_top_left_pixel, *compute_statistics(patch))
            )

    k_best_region_coordinates = select_k_best_regions(regions_container, k=k)
    k_best_regions = get_k_best_regions(k_best_region_coordinates, image, window_size)

    return image, k_best_region_coordinates, k_best_regions

In [8]:
## Glue to one picture
def glue_images_one(tiles, image_size=200, n_tiles=36):
    """
    Arrange tiles row by row into one square mosaic image.

    Args:
        tiles      -- collection indexable by 0..len(tiles)-1 (a list, or the
                      dict returned by get_k_best_regions) whose items are
                      image_size x image_size x 3 arrays. None is accepted and
                      treated as "no tiles": generate_patches returns None
                      for the tile collection when a slide cannot be read.
        image_size -- side length of every tile
        n_tiles    -- number of tile slots; the grid has int(sqrt(n_tiles))
                      rows and columns, so slots beyond a perfect square are
                      not used

    Returns:
        tensor(...) of a uint8 array of shape
        (image_size * rows, image_size * rows, 3). Slots with no tile stay
        white (255).
    """
    if tiles is None:
        # Yield an all-white mosaic instead of failing on len(None).
        tiles = ()

    n_row_tiles = int(np.sqrt(n_tiles))
    side = image_size * n_row_tiles

    # Start from a white uint8 canvas and paste tiles straight in. This gives
    # the same pixels as inverting every tile onto a zeroed float canvas and
    # inverting the whole canvas back, without the float64 intermediate.
    image = np.full((side, side, 3), 255, dtype=np.uint8)

    for h in range(n_row_tiles):
        for w in range(n_row_tiles):
            i = h * n_row_tiles + w
            if i >= len(tiles):
                # No tile for this slot: leave it white.
                continue

            h1 = h * image_size
            w1 = w * image_size
            image[h1:h1 + image_size, w1:w1 + image_size] = tiles[i]

    return tensor(image)

In [9]:
def get_inf(df=test_df):
    """
    Build the glued tile image for one slide.

    Reads `df.image_id`, opens `<submission_test_path>/<image_id>.tiff`,
    selects K patches of WINDOW_SIZE pixels with step STRIDE, and glues them
    into a single image.

    Args:
        df -- object exposing an `image_id` attribute. This function is used
              as a DataBlock getter, so presumably it receives one row of the
              frame per call rather than a whole DataFrame — TODO confirm.
              NOTE(review): the default binds the entire test_df, whose
              image_id is a whole column, so calling without an argument
              would not produce a usable file name.

    Returns:
        tensor(...) of the glued image.
    """
    slide_file = f'{submission_test_path}/{df.image_id}.tiff'
    patch_result = generate_patches(slide_file, window_size=WINDOW_SIZE, stride=STRIDE, k=K)

    # Only the third element (the cut-out regions) is needed here.
    mosaic = glue_images_one(tiles=patch_result[2], image_size=WINDOW_SIZE, n_tiles=K)
    return tensor(mosaic)


In [10]:
# Input is an image, target is a category.
blocks = ( ImageBlock,CategoryBlock)
# The image comes from get_inf (glued tile mosaic); the label is read from
# the 'isup_grade' column.
getters = [ get_inf, ColReader('isup_grade')]

# Per-item Resize(1200): with K = 36 tiles of WINDOW_SIZE = 200 pixels the
# glued mosaic is already 6 * 200 = 1200 pixels per side.
# Batch transforms: aug_transforms at size 320 with warp and rotation set to
# 0.0, followed by normalisation with imagenet_stats.
# NOTE(review): presumably this mirrors the pipeline used to train the loaded
# weights — confirm against the training notebook.
dBlock = DataBlock( blocks=blocks,
                    getters=getters,
                    item_tfms=Resize(1200),
                    batch_tfms=[*aug_transforms(size=320, max_warp=0.0, max_rotate=0.0),
                               Normalize.from_stats(*imagenet_stats)]) 

# Dataloaders over test_df with batch size 16.
dls = dBlock.dataloaders(test_df, bs=16)



In [11]:
# SE-ResNeXt101 32x4d from the pretrainedmodels package, requested with its
# 'imagenet' weights.
# NOTE(review): presumably these weights are replaced when the checkpoint is
# loaded into the Learner later — confirm whether fetching them is needed.
m = pretrainedmodels.se_resnext101_32x4d(pretrained='imagenet')

children = list(m.children())
# New head: global average pool to 1x1, flatten, then a linear layer with 6
# outputs. Its input width is taken from the in_features of the network's
# last child module.
head = nn.Sequential(nn.AdaptiveAvgPool2d(1), 
                    Flatten(), 
                    nn.Linear(children[-1].in_features, 6))

# Body = every child except the last two (presumably the original pooling and
# classifier layers — verify), followed by the new head.
model = nn.Sequential(nn.Sequential(*children[:-2]), head) 

In [12]:
# Bundle the dataloaders and the model into a fastai Learner; it is used
# afterwards to load the saved weights and to run predictions.
learn = Learner(dls, model)

In [20]:
# Load the saved state named '1_320x320_july9_0.9898_model' into the Learner.
# NOTE(review): the cell execution counter jumps from 12 to 20 here, so cells
# were run out of order or deleted — re-check with Restart & Run All.
learn.load('1_320x320_july9_0.9898_model')

<fastai2.learner.Learner at 0x7fcf56d6c860>

In [28]:
# Run inference only when the image folder exists.
# NOTE(review): `os` is never imported by name in this notebook; presumably it
# arrives through the fastai2 star imports — confirm, or import it explicitly.
if os.path.exists(submission_test_path):
        
    print('doing predictions')
    # Build a dataloader over test_df from the existing dataloaders.
    test_dl = dls.test_dl(test_df)
    # Only the third value returned with with_decoded=True is kept; the
    # recorded output shows it holds one integer class per row.
    _,_, preds = learn.get_preds(dl=test_dl, with_decoded=True)
    print(preds)
    print()

    print('writing  submission file')
    # NOTE(review): this overwrites the isup_grade column loaded from the CSV
    # with the predictions, so the original labels in test_df are lost.
    test_df["isup_grade"] = preds
    sub = test_df[["image_id","isup_grade"]]
    sub.to_csv('submission.csv', index=False)
    print()


doing predictions


tensor([0, 0, 4, 4, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 3, 0, 3, 0, 3])

writing  submission file



In [31]:
# learn.show_results(max_n=6)

In [30]:
# Write image_id and the predicted isup_grade to submission.csv.
# NOTE(review): these three lines repeat the end of the prediction cell, but
# outside its os.path.exists guard. `preds` is only assigned inside that guard,
# so this cell fails with a NameError if the guarded branch did not run.
test_df["isup_grade"] = preds
sub = test_df[["image_id","isup_grade"]]
sub.to_csv('submission.csv', index=False)