In [None]:
import os
import glob
import numpy as np
import pandas as pd
import seaborn as sns

from openslide import OpenSlide
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix

import fastai
from fastai.vision.all import *

In [None]:
def load_image(fn, mode=None):
    """Read one patch from a whole-slide image and return it as a `PIL.Image`.

    `fn` is not a plain image path but a patch descriptor of the form
    ``slide_path&h_start&w_start&read_level&read_patch_size``,
    e.g. ``/path/test.svs&3328&7680&0&256``.

    Args:
        fn: Patch descriptor, as a ``str`` or any object whose ``str()`` is
            the descriptor (e.g. a ``pathlib.Path``).
        mode: Optional PIL mode to convert the RGB patch to; a falsy value
            keeps the patch in RGB.

    Returns:
        The patch read from the slide, converted to RGB and then to `mode`
        if one was given.

    Raises:
        ValueError: if `fn` does not have five ``&``-separated fields or one
            of the four trailing fields is not an integer.
    """
    # Split from the right: the four trailing fields are always integers, so a
    # slide path that itself contains '&' is kept intact.
    parts = str(fn).rsplit('&', 4)
    if len(parts) != 5:
        raise ValueError(
            "expected 'slide_path&h_start&w_start&read_level&read_patch_size', "
            f"got {fn!r}"
        )

    slide_path = parts[0]
    h, w, level, ps = (int(field) for field in parts[1:])

    with OpenSlide(slide_path) as slide:
        # The descriptor stores (h, w); read_region is called with (w, h).
        im = slide.read_region((w, h), level, (ps, ps)).convert('RGB')

    # Kept from the original: force-load the pixel data and re-wrap it in a
    # new image object.
    # NOTE(review): likely redundant after convert() - confirm before removing.
    im.load()
    im = im._new(im.im)
    return im.convert(mode) if mode else im

# Swap fastai's module-level image loader for the slide-aware `load_image`
# defined in this cell, so patch descriptors can be used as "file names".
# NOTE(review): presumably fastai looks up `load_image` in `fastai.vision.core`
# at call time, which is what makes this patch take effect - confirm.
fastai.vision.core.load_image = load_image

# Utils

In [None]:
# def tissue_mask(slide, level):
#     col, row = slide.level_dimensions[level]

#     img = np.array(slide.read_region((0, 0), level, (col, row)))
#     img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
#     img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
#     img = img[:, :, 1]
#     _, tissue_mask = cv2.threshold(
#         img,
#         0,
#         255,
#         cv2.THRESH_BINARY + cv2.THRESH_OTSU
#     )
    
#     return tissue_mask

# Data

In [None]:
def get_df(csv_list, num=100, seed=None):
    """Build a class-balanced sample of patches from several CSV files.

    For every CSV, up to `num` randomly chosen rows with ``label == 1`` are
    taken, followed by up to `num` randomly chosen rows with ``label == 0``.
    The per-file samples are concatenated in the order of `csv_list`; rows
    keep the index labels they had in their own file.

    Args:
        csv_list: Iterable of CSV paths; each file must have a ``label`` column.
        num: Maximum number of rows drawn per class and per file.
        seed: Optional integer seed. When given, the same rows are drawn on
            every call; when ``None`` numpy's global random state is used.

    Returns:
        A single `pandas.DataFrame` with the sampled rows.

    Raises:
        ValueError: if `csv_list` is empty.
    """
    csv_list = list(csv_list)
    if not csv_list:
        # pd.concat([]) would raise a ValueError that does not mention the cause.
        raise ValueError('get_df: csv_list is empty, there is nothing to sample from')

    # One generator shared by all draws so a single seed fixes the whole sample.
    rng = np.random if seed is None else np.random.RandomState(seed)

    samples = []
    for csv_path in csv_list:
        data = pd.read_csv(csv_path)
        # Positives first, then negatives.
        for label in (1, 0):
            rows = data[data['label'] == label]
            order = rng.permutation(len(rows))
            samples.append(rows.iloc[order[:num]])

    return pd.concat(samples)

In [None]:
### csv sample
# test.csv
# 
#   img                       label
# 0 /path/test.svs&3328&7680&0&256  1
# 1 /path/test.svs&3584&7424&0&256  1
# 2 /path/test.svs&3584&7680&0&256  0
# 3 /path/test.svs&3840&7424&0&256  0
# ...
#
# Each `img` value has the form slide_path&h_start&w_start&read_level&read_patch_size,
# e.g. /path/test.svs&3840&7424&0&256

In [None]:
# Sample the training and evaluation CSVs separately and tag each row with the
# split it belongs to (`is_valid`: 0 = training, 1 = evaluation), then stack
# both into one frame.
train_csv = glob.glob('/your/all/train/csv/*.csv')
train_df = get_df(train_csv).assign(is_valid=0)

eval_csv = glob.glob('/your/all/eval/csv/*.csv')
eval_df = get_df(eval_csv).assign(is_valid=1)

df = pd.concat([train_df, eval_df])

In [None]:
# NOTE(review): '/' is presumably used as the root because the patch
# descriptors already hold absolute slide paths - confirm.
path = '/'

# The `is_valid` column decides which rows are used for validation.
dls = ImageDataLoaders.from_df(
    df, path, valid_col='is_valid',
    item_tfms=Resize(256), batch_tfms=aug_transforms(size=256),
)

In [None]:
# Visual sanity check: display a batch from `dls` before training.
dls.show_batch()

# Training

In [None]:
# Learner built on `dls` with a resnet34 architecture, reporting accuracy.
learn = vision_learner(dls, resnet34, metrics=[accuracy])

In [None]:
# Placeholder path - replace before running.
# NOTE(review): presumably the directory where checkpoints are written and
# read back by `learn.load` - confirm.
learn.model_dir = '/your/model/path'

In [None]:
# Fine-tune with 12 as the epoch argument; SaveModelCallback stores its
# checkpoint under the name 'best'.
learn.fine_tune(12, base_lr=3e-4, cbs=[SaveModelCallback(fname='best')])

# Evaluation

In [None]:
# Reload the checkpoint named 'best', the `fname` given to SaveModelCallback
# in the training cell.
learn.load('best')

In [None]:
# Predictions and targets from `learn.get_preds()`, as numpy arrays.
p, t = learn.get_preds()
p, t = p.numpy(), t.numpy()
# Collapse the per-class scores to the index of the highest-scoring class.
p = p.argmax(axis=1)

In [None]:
# Confusion matrix of targets `t` (first argument) against predicted
# classes `p` (second argument).
confusion_matrix(t, p)

# Testing

In [None]:
# Run the trained learner over the rows of the test CSV; only the
# predictions (first element returned by `get_preds`) are kept.
test_df = pd.read_csv('/your/test/csv.csv')
test_dl = learn.dls.test_dl(test_df)
p = learn.get_preds(dl=test_dl)[0].numpy()

In [None]:
# Column 1 of the prediction matrix is stored as the patch score.
# NOTE(review): assumes class index 1 corresponds to label 1 - confirm via dls.vocab.
score = p[:, 1]
test_df['pred'] = score
# `index=False` is the documented way to leave the row index out of the CSV;
# the previous `index=None` only worked because None is falsy.
test_df.to_csv('/your/save/path/pred.csv', index=False)

# Ensemble

In [None]:
def wsi_classification(PM_A, PM_B, threshold=0.5):
    """Slide-level decision from two probability maps.

    Only positions where `PM_A` exceeds `threshold` are counted. Among those,
    positions where `PM_B` also exceeds `threshold` are positive votes and
    positions where `PM_B <= threshold` are negative votes.

    Returns:
        1 when positive votes strictly outnumber negative votes, otherwise 0
        (a tie, including no votes at all, gives 0).
    """
    a_above = PM_A > threshold

    votes_for = np.sum(a_above & (PM_B > threshold))
    votes_against = np.sum(a_above & (PM_B <= threshold))

    if votes_for > votes_against:
        return 1
    return 0

In [None]:
# Random demo probability maps. A fixed seed keeps the example reproducible:
# with unseeded uniform noise the positive and negative vote counts are nearly
# equal, so the result of the demo call would flip between runs.
demo_rng = np.random.RandomState(0)
PM_A = demo_rng.rand(1024, 1024)
PM_B = demo_rng.rand(1024, 1024)

In [None]:
# Slide-level decision (1 or 0) for the two demo probability maps.
wsi_classification(PM_A, PM_B)