# 📚 Import Libraries

In [1]:
!cp -r ../input/timm-pytorch-image-models . 
!cp -r ../input/openslide .

In [2]:
!pip install -qU ./timm-pytorch-image-models/pytorch-image-models-master
!pip install -qU ./openslide

[0m

In [3]:
!conda install ../input/how-to-use-pyvips-offline/*.tar.bz2 


Downloading and Extracting Packages
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
######################################################################## | 100% 
###########################################################

In [4]:
import pandas as pd
import numpy as np
from glob import glob
from collections import defaultdict
from tqdm import tqdm
import time
import os 
import copy
import gc
from openslide import OpenSlide
from PIL import Image
# visualization
import cv2
import matplotlib.pyplot as plt

# Sklearn
from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold,GroupKFold 

# PyTorch 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

import timm

import zipfile
import pyvips
# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Metrics 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


# For colored terminal text
from colorama import Fore, Back, Style
c_  = Fore.GREEN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings('ignore')

# ⚙️ Configuration

In [5]:
fake_inf = len(glob("/kaggle/input/mayo-clinic-strip-ai/test/*")) == 4 
print(fake_inf)

True


In [6]:
class CFG:
    seed          = 2307
    debug         = False # set debug=False for Full Training
    comment       = "eff b7 more more 0.15 satruration on data"
    n_flods       = 5
    backbone      = "efficientnet_b3"
    train_bs      = 8
    valid_bs      = train_bs*4
    epochs        = 25
    lr            = 1e-4
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int(30000/train_bs*epochs)+50
    T_0           = 25
    warmup_epochs = 1
    wd            = 1e-6
    n_accumulate  = 2#max(1, 32//train_bs)
    device        = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    tile_size     = (1024,1024)
    t_thr         = 0.4 # percentage of required color pixles to |keep the pic 
    faster_inf    = True #only take N_slides slides from each image 
    N_slides      = 16

In [7]:
def set_seed(seed = 42):
    np.random.seed(seed)
    #random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    print('> SEEDING DONE')
    
set_seed(CFG.seed)

> SEEDING DONE


# ❗ Data

In [8]:
def get_data_info(paths,df):
    img_prop = defaultdict(list)
    
    for i, path in enumerate(paths):
        img_path = paths[i]
        slide = OpenSlide(img_path)    
        img_prop['image_id'].append(img_path[-12:-4])
        img_prop['width'].append(slide.dimensions[0])
        img_prop['height'].append(slide.dimensions[1])
        img_prop['size'].append(round(os.path.getsize(img_path) / 1e6, 2))
        img_prop['path'].append(img_path)
    
    image_data = pd.DataFrame(img_prop)
    image_data['img_aspect_ratio'] = image_data['width']/image_data['height']
    image_data.sort_values(by='image_id', inplace=True)
    image_data.reset_index(inplace=True, drop=True)
    
    image_data = image_data.merge(df, on='image_id')
    return image_data

In [9]:
test_images = glob("/kaggle/input/mayo-clinic-strip-ai/test/*")
test_df = pd.read_csv('../input/mayo-clinic-strip-ai/test.csv')
test_df= get_data_info(test_images,test_df)
test_df.head()

Unnamed: 0,image_id,width,height,size,path,img_aspect_ratio,center_id,patient_id,image_num
0,006388_0,34007,60797,1312.94,/kaggle/input/mayo-clinic-strip-ai/test/006388...,0.559353,11,006388,0
1,008e5c_0,5946,29694,109.57,/kaggle/input/mayo-clinic-strip-ai/test/008e5c...,0.200242,11,008e5c,0
2,00c058_0,15255,61801,351.76,/kaggle/input/mayo-clinic-strip-ai/test/00c058...,0.246841,11,00c058,0
3,01adc5_0,55831,26553,679.17,/kaggle/input/mayo-clinic-strip-ai/test/01adc5...,2.102625,11,01adc5,0


In [10]:
def tile(img, sz=128, N=16):
    shape = img.shape
    pad0,pad1 = (sz - shape[0]%sz)%sz, (sz - shape[1]%sz)%sz
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],constant_values=255)
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)
    if len(img) < N:
        img = np.pad(img,[[0,N-len(img)],[0,0],[0,0],[0,0]],constant_values=255)
    idxs = np.argsort(img.reshape(img.shape[0],-1).sum(-1))[:N]
    img = img[idxs]
    return img


def save_dataset(
    df: pd.DataFrame, 
    N=16,
    max_size=20000, 
    crop_size=1024, 
    image_dir='../input/mayo-clinic-strip-ai/train', 
    out_dir='train_images.zip',
):
    format_to_dtype = {
       'uchar': np.uint8,
       'char': np.int8,
       'ushort': np.uint16,
       'short': np.int16,
       'uint': np.uint32,
       'int': np.int32,
       'float': np.float32,
       'double': np.float64,
       'complex': np.complex64,
       'dpcomplex': np.complex128,
    }
    def vips2numpy(vi):
        return np.ndarray(
            buffer=vi.write_to_memory(),
            dtype=format_to_dtype[vi.format],
            shape=[vi.height, vi.width, vi.bands])
   
    tk0 = tqdm(enumerate(df["image_id"].values), total=len(df))
    for i, image_id in tk0:
        print(f"[{i+1}/{len(df)}] image_id: {image_id}")
        image = pyvips.Image.thumbnail(f'{image_dir}/{image_id}.tif', max_size)
        image = vips2numpy(image)
        width, height, c = image.shape
        print(f"Input width: {width} height: {height}")
        images = tile(image, sz=crop_size, N=N)
        for idx, img in enumerate(images):
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            #img = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 100])[1]
            cv2.imwrite(f"./test/{image_id}_{idx}.png", img)
        del img, image, images; gc.collect()

df = test_df
!mkdir ./test
save_dataset(
    df,
    N=16, 
    max_size=20000,
    crop_size=1024, 
    image_dir='/kaggle/input/mayo-clinic-strip-ai/test', 
    out_dir=f'train_images_.zip'
)

  0%|          | 0/4 [00:00<?, ?it/s]

[1/4] image_id: 006388_0
Input width: 20000 height: 11187


 25%|██▌       | 1/4 [00:45<02:17, 45.94s/it]

[2/4] image_id: 008e5c_0
Input width: 20000 height: 4005


 50%|█████     | 2/4 [00:51<00:44, 22.48s/it]

[3/4] image_id: 00c058_0
Input width: 20000 height: 4937


 75%|███████▌  | 3/4 [01:08<00:19, 19.59s/it]

[4/4] image_id: 01adc5_0
Input width: 9512 height: 20000


100%|██████████| 4/4 [01:41<00:00, 25.48s/it]


In [11]:
def get_data_info(paths , train = False):
    img_prop = defaultdict(list)
    
    for i, path in tqdm(enumerate(paths), total = len(paths),desc = "making dataframe"):
        img_info =  path.split('/')[-1]
        patient_id , image_num , _ = img_info.split("_")
        #tl_pixel = tl_pixel.split('.')[0]
        
        
        img_prop['image_id'].append(f"{patient_id}_{image_num}")
        img_prop['patient_id'].append(patient_id)
        img_prop['image_num'].append(image_num)
        img_prop['path'].append(path)
        #img_prop['tl_pixel'].append(tl_pixel)
        
        if train:
            label = train_data[train_data["image_id"]==f"{patient_id}_{image_num}"].label.item()
            
            img_prop['label'].append(label)
            
        
        #img_prop['density'].append(extra_info)
    
    image_data = pd.DataFrame(img_prop)

    image_data.sort_values(by='image_id', inplace=True)
    image_data.reset_index(inplace=True, drop=True)
    #image_data['density'] = image_data['density'].astype(np.float16)
    
    return image_data

In [12]:
test_images = glob("./test/*")
df = get_data_info(test_images)
df.head()

making dataframe: 100%|██████████| 64/64 [00:00<00:00, 204444.37it/s]


Unnamed: 0,image_id,patient_id,image_num,path
0,006388_0,6388,0,./test/006388_0_14.png
1,006388_0,6388,0,./test/006388_0_1.png
2,006388_0,6388,0,./test/006388_0_10.png
3,006388_0,6388,0,./test/006388_0_7.png
4,006388_0,6388,0,./test/006388_0_15.png


# 🔨 Utility

In [13]:
def load_img(path):
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    img = img.astype('float32') # original is uint16
    mx = np.max(img)
    if mx:
        img/=mx # scale image to [0, 1]
    return img

def show_img(img, ground_truth, pred = "", conf = ""):
    plt.imshow(img)
    plt.title(f'true: {"CE" if ground_truth else "LAA"} | predicted: {pred} | conf: {conf}')
    plt.axis('off')
    

# ❗ DataLoaders

In [14]:
class StripAiDataset(Dataset):
    def __init__(self, df, label=False ,transforms = None):
        self.df = df
        self.transforms = transforms
        self.file_names = df['path'].tolist()
        self.patient_id = df['patient_id'].tolist()
        self.label = None
        if label:
            self.labels = df.label.apply(lambda x: 1 if x == "CE" else 0).tolist()
        
    def __len__(self):
        return len(self.df)
  
    def __getitem__(self,index):
        img = load_img(self.file_names[index])

        if self.transforms:
            result = self.transforms(image=img)
            img = result["image"]
            
        img = np.transpose(img, (2, 0, 1))
        
        if not self.label:
            return  torch.tensor(img),self.patient_id[index]
        
        label = self.labels[index]
        return torch.tensor(img), torch.tensor(label)

In [15]:
data_transforms = {
    "train": A.Compose([], p=1.0),
    "valid": A.Compose([], p=1.0)
}

In [16]:
def prepare_loaders(test_df, debug=False):
 

    test_dataset = StripAiDataset(test_df, transforms=None)#data_transforms['valid'])

    test_loader = DataLoader(test_dataset, batch_size=CFG.valid_bs, 
                              num_workers=2, shuffle=False, pin_memory=False)
    
    return test_loader

In [17]:
class StripModel(nn.Module):

    def __init__(self, model_name, num_classes=2, pretrained=True):
        super().__init__()
        self.base = timm.create_model(model_name, pretrained=pretrained, num_classes=2)

    def forward(self, x):
        out  = self.base(x) 
        
        return out

In [18]:
def get_model(path):
    model = StripModel(CFG.backbone ,pretrained = False)
    model.to(CFG.device)
    model.load_state_dict(torch.load(path))#,map_location=torch.device(CFG.device)))
    model.eval()
    return model

# 🔧 Loss Function

# 🚄 Training Function

In [19]:
@torch.no_grad()
def infer(model_ptah, test_loader):
    preds = defaultdict(list)
    model     = get_model(model_ptah)
    soft = nn.Softmax()
    for idx, (img,pid) in enumerate(tqdm(test_loader, total=len(test_loader), desc='Infer ')):
        img = img.to(CFG.device, dtype=torch.float)# .squeeze(0)
        out = model(img).squeeze()
        out = soft(out).cpu().detach().numpy()
        preds["patient_id"].extend(pid)
        
        preds["CE"].extend(out[:,1])
        preds["LAA"].extend(out[:,0])
    
        
        del img,  out
        gc.collect()
        torch.cuda.empty_cache()

    return pd.DataFrame(preds)

In [20]:
test_loader  = prepare_loaders(df)
preds = infer("../input/first-attempt/best_epoch-00-0.778382251522347.bin",test_loader)

Infer : 100%|██████████| 2/2 [00:11<00:00,  5.64s/it]


In [21]:
preds = preds.groupby(by="patient_id").mean().reset_index()

In [22]:
preds

Unnamed: 0,patient_id,CE,LAA
0,006388,0.783796,0.216204
1,008e5c,0.806523,0.193477
2,00c058,0.489846,0.510154
3,01adc5,0.455385,0.544615


In [23]:
preds.to_csv("submission.csv", index = False)