In [None]:
from ast import While
from fastai.vision.all import *
from fastai.callback.tensorboard import TensorBoardCallback
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import os
import cv2
import gc
import random
from albumentations import *
import albumentations as A
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import timm
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Central experiment configuration; every later cell reads its settings from `cfg`.
cfg = SimpleNamespace(
    # --- data ---
    img_size=1024,            # side length used by the crop transforms
    in_channels=1,            # passed to the backbone as `in_chans`
    classes=['cancer'],       # label column(s) read from the dataframe
    data_folder="/kaggle/input/rsna-1024x1024-pngs-small/pngs/",
    # --- model ---
    backbone='seresnext50_32x4d',
    pretrained=True,
    save_name="seresnext50_32x4d",   # used for the output folder, checkpoints and logs
    # --- hardware / data loading ---
    device=0,
    device_ids=[0],           # more than one id switches the model to DataParallel
    bs=4,                     # batch size
    nw=4,                     # DataLoader worker count
    # --- cross-validation / reproducibility ---
    nfolds=5,
    seed=42,
    # --- augmentation ---
    p=0.5,                    # probability of the random sized crop
    # --- optimisation ---
    lr=1e-4,
    wd=0.01,
    epochs=5,
)
# Depends on cfg.img_size, so it is attached after the namespace exists.
cfg.val_aug = A.CenterCrop(always_apply=False, p=1.0, height=cfg.img_size, width=cfg.img_size)

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Sets PYTHONHASHSEED and seeds Python's `random`, NumPy and PyTorch
    (CPU and the current CUDA device) with the same value, then configures
    cuDNN.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode is left on deliberately: roughly 10% faster,
    # at the cost of some run-to-run stochasticity in the results.
    torch.backends.cudnn.benchmark = True
# Fix the RNG seeds before anything random happens, then load the training metadata table.
seed_everything(seed=cfg.seed)
TRAIN_CSV = '/kaggle/input/rsna-breast-cancer-detection/train.csv'
train_df = pd.read_csv(TRAIN_CSV)

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset yielding (image tensor, label tensor) pairs.

    Image files are looked up as ``<patient_id>_<image_id>.png`` inside
    ``cfg.data_folder``; labels come from the columns listed in
    ``cfg.classes``.

    Args:
        df: dataframe with ``patient_id``, ``image_id`` and the label
            column(s). It is not modified.
        cfg: configuration object providing ``classes`` and ``data_folder``.
        aug: callable invoked as ``aug(image=img)`` and returning a mapping
            with an ``"image"`` entry, or a falsy value to skip augmentation.
    """

    def __init__(self, df, cfg, aug):
        self.cfg = cfg
        # Image 1942326353 is excluded (reason not recorded in this notebook).
        # Filter first and copy afterwards, so the column added below lands on
        # an independent frame instead of a slice of the caller's dataframe.
        keep = df['image_id'].astype(str) != '1942326353'
        self.df = df.loc[keep].copy()
        self.labels = self.df[self.cfg.classes].values
        self.df["fns"] = self.df['patient_id'].astype(str) + '_' + self.df['image_id'].astype(str) + '.png'
        self.fns = self.df["fns"].astype(str).values
        self.aug = aug
        self.data_folder = cfg.data_folder

    def __getitem__(self, idx):
        """Return ``(x, y)``: a float tensor in C,H,W order and a float label tensor."""
        label = self.labels[idx]
        img = self.load_one(idx)
        if self.aug:
            img = self.augment(img)
        img = self.normalize_img(img)
        # Images are H,W,C at this point; move channels first.
        x = torch.tensor(img).float().permute(2, 0, 1)
        y = torch.tensor(label).float()
        return x, y

    def __len__(self):
        """Number of samples left after the exclusion filter."""
        return len(self.fns)

    def load_one(self, idx):
        """Read image ``idx`` from disk as an H,W,C array.

        Raises:
            FileNotFoundError: if the file is missing or cannot be decoded
                (``cv2.imread`` signals both by returning ``None``).
        """
        path = os.path.join(self.data_folder, self.fns[idx])
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        if img.ndim == 2:
            # Single-channel images come back as H,W; add the channel axis.
            img = img[:, :, None]
        return img

    def augment(self, img):
        """Cast the image to float32 and run it through ``self.aug``."""
        img = img.astype(np.float32)
        transformed = self.aug(image=img)
        return transformed["image"]

    def normalize_img(self, img):
        """Divide pixel values by 255 (assumes 8-bit image data -- TODO confirm)."""
        return img / 255

In [None]:
def gem(x, p=3, eps=1e-6):
    """Generalized-mean (GeM) pooling over the last two dimensions.

    Values are clamped to at least `eps`, raised to the power `p`, averaged
    over the full spatial extent and taken to the power `1/p`.

    Args:
        x: tensor whose last two dimensions are pooled down to 1x1.
        p: exponent of the generalized mean.
        eps: lower clamp applied before exponentiation.

    Returns:
        Tensor with the last two dimensions reduced to size 1.
    """
    height, width = x.size(-2), x.size(-1)
    powered = x.clamp(min=eps).pow(p)
    averaged = F.avg_pool2d(powered, (height, width))
    return averaged.pow(1.0 / p)


class GeM(nn.Module):
    """Module wrapper around the `gem` pooling function.

    Args:
        p: exponent of the generalized mean.
        eps: lower clamp applied to the input before exponentiation.
        p_trainable: when True, `p` is stored as a one-element learnable
            parameter; otherwise it is kept as the plain number passed in.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=False):
        super().__init__()
        if p_trainable:
            # nn.Parameter is referenced through `nn` so this does not rely on
            # a bare `Parameter` name happening to be in scope.
            self.p = nn.Parameter(torch.ones(1) * p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        """Pool `x` over its last two dimensions with the stored `p` and `eps`."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        # `p` is a tensor only in the trainable case; a plain number has no
        # `.data`, so the two cases are formatted separately.
        if torch.is_tensor(self.p):
            p_value = self.p.data.tolist()[0]
        else:
            p_value = float(self.p)
        return f"{self.__class__.__name__}(p={p_value:.4f},eps={self.eps})"



class Net(nn.Module):
    """timm backbone followed by GeM pooling and a linear classification head.

    Args:
        cfg: configuration object providing `backbone`, `pretrained`,
            `in_channels` and `classes` (one output logit per class).
    """

    def __init__(self, cfg: Any):
        super().__init__()

        self.cfg = cfg
        self.n_classes = len(cfg.classes)

        # The classifier and the built-in pooling are switched off here;
        # pooling is done by GeM below (presumably so the backbone hands back
        # an unpooled feature map -- confirm against timm).
        backbone_kwargs = dict(
            pretrained=cfg.pretrained,
            num_classes=0,
            global_pool="",
            in_chans=self.cfg.in_channels,
        )
        self.backbone = timm.create_model(cfg.backbone, **backbone_kwargs)

        # Channel count reported for the last feature stage sizes the head.
        last_stage = self.backbone.feature_info[-1]

        self.global_pool = GeM(p_trainable=False)
        self.head = torch.nn.Linear(last_stage['num_chs'], self.n_classes)

    def forward(self, x):
        """Return raw logits, one column per entry of `cfg.classes`."""
        feature_map = self.backbone(x)
        pooled = self.global_pool(feature_map)
        # GeM leaves a 1x1 spatial extent; drop it before the linear layer.
        return self.head(pooled[:, :, 0, 0])


In [None]:
# Training pipeline: centre crop to the configured size, then (with probability
# cfg.p) a random sized crop resized back to img_size x img_size.
train_aug = Compose([
    A.CenterCrop(always_apply=False, p=1.0, height=cfg.img_size, width=cfg.img_size),
    RandomSizedCrop(min_max_height=(int(cfg.img_size*0.8),cfg.img_size), height=cfg.img_size, width=cfg.img_size, p=cfg.p),
], p=1)

# Validation pipeline: deterministic centre crop only. Random cropping here
# would make valid_loss vary between evaluations, and that metric decides
# which checkpoint SaveModelCallback keeps.
val_aug = Compose([
    A.CenterCrop(always_apply=False, p=1.0, height=cfg.img_size, width=cfg.img_size),
], p=1)

In [None]:
from sklearn.model_selection import StratifiedGroupKFold

# Assign every row a validation fold: stratified on the `cancer` label and
# grouped by `patient_id`, so all rows of one patient share a fold.
kfold = StratifiedGroupKFold(n_splits = cfg.nfolds, shuffle = True, random_state = cfg.seed)

# split() yields positional row indices, so folds are collected in a positional
# array rather than written through label-based .loc (which is only correct
# when train_df carries a default RangeIndex).
fold_of_row = np.full(len(train_df), -1, dtype=int)
for num, (train_index, val_index) in enumerate(kfold.split(train_df, train_df['cancer'], train_df['patient_id'])):
    fold_of_row[val_index] = num
assert (fold_of_row >= 0).all(), "every row must belong to exactly one validation fold"
train_df['fold'] = fold_of_row

In [None]:
# Create the run's output folder together with its log sub-folder (no error if present).
log_dir = f"/kaggle/working/{cfg.save_name}/log"
os.makedirs(log_dir, exist_ok=True)
def loss_fn(preds, targets):
    """Binary cross-entropy on raw logits, with the default mean reduction.

    Args:
        preds: model logits.
        targets: float targets with the same shape as `preds`.

    Returns:
        Scalar loss tensor.
    """
    criterion = nn.BCEWithLogitsLoss()
    return criterion(preds, targets)

In [None]:
for fold in range(cfg.nfolds):
    print(' ')
    print(f"========== fold: {fold} training ==========")

    # Rows tagged with the current fold are held out for validation.
    is_val = train_df['fold'] == fold
    x_train, x_val = train_df[~is_val], train_df[is_val]

    ds_t = CustomDataset(x_train, cfg, train_aug)
    ds_v = CustomDataset(x_val, cfg, val_aug)
    data = DataLoaders.from_dsets(
        ds_t, ds_v,
        bs=cfg.bs, num_workers=cfg.nw, pin_memory=True,
    ).to(cfg.device_ids[0])

    model = Net(cfg)
    if len(cfg.device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=cfg.device_ids)
    model.to(cfg.device_ids[0])

    # Keep the checkpoint with the lowest validation loss and log metrics to CSV.
    monitor = "valid_loss"
    comp = np.less
    callbacks = [
        SaveModelCallback(monitor=monitor, comp=comp, fname=cfg.save_name+f"_{fold}"),
        CSVLogger(fname=f"log/{cfg.save_name}"+f"_{fold}.csv"),
    ]
    learn = Learner(
        data, model,
        wd=cfg.wd, lr=cfg.lr, loss_func=loss_fn,
        model_dir="", path=f"/kaggle/working/{cfg.save_name}",
        cbs=callbacks,
    ).to_fp16()

    print(f"Fold {fold}: {cfg.save_name}")
    learn.fit_one_cycle(cfg.epochs)
    # The loop stops after its first iteration, so only fold 0 is trained.
    break

In [1]:
# Scratch check: build the stock pretrained backbone with timm's default settings.
import timm
model = timm.create_model("seresnext50_32x4d", pretrained=True)

In [3]:
# Show the pretrained model's default config (weights URL, expected input size, mean/std, ...).
model.default_cfg

{'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext50_32x4d_racm-a304a460.pth',
 'num_classes': 1000,
 'input_size': (3, 224, 224),
 'pool_size': (7, 7),
 'crop_pct': 0.875,
 'interpolation': 'bicubic',
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'first_conv': 'conv1',
 'classifier': 'fc',
 'architecture': 'seresnext50_32x4d'}

In [13]:
import torch
# Random 3-channel input with a non-square 1024x224 spatial size.
x = torch.randn(1, 3, 1024, 224)
# Replace the classifier with a pass-through so the model outputs its features.
model.fc = torch.nn.Identity()

In [14]:
# With the classifier replaced by Identity, inspect the shape the model returns for `x`.
model(x).shape

torch.Size([1, 2048])

In [1]:
# Scratch arithmetic: 42*37 is evaluated first (1554), then +2 and -1 give 1555.
2+42*37-1

1555