In [1]:
# ====================================================
# Library
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import IPython.display

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
from PIL import Image
from glob import glob
import scipy as sp
import numpy as np
import pandas as pd
#!conda install *.tar.bz2 
#import Pyvips

from sklearn import preprocessing
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold, train_test_split



from tqdm.auto import tqdm
from functools import partial
import tifffile as tiff

import cv2
from openslide import OpenSlide
import seaborn as sns
from matplotlib import pyplot as plt
from pprint import pprint

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
import torchvision.transforms as transforms
import torch.optim as optim
import gc

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

OUTPUT_DIR = './'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
#from pytorch_grad_cam.utils.image import show_cam_on_image
#from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM

#import timm

from torch.cuda.amp import autocast, GradScaler
Image.MAX_IMAGE_PIXELS = None
import warnings
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
train_data = pd.read_csv('../input/mayo-clinic-strip-ai/train.csv')
train_data['bilabel'] = np.where(train_data['label']== 'CE', 1, 0)
train, vaild = train_test_split(train_data, test_size=0.2)

test = pd.read_csv('../input/mayo-clinic-strip-ai/test.csv')
sample_sub = pd.read_csv('../input/mayo-clinic-strip-ai/sample_submission.csv')
patients_train = train['patient_id'].nunique()
patients_test = test['patient_id'].nunique()
#print(test.head())
#print(f"Number of unique patients is train set: {patients_train}")
#print(f"Number of unique patients is test set: {patients_test}")
#sns.displot(train['label'], stat='percent')
#plt.show()


In [3]:
"""# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    score = roc_auc_score(y_true, y_pred)
    return score


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)"""



In [4]:
# ====================================================
# Dataset
# ====================================================
class TrainDataset(Dataset):
    def __init__(self, path, df, transform=None):
        self.df = df
        self.path = path
        self.Image_names = df['image_id'].values
        self.labels = df['bilabel'].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Skip 2c3c06_0.tif as it is too large to read.
        #if image_id == "2c3c06_0":
         #   continue
        file_name = self.Image_names[idx]
        print(file_name)
        image = tiff.imread(os.path.join(self.path, f"{file_name}.tif"))
        cv2.imwrite(os.path.join("./", f"{file_name}.jpg"), image[::scale,::scale,::-1])
        del image
        gc.collect()
        img = Image.open(os.path.join('./', f"{file_name}.jpg"))
        
        if self.transform:
            image=self.transform(img)
        # add another dimension at the front to get NCHW shape
        #image = img.unsqueeze(0)
        label = self.labels[idx]

        return image, torch.tensor(label)

In [5]:
scale = 4
data_transform = transforms.Compose([
        transforms.Resize((256,256)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
train_dataset = TrainDataset("../input/mayo-clinic-strip-ai/train", train, transform = data_transform)
vaild_dataset = TrainDataset("../input/mayo-clinic-strip-ai/train", vaild, transform = data_transform)
#test_dataset = TrainDataset("../input/mayo-clinic-strip-ai/train", test, transform = data_transform)

dataset_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=4, shuffle=True,
                                             num_workers=0)
dataset_loader_vaild = torch.utils.data.DataLoader(vaild_dataset,
                                             batch_size=4, shuffle=True,
                                             num_workers=0)


In [6]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc


In [7]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels= 96, kernel_size= 11, stride=4, padding=0 )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride= 1, padding= 2)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride= 1, padding= 1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.fc1  = nn.Linear(in_features= 9216, out_features= 4096)
        self.fc2  = nn.Linear(in_features= 4096, out_features= 4096)
        self.fc3 = nn.Linear(in_features=4096 , out_features=1)


    def forward(self,x):
        x = F.relu(self.conv1(x))
        x = self.maxpool(x)
        x = F.relu(self.conv2(x))
        x = self.maxpool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.maxpool(x)
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = F.sigmoid(x)
        return x

In [8]:
model = AlexNet()
model = model.to(device=device)
print(device)
## Loss and optimizer
learning_rate = 1e-4 #I picked this because it seems to be the most used by experts
load_model = True
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr= learning_rate) #Adam seems to be the most popular for deep learning
for epoch in range(5): #I decided to train the model for 5 epochs
    loss_ep = 0
    epoch_acc = 0
    
    for batch_idx, (data, targets) in enumerate(dataset_loader):
        data = data.to(device=device)
        targets = targets.type(torch.FloatTensor).to(device=device)
        
        ## Forward Pass
        optimizer.zero_grad()
        scores = model(data)
        loss = criterion(scores.squeeze(),targets)
        acc = binary_acc(scores.squeeze(),targets)
        loss.backward()
        optimizer.step()
        loss_ep += loss.item()
        epoch_acc += acc.item()
        print(batch_idx)
    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(dataset_loader):.5f} | Acc: {epoch_acc/len(dataset_loader):.3f}')

        
        
    with torch.no_grad():
        num_correct = 0
        num_samples = 0
        for batch_idx, (data,targets) in enumerate(dataset_loader_vaild):
            data = data.to(device=device)
            targets = targets.to(device=device)
            ## Forward Pass
            scores = model(data)
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)
        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
        )


cuda
510d2e_0


KeyboardInterrupt: 