In [2]:
# Enable autoreload (mode 2) so edits to the project's .py modules are
# picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Switch the working directory to the project checkout and export the same
# location as PROJECT_PATH.
# NOTE(review): both lines hard-code one user's absolute path; anyone else has
# to edit them before the notebook will run. Presumably the `utils.*` imports
# depend on this cwd / PROJECT_PATH — confirm.
%cd /home/adityasidharta/git/kaggle_pneumonia 
%env PROJECT_PATH = /home/adityasidharta/git/kaggle_pneumonia 

/home/adityasidharta/git/kaggle_pneumonia
env: PROJECT_PATH=/home/adityasidharta/git/kaggle_pneumonia


In [6]:
%matplotlib inline
import seaborn as sns

In [7]:
import pandas as pd
import numpy as np
import pydicom
from PIL import Image
import multiprocessing as mp
from tqdm import tqdm_notebook as tqdm

In [8]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Silence every warning category for the rest of the session. This keeps cell
# output short, but it also hides deprecation and numerical warnings.
import warnings
warnings.filterwarnings("ignore")

In [9]:
import torch.nn as nn
import torch.nn.functional as F

In [13]:
import torch.optim as optim
import torchvision
from sklearn.model_selection import train_test_split

from utils.envs import *
from utils.data_load import *
from utils.lr_finder import lr_plot, lr_find
from utils.common import get_batch_info
from utils.checkpoint import save_checkpoint, load_cp_model, load_cp_optim
from utils.logger import logger
from dev.dataset import Label_Dataset_Test, Label_Dataset
from model.arch.header import Header, Res50ClassHead
from model.arch.respneunet import ResPneuNet
from model.dataset import Single_Pneumonia_Dataset, Single_Pneumonia_Dataset_Test
from model.criterion import LabelBoundBoxCriterion
from model.train import train_step, fit_model
from model.validation import validate_model
from model.optim import CLR
from utils.pytorch import accuracy

In [14]:
# Prefer the first CUDA device when one is available, otherwise run on CPU.
_device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)

In [15]:
# Load the label table and attach the area (width * height) of each row.
train_label_df = pd.read_csv(train_label_repo)
train_label_df['area'] = train_label_df['width'] * train_label_df['height']

# Reduce to one row per patientId, keeping the row with the largest area,
# then restore the original row order, renumber the index and replace every
# missing value with 0.
single_label_df = (
    train_label_df
    .sort_values('area', ascending=False)
    .drop_duplicates('patientId')
    .sort_index()
    .reset_index(drop=True)
    .fillna(0)
)

In [16]:
# Split row positions 80/20 into development and validation sets.
# The seed is fixed so that Restart & Run All reproduces the same partition;
# without it every run trains and validates on a different subset and the
# reported losses are not comparable between runs.
SPLIT_SEED = 42
train_idx = np.arange(len(single_label_df))
dev_idx, val_idx = train_test_split(train_idx, test_size=0.20, random_state=SPLIT_SEED)

In [17]:
# Materialise the development and validation frames from their positional
# indices, each with a fresh 0..n-1 index.
dev_df, val_df = (
    single_label_df.iloc[row_positions].reset_index(drop=True)
    for row_positions in (dev_idx, val_idx)
)

In [18]:
# The development and validation datasets both read from the training DICOM
# directory; the test dataset is built from the test directory alone (no
# label frame is passed to it).
dev_dataset, val_dataset = (
    Label_Dataset(label_frame, train_dcm_path, device)
    for label_frame in (dev_df, val_df)
)
test_dataset = Label_Dataset_Test(test_dcm_path, device)

In [19]:
# One batch size shared by all three loaders. Only the development loader
# shuffles; validation and test keep dataset order.
BATCH_SIZE = 32
dev_dataloader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [20]:
# ResNet-50 backbone with pretrained weights, moved onto the chosen device.
preload_model = torchvision.models.resnet50(pretrained=True)
preload_model = preload_model.to(device)

# Project-defined head placed on the same device.
# NOTE(review): the list presumably gives layer widths and 0.5 a dropout
# rate — confirm against Res50ClassHead.
header_model = Res50ClassHead(
    [1000, 1000, 128],
    0.5,
).to(device)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/adityasidharta/.torch/models/resnet50-19c8e357.pth
100%|██████████| 102502400/102502400 [00:01<00:00, 85643715.26it/s]


In [21]:
# Combine the pretrained backbone and the head into the network that is
# trained below. Both parts were already moved to `device` when they were
# created.
model = ResPneuNet(preload_model, header_model)

In [22]:
# Number of training epochs; also used when the CLR schedule is built.
n_epoch = 4

# Binary cross-entropy on raw logits, placed on the training device.
criterion = nn.BCEWithLogitsLoss()
criterion = criterion.to(device)

# Adam over every model parameter with a 1e-4 learning rate.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.0001,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

In [23]:
# Query the development loader for its size information; only
# n_batch_per_epoch is used below.
n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
# Build the project's CLR object for this optimizer, sized by the epoch and
# per-epoch batch counts.
# NOTE(review): the meaning of the trailing positional constants
# (0.1, 1., 0.95, 0.85, 2) is not visible here — check model.optim.CLR.
clr = CLR(optimizer, n_epoch, n_batch_per_epoch, 0.1, 1., 0.95, 0.85, 2)
# NOTE(review): `callbacks` is not passed to fit_model in the visible cells,
# so the schedule may never be applied — confirm how fit_model consumes it.
callbacks = [clr]

In [24]:
def calc_loss(model, criterion, data):
    """Run one forward pass on a batch and score it with ``criterion``.

    Args:
        model: callable applied to the input half of ``data``.
        criterion: callable invoked as ``criterion(prediction, target)``.
        data: two-item sequence unpacked as ``(img, target)``.

    Returns:
        Whatever ``criterion`` returns for the model output and the target.
    """
    inputs, labels = data
    return criterion(model(inputs), labels)

In [25]:
def calc_metric(model, data):
    """Compute the accuracy of ``model`` on one batch.

    Args:
        model: callable applied to the input half of ``data``.
        data: two-item sequence unpacked as ``(img, target)``.

    Returns:
        The value returned by ``accuracy(prediction, target)``.
    """
    img, target = data
    # The metric is only reported, never backpropagated, so the forward pass
    # runs without recording an autograd graph. This avoids holding
    # activations in memory for every metric batch.
    with torch.no_grad():
        prediction = model(img)
    return accuracy(prediction, target)

In [None]:
# Train with `n_epoch` rather than a repeated literal, so the epoch count
# always matches the one the CLR schedule was built with.
model = fit_model(model, n_epoch, dev_dataloader, optimizer, criterion, calc_loss, calc_metric, val_dataloader)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

train_loss : 0.8276632970451011


HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

val_loss : 0.8106503470953598


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

train_loss : 0.8320373250388803


HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

val_loss : 0.8141669711362804


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

train_loss : 0.8720353810264385


HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

val_loss : 0.8453713006941906


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

In [None]:
# Run the trained model over the test loader.
# NOTE(review): neither `predict_model` nor `pred_fn` is defined or explicitly
# imported in the visible cells, and this cell has no execution count.
# Confirm they come from one of the star imports, or add the import /
# definition before running.
predict_model(model, test_dataloader, pred_fn)