In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%cd /home/aditya/git/kaggle_pneumonia 
%env PROJECT_PATH = /home/aditya/git/kaggle_pneumonia 

/home/aditya/git/kaggle_pneumonia
env: PROJECT_PATH=/home/aditya/git/kaggle_pneumonia


In [3]:
%matplotlib inline
import seaborn as sns

In [4]:
import pandas as pd
import numpy as np
import pydicom
from PIL import Image
import multiprocessing as mp
from tqdm import tqdm_notebook as tqdm

In [5]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [6]:
import torch.nn as nn
import torch.nn.functional as F

In [7]:
import torch.optim as optim
import torchvision
from sklearn.model_selection import train_test_split

from utils.envs import *
from utils.data_load import *
from utils.lr_finder import lr_plot, lr_find
from utils.common import get_batch_info
from utils.checkpoint import save_checkpoint, load_cp_model, load_cp_optim
from utils.logger import logger
from model.dataset import LabelDataset, BBDataset
from model.arch.header import Header, Res50BBHead
from model.arch.respneunet import ResPneuNet
from model.criterion import LabelBoundBoxCriterion
from model.train import train_step, fit_model
from model.validation import validate_model
from model.optim import CLR
from model.test import predict_model
from utils.pytorch import accuracy
from utils.kaggle import create_kaggle_df, create_predict_df

In [8]:
# Run on the first CUDA device when one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [9]:
# Load the bounding-box table from the CSV at `bb_repo`.
# `bb_repo` is not defined in this notebook — presumably it comes from one of
# the `from utils... import *` lines above; confirm.
bb_df = pd.read_csv(bb_repo)

In [10]:
# Hold out 20% of the rows of bb_df for validation and train on the rest.
#
# The split is seeded: without a fixed random_state, train_test_split draws a
# new shuffle on every run, so validation numbers from two runs of this
# notebook would be measured on different rows and could not be compared.
# NOTE(review): the split is row-wise. If bb_df can hold several rows per
# patient, the same patient may land in both dev and val — confirm.
SPLIT_SEED = 42
train_idx = np.arange(len(bb_df))
dev_idx, val_idx = train_test_split(train_idx, test_size = 0.20, random_state = SPLIT_SEED)
# Re-index both halves from 0 so positional and label-based access agree.
dev_df = bb_df.iloc[dev_idx,:].reset_index(drop = True)
val_df = bb_df.iloc[val_idx,:].reset_index(drop = True)

In [11]:
# Build the BBDataset instances that back the four data loaders.
# NOTE(review): bb_train_dataset and bb_dev_dataset are constructed with
# identical arguments (both from dev_df).
# NOTE(review): the first positional flag is True for the three splits that are
# given a DataFrame and False for the test set, which gets none — presumably a
# train/test switch; confirm against model.dataset.BBDataset.
bb_train_dataset = BBDataset(True, device, dev_df)
bb_dev_dataset = BBDataset(True, device, dev_df)
bb_val_dataset = BBDataset(True, device, val_df)
bb_test_dataset = BBDataset(False, device)

In [12]:
# One loader per dataset, 32 samples per batch each.
# Only the dev loader (the one handed to fit_model) requests shuffling; the
# other three rely on DataLoader's default ordering.
train_dataloader = DataLoader(dataset = bb_train_dataset, batch_size = 32)
dev_dataloader = DataLoader(dataset = bb_dev_dataset, shuffle = True, batch_size = 32)
val_dataloader = DataLoader(dataset = bb_val_dataset, batch_size = 32)
test_dataloader = DataLoader(dataset = bb_test_dataset, batch_size = 32)

In [13]:
# Assemble the network: a torchvision ResNet-50 loaded with pretrained weights
# and a Res50BBHead, each moved to `device`, wrapped together by ResPneuNet.
# NOTE(review): the meaning of Res50BBHead([1000], 0.5) is not visible here —
# presumably layer widths and a dropout rate; confirm in model.arch.header.
preload_model = torchvision.models.resnet50(pretrained=True).to(device)
header_model = Res50BBHead([1000], 0.5).to(device)
model = ResPneuNet(preload_model, header_model)

In [14]:
n_epoch = 5

# Two parameter groups with separate learning rates: 1e-4 for the backbone,
# 1e-3 for the header.
param_groups = [
    {'params': model.preload_backbone.parameters(), 'lr': 0.0001},
    {'params': model.header.parameters(), 'lr': 0.001},
]
optimizer = optim.Adam(
    param_groups,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

# Mean-squared-error objective, placed on the same device as the model parts.
criterion = nn.MSELoss().to(device)

In [15]:
# Read the batch bookkeeping for the training loader and build a CLR object
# bound to `optimizer` (presumably a cyclical learning-rate schedule — confirm
# in model.optim).
# NOTE(review): the trailing CLR arguments (0.1, 1., 0.95, 0.85, 2) are
# positional and their meaning is not visible in this notebook.
# NOTE(review): `callbacks` is not passed to the fit_model call later in this
# notebook, so as far as this file shows the CLR object never reaches the
# training loop — confirm whether that is intended.
n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
clr = CLR(optimizer, n_epoch, n_batch_per_epoch, 0.1, 1., 0.95, 0.85, 2)
callbacks = [clr]

In [16]:
def loss_fn(model, criterion, data):
    """Compute the loss for one batch.

    Args:
        model: callable applied to the batch inputs.
        criterion: callable invoked as ``criterion(prediction, target)``.
        data: two-item sequence unpacked as ``(inputs, target)``.

    Returns:
        Whatever ``criterion`` returns for the model output and the target.
    """
    inputs, expected = data
    return criterion(model(inputs), expected)

In [17]:
def metric_fn(model, data):
    """Score one batch with mean squared error.

    Args:
        model: callable applied to the batch inputs.
        data: two-item sequence unpacked as ``(inputs, target)``.

    Returns:
        The result of ``F.mse_loss`` between the model output and the target.
    """
    inputs, expected = data
    return F.mse_loss(model(inputs), expected)

In [18]:
def pred_fn(model, data, scale = 1024.):
    """Run the model on one batch and return the scaled predictions as lists.

    Args:
        model: callable applied to the batch; must return a torch tensor.
        data: the batch to feed to ``model`` (inputs only, no targets).
        scale: factor every predicted value is multiplied by. Defaults to
            1024., the constant this notebook has always used — presumably
            the image side length in pixels; confirm against the dataset.

    Returns:
        The scaled predictions as nested Python lists
        (``numpy.ndarray.tolist()`` of the output).
    """
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        prediction = model(data)
    # .detach() is the supported replacement for the legacy .data attribute.
    prediction_array = prediction.detach().cpu().numpy() * scale
    return prediction_array.tolist()

In [19]:
# Fit on dev_dataloader with val_dataloader as the validation loader; the
# value returned by fit_model replaces `model` and is used for prediction below.
# checkpoint = True and model_fn = 'bb' are forwarded as-is — presumably they
# enable checkpoint files tagged 'bb'; confirm in model.train.
model = fit_model(model, n_epoch, dev_dataloader, optimizer, criterion, loss_fn, metric_fn, val_dataloader, checkpoint = True, model_fn = 'bb')

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, max=142), HTML(value='')))

HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

 val_loss : 0.023541188850584958, val_metric : 0.023541188850584958


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))

HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

 val_loss : 0.022597675289337833, val_metric : 0.022597675289337833


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))

HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

 val_loss : 0.024097457424634032, val_metric : 0.024097457424634032


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))

HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

 val_loss : 0.022185292198426194, val_metric : 0.022185292198426194


HBox(children=(IntProgress(value=0, max=142), HTML(value='')))

HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

 val_loss : 0.021407109814592533, val_metric : 0.021407109814592533



In [20]:
# Run the fitted model over the test loader, using pred_fn to convert each
# batch's output. The next cell indexes each element of `prediction` as a row
# of at least four values.
prediction = predict_model(model, test_dataloader, pred_fn)

HBox(children=(IntProgress(value=0, max=32), HTML(value='')))




In [21]:
# Render each predicted row as its first four values separated by single spaces.
string_prediction = ["{} {} {} {}".format(*row[:4]) for row in prediction]

In [22]:
# Collect the two columns of the prediction table: patient ids read from the
# test dataset's `patientId` attribute, and the box strings built above.
# NOTE(review): this assumes patientId is in the same order as the predictions;
# the test loader is created without shuffle = True — confirm in BBDataset.
patientid = test_dataloader.dataset.patientId
pneu_bb = string_prediction

In [23]:
# Two-column frame: 'name' holds the patient ids, 'label' holds the
# space-separated four-value prediction strings.
bb_pred_df = pd.DataFrame({
    'name' : patientid,
    'label' : pneu_bb
})

In [24]:
# Write the bounding-box predictions to the path in `bb_predict_repo`
# (defined outside this notebook), omitting the index column.
bb_pred_df.to_csv(bb_predict_repo, index = False)

In [25]:
# Hand the fitted model and its optimizer to save_checkpoint under the name
# 'bb'; what is written and where is decided by utils.checkpoint.
save_checkpoint(model, optimizer, fname = 'bb')

In [26]:
# Load label predictions from the CSV at `label_predict_repo`.
# NOTE(review): this file is not produced anywhere in this notebook —
# presumably a separate classification run writes it; it must exist first.
label_pred_df = pd.read_csv(label_predict_repo)

In [27]:
# Combine the label predictions with the bounding-box predictions using
# create_predict_df from utils.kaggle (how they are merged is defined there).
predict_df = create_predict_df(label_pred_df, bb_pred_df)

In [28]:
# Convert the combined predictions with create_kaggle_df from utils.kaggle —
# presumably into the competition's submission layout; confirm there.
kaggle_df = create_kaggle_df(predict_df)

In [31]:
# Write the final frame to the path in `kaggle_repo` (defined outside this
# notebook), omitting the index column.
kaggle_df.to_csv(kaggle_repo, index = False)