In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Make the project root the working directory and export the same location
# through the PROJECT_PATH environment variable.
# NOTE(review): this absolute path is specific to one machine; presumably the
# utils.* / model.* imports further down depend on it -- confirm before
# replacing it with a configurable location.
%cd /home/aditya/git/kaggle_pneumonia 
%env PROJECT_PATH = /home/aditya/git/kaggle_pneumonia 

/home/aditya/git/kaggle_pneumonia
env: PROJECT_PATH=/home/aditya/git/kaggle_pneumonia


In [3]:
%matplotlib inline
import seaborn as sns

In [4]:
import pandas as pd
import numpy as np
import pydicom
from PIL import Image
import multiprocessing as mp
from tqdm import tqdm_notebook as tqdm

In [5]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [6]:
import torch.nn as nn
import torch.nn.functional as F

In [7]:
import torch.optim as optim
import torchvision
from sklearn.model_selection import train_test_split

from utils.envs import *
from utils.data_load import *
from utils.lr_finder import lr_plot, lr_find
from utils.common import get_batch_info
from utils.checkpoint import save_checkpoint, load_cp_model, load_cp_optim
from utils.logger import logger
from model.dataset import LabelDataset, BBDataset
from model.arch.header import Header, Res50ClassHead
from model.arch.respneunet import ResPneuNet
from model.criterion import LabelBoundBoxCriterion
from model.train import train_step, fit_model
from model.validation import validate_model
from model.optim import CLR
from model.test import predict_model
from utils.pytorch import accuracy

In [8]:
# Use the first CUDA GPU when one is visible to torch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [9]:
# Read the label table from the CSV located at `label_repo`.
# NOTE(review): `label_repo` is not defined in this notebook; presumably it
# comes from one of the star imports (utils.envs / utils.data_load) -- confirm.
label_df = pd.read_csv(filepath_or_buffer=label_repo)

In [10]:
# Hold out 20% of the label rows for validation and keep the rest for
# development.  The split is seeded so that Restart & Run All reproduces the
# same dev/val partition (an unseeded call draws a new partition every run,
# which makes the validation numbers below incomparable between runs).
SPLIT_SEED = 42
train_idx = np.arange(len(label_df))
dev_idx, val_idx = train_test_split(train_idx, test_size = 0.20, random_state = SPLIT_SEED)
# NOTE(review): rows are split independently of each other; if label_df can
# contain several rows for one patient, that patient may end up in both
# partitions -- verify against the label file.
# reset_index(drop=True) gives each partition a fresh 0..n-1 index.
dev_df = label_df.iloc[dev_idx,:].reset_index(drop = True)
val_df = label_df.iloc[val_idx,:].reset_index(drop = True)

In [11]:
# Build one LabelDataset per labelled frame (full, dev, val), in that order.
# NOTE(review): the leading True/False flag is presumably a "has labels /
# training" switch -- confirm against model.dataset.LabelDataset.
label_train_dataset, label_dev_dataset, label_val_dataset = (
    LabelDataset(True, device, frame) for frame in (label_df, dev_df, val_df)
)
# The test dataset is built without a frame and with the flag set to False.
label_test_dataset = LabelDataset(False, device)

In [12]:
# Batches of 32 everywhere.  Only the dev loader (used for fitting) shuffles;
# the others keep dataset order, which is DataLoader's default.
# NOTE(review): train_dataloader is not referenced again in the cells shown.
train_dataloader = DataLoader(dataset=label_train_dataset, batch_size=32, shuffle=False)
dev_dataloader = DataLoader(dataset=label_dev_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(dataset=label_val_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(dataset=label_test_dataset, batch_size=32, shuffle=False)

In [13]:
# Backbone: torchvision ResNet-50 with pretrained weights, moved to `device`.
preload_model = torchvision.models.resnet50(pretrained=True)
preload_model = preload_model.to(device)
# Head: project-defined classifier, also moved to `device`.
# NOTE(review): the meaning of ([1000], 0.5) is not visible here -- presumably
# layer sizes and a dropout rate; confirm in model.arch.header.
header_model = Res50ClassHead([1000], 0.5)
header_model = header_model.to(device)
# Combine backbone and head; later cells read them back through
# model.preload_backbone and model.header.
model = ResPneuNet(preload_model, header_model)

In [14]:
# Number of training epochs, and a binary cross-entropy loss that takes raw logits.
n_epoch = 5
criterion = nn.BCEWithLogitsLoss().to(device)
# Adam with two parameter groups: the backbone is updated with a learning rate
# ten times smaller (1e-4) than the header (1e-3).
optimizer = optim.Adam(
    [
        {'params': model.preload_backbone.parameters(), 'lr': 0.0001},
        {'params': model.header.parameters(), 'lr': 0.001},
    ],
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

In [15]:
# Ask the project helper for the dev loader's sizes; the batches-per-epoch
# count is forwarded to the CLR object below.
n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
# NOTE(review): the numeric arguments are positional and their meaning is not
# visible in this notebook -- check model.optim.CLR before changing them.
clr = CLR(
    optimizer,
    n_epoch,
    n_batch_per_epoch,
    0.1,
    1.,
    0.95,
    0.85,
    2,
)
# NOTE(review): `callbacks` is not passed to fit_model in the cells shown;
# confirm whether the CLR schedule is actually applied during training.
callbacks = [clr]

In [16]:
def loss_fn(model, criterion, data):
    """Run one forward pass and score it with ``criterion``.

    Args:
        model: callable applied to the batch inputs.
        criterion: callable taking ``(prediction, target)``.
        data: a two-item sequence ``(inputs, targets)``.

    Returns:
        Whatever ``criterion`` returns for the model output and the targets.
    """
    inputs, labels = data
    return criterion(model(inputs), labels)

In [17]:
def metric_fn(model, data):
    """Binary cross-entropy (on logits) between the model output and the targets.

    Args:
        model: callable applied to the batch inputs; its output is treated as logits.
        data: a two-item sequence ``(inputs, targets)``.

    Returns:
        The value of ``F.binary_cross_entropy_with_logits`` with its default
        settings.  This is the same quantity as the ``nn.BCEWithLogitsLoss``
        criterion used for training, which is why the logged ``val_metric``
        equals ``val_loss``.
    """
    inputs, labels = data
    logits = model(inputs)
    return F.binary_cross_entropy_with_logits(logits, labels)

In [18]:
def pred_fn(model, data):
    """Return sigmoid probabilities for one batch as a flat Python list.

    Args:
        model: callable mapping the batch to raw logits (a tensor).
        data: the input batch; it is passed to ``model`` unchanged.

    Returns:
        list of float: ``sigmoid(model(data))`` flattened to one dimension.
    """
    # Inference only: disable autograd so no graph is built or kept per batch.
    with torch.no_grad():
        logits = model(data)
        # torch.sigmoid yields the same values as F.sigmoid, which older
        # PyTorch releases flag as deprecated.
        probabilities = torch.sigmoid(logits)
    # detach() is the supported replacement for reading `.data`.
    return probabilities.detach().cpu().numpy().reshape(-1).tolist()

In [19]:
# Fit on the dev loader and evaluate on the val loader via the project's
# fit_model helper; the returned object replaces `model`.
# NOTE(review): the `callbacks` list built earlier is not passed here --
# confirm whether fit_model is expected to receive it.
model = fit_model(
    model,
    n_epoch,
    dev_dataloader,
    optimizer,
    criterion,
    loss_fn,
    metric_fn,
    val_dataloader,
    checkpoint = True,
    model_fn = 'label',
)

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

 val_loss : 0.36714618926092707, val_metric : 0.36714618926092707


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

 val_loss : 0.35387433176825506, val_metric : 0.35387433176825506


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

 val_loss : 0.42664306633961124, val_metric : 0.42664306633961124


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

 val_loss : 0.35381431243619565, val_metric : 0.35381431243619565


HBox(children=(IntProgress(value=0, max=643), HTML(value='')))

HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

 val_loss : 0.38543738248925774, val_metric : 0.38543738248925774



In [20]:
# Run the fitted model over the test loader, using pred_fn for each batch.
# NOTE(review): presumably predict_model concatenates the per-batch lists in
# loader order -- confirm in model.test.
prediction = predict_model(model, test_dataloader, pred_fn)

HBox(children=(IntProgress(value=0, max=32), HTML(value='')))




In [21]:
# Gather the two submission columns: predicted probabilities and the patient
# ids stored on the test dataset.
# NOTE(review): pairing them by position assumes predictions come back in
# dataset order (the test loader does not shuffle) -- confirm predict_model
# preserves batch order.
pneu_prob = prediction
patientid = test_dataloader.dataset.patientId

In [22]:
# Two-column result table: patient id under 'name', probability under 'prob'.
submission_columns = {'name' : patientid, 'prob' : pneu_prob}
result_df = pd.DataFrame.from_dict(submission_columns)

In [23]:
# Write the result table as CSV to `label_predict_repo`, without the index column.
# NOTE(review): `label_predict_repo` is not defined in this notebook;
# presumably it comes from a star import -- confirm.
result_df.to_csv(path_or_buf=label_predict_repo, index=False)