In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pydicom
import time
import copy
import torchvision.models as models
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from matplotlib.patches import Rectangle
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader,Subset
import cv2
# Directory that holds the challenge label CSV and the DICOM image folders.
DATA_DIR = "../input/rsna-pneumonia-detection-challenge"
# Use the first CUDA device when one is visible, otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
print(torch.__version__)

1.5.1


In [2]:
#df = pd.read_csv(f'{DATA_DIR}/stage_2_train_labels.csv')

In [3]:
def parse_data(df,data_dir,loc,dataformat="dict"):
    """
    Group the rows of an annotation dataframe by patient.

    Every patient id maps to one record:

        {
            'dicom': '<data_dir>/stage_2_<loc>_images/<patientId>.dcm',
            'label': the 'Target' value of the first row seen for the patient,
            'boxes': one [x, y, width, height] list per row of that patient
                     when the label equals 1, otherwise an empty list
        }

    Args:
        df: dataframe with the columns patientId, x, y, width, height, Target.
        data_dir: root folder used to build the DICOM path.
        loc: split name inserted into the image folder name.
        dataformat: "list" returns a list of (patientId, record) tuples in
            first-seen order; any other value returns the dictionary itself.

    Returns:
        The dictionary described above, or its items as a list.
    """
    records = {}
    for _, entry in df.iterrows():
        patient = entry['patientId']
        record = records.get(patient)
        if record is None:
            # First row for this patient: create the record and fix its label.
            record = {
                'dicom': f'{data_dir}/stage_2_{loc}_images/{patient}.dcm',
                'label': entry['Target'],
                'boxes': [],
            }
            records[patient] = record

        # Only patients labelled 1 carry box coordinates.
        if record['label'] != 1:
            continue
        record['boxes'].append(
            [entry['x'], entry['y'], entry['width'], entry['height']]
        )

    if dataformat == "list":
        return list(records.items())
    return records

In [4]:
#parsed = parse_data(df,DATA_DIR,"train","list")

In [5]:
class PneumoniaDataset(Dataset):
    """Image-level pneumonia classification dataset read from DICOM files.

    Each item is ``(image, label, patient_id)``. The bounding boxes parsed from
    the label CSV stay available in ``self.parsed_data`` but are not returned.
    """
    def __init__(self,data_dir,data_for,transform=None):
        """
        Args:
            data_dir: folder containing ``stage_2_train_labels.csv`` and the
                ``stage_2_<data_for>_images`` DICOM folder.
            data_for: split name used to build the DICOM paths (e.g. "train").
            transform: optional callable invoked as ``transform(image=...)``
                that returns a mapping with an ``"image"`` entry.
        """
        df = pd.read_csv(f'{data_dir}/stage_2_train_labels.csv')
        # List of (patient_id, record) tuples so items can be addressed by index.
        self.parsed_data = parse_data(df,data_dir,data_for,"list")
        self.transform = transform

    def __len__(self):
        """Number of distinct patients."""
        return len(self.parsed_data)

    def __getitem__(self,idx):
        """Load one sample.

        Returns:
            image: the transformed image, or the raw (H, W, 3) array when no
                transform is set.
            label: float32 tensor of shape (1,) holding the patient label.
            patient_id: the patient identifier.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        patient_id, record = self.parsed_data[idx]
        label = torch.as_tensor([record["label"]]).to(torch.float32)

        # dcmread is the supported reader; read_file is its deprecated alias.
        dcm_data = pydicom.dcmread(record["dicom"])
        # Replicate the single-channel scan into three identical channels.
        image = np.stack([dcm_data.pixel_array] * 3, axis=2)

        if self.transform is not None:
            image = self.transform(image=image)["image"]
            # The original permute(2,0,1).to(float32) discarded its result.
            # The permute is dropped on purpose: ToTensorV2 already emits
            # channel-first tensors. Only the float32 cast is kept, and it is
            # now actually assigned.
            if torch.is_tensor(image):
                image = image.to(torch.float32)

        return image,label,patient_id
    

In [6]:
def get_train_transform():
    """Build the training pipeline: random flip, resize, normalize, to tensor.

    Returns:
        An ``A.Compose`` that is called as ``transform(image=...)``.
    """
    return A.Compose([
        # The probability must be passed by keyword. In albumentations
        # releases whose first positional parameter is `always_apply`,
        # HorizontalFlip(0.5) set always_apply=0.5 (truthy) and flipped every
        # image instead of roughly half of them.
        A.HorizontalFlip(p=0.5),
        A.Resize(224,224,p=1),
        A.Normalize(),
        ToTensorV2(p=1.0)
    ])

def get_valid_transform():
    """Build the validation pipeline: resize, normalize, convert to tensor.

    No random augmentation is included.
    """
    steps = [
        A.Resize(224,224,p=1),
        A.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps)

In [7]:
def get_data_loaders(data_dir,batch_size,val_per):
    """Create the training and validation data loaders from one random split.

    Two dataset instances are built over the same patients so that each split
    can use its own transform; a single random permutation decides which
    patients go to which split.

    Args:
        data_dir: dataset root passed to ``PneumoniaDataset``.
        batch_size: batch size for both loaders.
        val_per: fraction of the patients held out for validation; values
            outside [0, 1] are clamped to that range.

    Returns:
        ``{"train": DataLoader, "val": DataLoader}``.
    """
    train_dataset = PneumoniaDataset(data_dir,"train",transform=get_train_transform())
    valid_dataset = PneumoniaDataset(data_dir,"train",transform=get_valid_transform())

    total = len(train_dataset)
    indices = torch.randperm(total).tolist()
    val_size = min(max(int(val_per*total), 0), total)
    # Split on an explicit boundary. Negative slicing breaks for val_size == 0:
    # indices[:-0] is empty and indices[-0:] is the whole list, which would
    # leave the training split empty and put everything in validation.
    split_at = total - val_size
    train_dataset = Subset(train_dataset,indices[:split_at])
    valid_dataset = Subset(valid_dataset,indices[split_at:])
    print(f'train_ds size{len(train_dataset)}<>val datset size{len(valid_dataset)}')
    train_dataloader = DataLoader(train_dataset,batch_size=batch_size,num_workers=4,shuffle=True)
    # Validation needs no shuffling; a fixed order keeps evaluation repeatable.
    valid_dataloader = DataLoader(valid_dataset,batch_size=batch_size,num_workers=4,shuffle=False)
    return {"train":train_dataloader,"val":valid_dataloader}


In [8]:
def initialize_model(model_path=None,pretrained=False):
    """Create a DenseNet-121 whose classifier outputs a single probability.

    Args:
        model_path: optional path to a saved ``state_dict`` for this exact
            architecture (including the replaced classifier head).
        pretrained: forwarded to ``models.densenet121`` to request the
            library's pretrained weights for the backbone.

    Returns:
        The model, left on whatever device ``densenet121`` created it on; the
        caller moves it to the target device.
    """
    model = models.densenet121(pretrained=pretrained)
    num_ftrs = model.classifier.in_features
    # Linear -> Sigmoid head: the output is a probability, as nn.BCELoss expects.
    model.classifier = nn.Sequential(
        nn.Linear(num_ftrs, 1),
        nn.Sigmoid(),
    )
    if model_path is not None:
        # map_location="cpu" lets a checkpoint written during a GPU run load on
        # a CPU-only machine; load_state_dict then copies the values into the
        # model's own parameters.
        state_dict = torch.load(model_path, map_location="cpu")
        model.load_state_dict(state_dict)

    return model

In [43]:
def train_model(model, dataloaders, criterion, optimizer,scheduler, num_epochs=25):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Every epoch runs a 'train' phase followed by a 'val' phase. Predictions are
    obtained by thresholding the model output at 0.5, so the model is assumed
    to emit probabilities shaped like the labels (TODO confirm for other
    models than the sigmoid-headed one built in this notebook).

    Args:
        model: network already placed on ``DEVICE``.
        dataloaders: mapping with 'train' and 'val' loaders that yield
            ``(inputs, labels, _)`` triples.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped after every training batch.
        scheduler: stepped once per epoch with the validation loss
            (``scheduler.step(epoch_loss)``).
        num_epochs: number of epochs to run.

    Returns:
        ``(model, val_acc_history)``: the model with the best-validation
        weights loaded, and the per-epoch validation accuracies as Python
        floats. They used to be 0-dim tensors living on ``DEVICE``.
    """
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels,_ in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Gradients are tracked only while training; the forward pass
                # itself is identical in both phases.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    preds = (outputs>0.5).float()

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Accumulate plain Python numbers so no device tensors are
                # kept alive in the statistics or in the returned history.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            n_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # deep copy the model when it improves on the best accuracy
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)
                scheduler.step(epoch_loss)
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [10]:
# Step size handed to the Adam optimizer.
learning_rate = 1e-3

In [17]:
# Pretrained backbone with the single-output head, moved to the selected device.
model = initialize_model(pretrained=True)
model = model.to(DEVICE)
# Batches of 32; val_per=0.2 holds out a fifth of the patients for validation.
data_loaders = get_data_loaders(DATA_DIR, batch_size=32, val_per=0.2)

train_ds size21348<>val datset size5336


In [12]:
# BCELoss works on probabilities, matching the Sigmoid that ends the classifier head.
Loss = nn.BCELoss()
optim = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
# Library-default plateau scheduler; train_model steps it with the validation loss.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optim)

In [None]:
# train_model returns (model, val_acc_history), so model_x is that 2-tuple.
model_x = train_model(model, data_loaders, Loss, optim, scheduler, num_epochs=10)

Epoch 0/9
----------
train Loss: 0.6130 Acc: 0.7428
val Loss: 0.6191 Acc: 0.7288

Epoch 1/9
----------
train Loss: 0.6129 Acc: 0.7434
val Loss: 0.6261 Acc: 0.7180

Epoch 2/9
----------
train Loss: 0.6133 Acc: 0.7422
val Loss: 0.6198 Acc: 0.7264

Epoch 3/9
----------
train Loss: 0.6127 Acc: 0.7451
val Loss: 0.6257 Acc: 0.7170

Epoch 4/9
----------


In [13]:
# Debug cell: display the device currently bound to DEVICE.
DEVICE

device(type='cuda', index=0)

In [18]:
# Re-select the device, using CUDA only when it is actually available.
# Forcing "cuda:0" makes every later .to(DEVICE) fail on a CPU-only kernel.
DEVICE=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [16]:
# Debug cell: display the device currently bound to DEVICE.
DEVICE

device(type='cpu')

In [17]:
# Debug cell: report whether PyTorch can use CUDA in this kernel.
torch.cuda.is_available()

False

In [34]:
# len() of a 1-D tensor is the size of its first dimension.
# The cell called the undefined name `klen`, which raises NameError on re-run.
len(torch.tensor([1,2,3]))

3