<a href="https://colab.research.google.com/github/harshatejas/pytorch_custom_object_detection/blob/main/Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Imports
import numpy as np
import pandas as pd
import os, re, cv2, pydicom, warnings

from glob import glob
from PIL import Image
from matplotlib import pyplot as plt
from pydicom.pixel_data_handlers.util import apply_voi_lut

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from engine import train_one_epoch, evaluate
import utils
import transforms as T

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from albumentations.core.transforms_interface import ImageOnlyTransform
# from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

In [3]:
# Hyperparameters for the torchvision Faster R-CNN training cells
# test_set_length = 40           # Test set (number of images)
val_set_length = 3000            # Validation set size (not referenced by the visible cells)
# NOTE(review): the recorded output of the training cell is a CUDA out-of-memory
# error on a ~12 GiB GPU; the batch sizes below are the first thing to lower.
train_batch_size = 16            # Train batch size
# test_batch_size = 16           # Test batch size
val_batch_size = 16              # Validation batch size
num_classes = 14+1               # Number of classes handed to get_model() (labels_dict lists ids 0-14)
learning_rate = 0.005            # Initial learning rate of the SGD optimizer
num_epochs = 100                 # Number of epochs
output_dir = "/data1/geun_19/G-ff/py-faster_rcnn/weight/"   # Output directory to save the model

In [4]:
# Root folder holding the DICOM splits and the annotation CSV files
base_dir = "/data1/geun_19/G-ff/py-faster_rcnn/data/"

# One DICOM folder per split (trailing slash kept)
train_dicom_dir, test_dicom_dir, val_dicom_dir = (
    f"{base_dir}{split}/" for split in ("train", "test", "val")
)

# Annotation tables for the training and validation splits
train_df_dir = f"{base_dir}f_train.csv"
valid_df_dir = f"{base_dir}f_val.csv"

In [5]:
# Load the annotation tables for both splits
train_df, valid_df = (pd.read_csv(csv_path) for csv_path in (train_df_dir, valid_df_dir))


In [6]:
# Distinct class names of the training annotations
labels = train_df['class_name'].unique()
# Map a running index to each class name
labels_dict = dict(enumerate(labels))

In [7]:
labels_dict

{0: 'No finding',
 1: 'Cardiomegaly',
 2: 'Aortic enlargement',
 3: 'Pleural thickening',
 4: 'ILD',
 5: 'Pulmonary fibrosis',
 6: 'Lung Opacity',
 7: 'Atelectasis',
 8: 'Other lesion',
 9: 'Infiltration',
 10: 'Nodule/Mass',
 11: 'Pleural effusion',
 12: 'Consolidation',
 13: 'Calcification',
 14: 'Pneumothorax'}

In [8]:
# Helper functions 
def create_label_txt(path_to_csv, output_path = '/data1/geun_19/pytorch_custom_object_detection/data/labels.txt'):
	"""Build the index -> class-name mapping of a CSV and write it to a text file.

	Args:
		path_to_csv: Path to an annotation CSV that has a ``class_name`` column.
		output_path: File that receives ``str(labels_dict)``. The default is the
			location that used to be hard-coded in this function; the parent
			directory must already exist.

	Returns:
		dict mapping 0..N-1 to the distinct ``class_name`` values of the CSV.
	"""
	data = pd.read_csv(path_to_csv)

	# Create dictionary from the array of distinct class names
	labels_dict = dict(enumerate(data['class_name'].unique()))

	# Persist the mapping as the textual form of the dictionary
	with open(output_path, 'w') as f:
		f.write(str(labels_dict))

	return labels_dict

def parse_one_annot(path, filename, labels_dict):
	"""Return the boxes and class indices annotated for one DICOM file.

	Args:
		path: Path to the annotation CSV. The columns read are ``image_id``,
			``class_name``, ``x_min``, ``y_min``, ``x_max`` and ``y_max``.
		filename: Image file name, matched against ``<image_id>.dicom``.
		labels_dict: Mapping of class index -> class name.

	Returns:
		(boxes_array, classes): a 2-D array with one
		``[x_min, y_min, x_max, y_max]`` row per annotation and a tuple holding
		the class index of each row, in the same (CSV) order.
	"""
	data = pd.read_csv(path)

	# Select the rows of this image once and reuse them for boxes and classes
	rows = data[data["image_id"]+".dicom" == filename]

	# Inverted mapping so every row is translated in place; iterating over
	# labels_dict first would group the classes by key and break the
	# row-by-row correspondence with the boxes
	name_to_index = {name: index for index, name in labels_dict.items()}

	# Rows whose class is missing from labels_dict are dropped from BOTH
	# outputs so that boxes and classes always have the same length
	rows = rows[rows["class_name"].isin(list(name_to_index))]

	boxes_array = rows[["x_min", "y_min", "x_max", "y_max"]].to_numpy()
	classes = tuple(name_to_index[name] for name in rows["class_name"])

	return boxes_array, classes

def get_model(num_classes):
	"""Build a Faster R-CNN (ResNet-50 FPN) detector with a new box-predictor head.

	Args:
		num_classes: Number of output classes given to ``FastRCNNPredictor``.

	Returns:
		The model created with ``pretrained = True`` whose
		``roi_heads.box_predictor`` has been replaced.
	"""
	detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)

	# The new head takes the same input width as the classifier it replaces
	old_head = detector.roi_heads.box_predictor
	detector.roi_heads.box_predictor = FastRCNNPredictor(old_head.cls_score.in_features, num_classes)

	return detector


def get_transforms(train):
	"""Compose the transforms taken from the local ``transforms`` module (``T``).

	Args:
		train: When truthy, a random horizontal flip (probability 0.5) is added
			after the tensor conversion.

	Returns:
		A ``T.Compose`` over the selected transforms.
	"""
	# Tensor conversion always comes first
	steps = [T.ToTensor()]

	# Data augmentation is reserved for training
	if train:
		steps += [T.RandomHorizontalFlip(0.5)]

	return T.Compose(steps)

In [9]:
# class CardsDataset(torch.utils.data.Dataset):
class BigDataset(torch.utils.data.Dataset):

	"""Detection dataset pairing every ``.dicom`` file of a folder with its CSV annotations.

	Each item is ``(image, target)``. ``image`` is the raw ``pixel_array`` of the
	DICOM file (no rescaling is applied here) and ``target`` is a dict with the
	keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
	"""

	def __init__(self, dataset_dir, csv_file, labels_dict, transforms = None):
		"""
		Args:
			dataset_dir: Folder that contains the ``.dicom`` files.
			csv_file: Path to the annotation CSV read by ``parse_one_annot``.
			labels_dict: Mapping of class index -> class name.
			transforms: Optional callable applied as ``transforms(image, target)``.
		"""
		self.dataset_dir = dataset_dir
		self.csv_file = csv_file
		self.transforms = transforms
		self.labels_dict = labels_dict
		# Sorted listing keeps the index -> file assignment stable between runs
		self.image_names = [file for file in sorted(os.listdir(dataset_dir)) if file.endswith('.dicom')]

	def __getitem__(self, index):
		"""Load image number ``index`` together with its detection target."""
		image_path = os.path.join(self.dataset_dir, self.image_names[index])

		dicom = pydicom.dcmread(image_path)
		image = dicom.pixel_array

		box_array, classes = parse_one_annot(self.csv_file, self.image_names[index], self.labels_dict)
		boxes = torch.as_tensor(box_array, dtype = torch.float32)

		labels = torch.tensor(classes, dtype=torch.int64)

		image_id = torch.tensor([index])
		# Box columns are x_min, y_min, x_max, y_max -> height * width
		area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

		# No instance is marked as a crowd. Filling this field with the class
		# ids (as before) flags every box with a non-zero label as a crowd.
		iscrowd = torch.zeros((len(classes),), dtype=torch.int64)

		target = {}
		target["boxes"] = boxes
		target["labels"] = labels
		target["image_id"] = image_id
		target["area"] = area
		target["iscrowd"] = iscrowd

		if self.transforms is not None:
			image, target = self.transforms(image, target)

		return image, target

	def __len__(self):
		"""Number of ``.dicom`` files found in ``dataset_dir``."""
		return len(self.image_names)

In [10]:
train_df_dir

'/data1/geun_19/G-ff/py-faster_rcnn/data/f_train.csv'

In [11]:
train_dicom_dir

'/data1/geun_19/G-ff/py-faster_rcnn/data/train/'

In [12]:
class VinBigDataset(Dataset): #Class to load Training Data
    """Detection dataset built from an annotation DataFrame and a folder of DICOM files.

    With ``stat == 'Train'`` (the default) an item is ``(image, target)``, where
    ``target`` holds ``boxes``, ``labels``, ``image_id``, ``area`` and
    ``iscrowd``. With any other ``stat`` only the image is returned.

    Args:
        dataframe: Annotation table with the columns ``image_id``, ``class_id``,
            ``x_min``, ``y_min``, ``x_max`` and ``y_max``.
        image_dir: Folder containing ``<image_id>.dicom`` files.
        transforms: Optional callable invoked with keyword arguments
            (``image``, and in train mode also ``bboxes`` and ``labels``) that
            returns a dict with the same keys.
        stat: ``'Train'`` to return targets, anything else for images only.
    """

    BOX_COLUMNS = ['x_min', 'y_min', 'x_max', 'y_max']

    def __init__(self, dataframe, image_dir, transforms=None,stat = 'Train'):
        super().__init__()

        self.image_ids = dataframe["image_id"].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        self.stat = stat

    @staticmethod
    def _to_rgb_0_255(image):
        """Min-max scale a single-channel image to [0, 255] and repeat it into HxWx3 float32."""
        image = image.astype('float32')
        image = image - image.min()
        peak = image.max()
        # A constant image has peak == 0; skip the division instead of producing NaNs
        if peak > 0:
            image = image / peak
        image = image * 255.0
        return np.stack([image, image, image], axis=-1)

    def _load_image(self, image_id):
        """Read ``<image_id>.dicom`` and return it as an HxWx3 float32 array in [0, 255]."""
        dicom = pydicom.dcmread(f"{self.image_dir}/{image_id}.dicom")

        image = dicom.pixel_array

        # MONOCHROME1 images are inverted. This now happens for every `stat`
        # so that inference images match the training images.
        if "PhotometricInterpretation" in dicom and dicom.PhotometricInterpretation == "MONOCHROME1":
            image = np.amax(image) - image

        intercept = dicom.RescaleIntercept if "RescaleIntercept" in dicom else 0.0
        slope = dicom.RescaleSlope if "RescaleSlope" in dicom else 1.0

        # Rescale in floating point: an in-place integer `+=` on the pixel array
        # can fail to cast or wrap around, depending on the stored dtype
        image = image.astype(np.float32) * float(slope) + float(intercept)

        return self._to_rgb_0_255(image)

    def __getitem__(self, index):
        image_id = self.image_ids[index]
        image = self._load_image(image_id)

        if self.stat != 'Train':
            if self.transforms:
                image = self.transforms(image=image)['image']
            # return image, image_id
            return image

        records = self.df[(self.df['image_id'] == image_id)]
        records = records.reset_index(drop=True)

        # Images whose first record has class_id 0 keep only that record
        if records.loc[0, "class_id"] == 0:
            records = records.loc[[0], :]

        boxes = records[self.BOX_COLUMNS].values
        # Plain Python ints are handed to the transform as the label field
        labels = records["class_id"].tolist()

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            # The transform may move or drop boxes; take boxes AND labels from
            # its output so both stay aligned
            boxes = sample['bboxes']
            labels = sample['labels']

        boxes = torch.as_tensor(np.asarray(boxes, dtype=np.float32)).reshape(-1, 4)
        labels = torch.as_tensor(np.asarray(labels), dtype=torch.int64)

        if boxes.shape[0] == 0:
            # Albumentation cuts the target (class 14, 1x1px in the corner)
            boxes = torch.tensor([[0.0, 0.0, 1.0, 1.0]], dtype=torch.float32)
            labels = torch.tensor([0], dtype=torch.int64)

        # Derived from the final boxes so the values match what is returned
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd

        # return image, target, image_ids
        return image, target

    def __len__(self):
        return len(self.image_ids)

def dilation(img): # custom image processing function
    """Dilate ``img`` once with an elliptical kernel whose two sizes are drawn from 1..5."""
    kernel_size = tuple(np.random.randint(1, 6, 2))
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, kernel_size)
    return cv2.dilate(img, ellipse, iterations=1)

class Dilation(ImageOnlyTransform):
    """Image-only transform that runs ``dilation`` on the image."""

    def apply(self, img, **params):
        # Additional keyword parameters are accepted but not used.
        return dilation(img)   
    
def get_train_transform():
    """Build the training augmentation pipeline.

    Returns:
        An ``A.Compose`` that flips, shifts/scales/rotates, resizes the longest
        side to 800, applies ``Dilation``, scales pixel values by 1/255 and
        converts to a tensor. Boxes are in ``pascal_voc`` format and are tied
        to the ``labels`` field.
    """
    return A.Compose([
        # Probability passed by keyword. NOTE(review): in the albumentations 1.x
        # API the first positional parameter of a transform is `always_apply`,
        # so `A.Flip(0.5)` does not set `p` - confirm against the installed version.
        A.Flip(p=0.5),
        A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=45, p=0.25),
        A.LongestMaxSize(max_size=800, p=1.0),
        Dilation(),
        # FasterRCNN will normalize.
        A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    
def get_valid_transform():
    """Build the validation pipeline: 1/255 scaling and tensor conversion, with box handling."""
    steps = [
        A.Normalize(max_pixel_value=255.0, mean=(0, 0, 0), std=(1, 1, 1), p=1.0),
        ToTensorV2(p=1.0),
    ]
    # Boxes use pascal_voc coordinates and travel with the `labels` field
    box_config = dict(format='pascal_voc', label_fields=['labels'])
    return A.Compose(steps, bbox_params=box_config)

def get_test_transform():
    """Build the test pipeline: 1/255 scaling and tensor conversion, without box handling."""
    steps = [
        A.Normalize(max_pixel_value=255.0, mean=(0, 0, 0), std=(1, 1, 1), p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps)

In [13]:
import torch

# Four loader workers per CUDA device (evaluates to 0 when no device is reported).
# NOTE(review): the DataLoader calls in this notebook pass num_workers = 2 and never read this value.
num_workers = 4 * torch.cuda.device_count()

In [14]:
# Use the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Small tensor moved to the selected device
x = torch.tensor([1., 2.]).to(device)

In [15]:

# Build the index -> class-name mapping (this also writes labels.txt)
labels_dict = create_label_txt(train_df_dir)

# Datasets: annotations come from the DataFrames, pixels from the DICOM folders
train_dataset = VinBigDataset(dataframe = train_df, image_dir = train_dicom_dir, transforms = get_train_transform())
valid_dataset = VinBigDataset(dataframe = valid_df, image_dir = val_dicom_dir, transforms = get_valid_transform())

# Both loaders share the worker count and the collate function
loader_options = dict(num_workers = 2, collate_fn = utils.collate_fn)

train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size = train_batch_size,
                shuffle = True, **loader_options)

valid_data_loader = torch.utils.data.DataLoader(valid_dataset, batch_size = val_batch_size,
                shuffle = False, **loader_options)

print(f"We have: {len(train_dataset)+len(valid_dataset)} images in the dataset, {len(train_dataset)} are training images and {len(valid_dataset)} are validation images")


We have: 15000 images in the dataset, 12000 are training images and 3000 are validation images


In [16]:
train_data_loader

<torch.utils.data.dataloader.DataLoader at 0x7f66f068fad0>

In [17]:
train_dataset

<__main__.VinBigDataset at 0x7f66f068fdd0>

In [44]:
output_dir

'/data1/geun_19/G-ff/py-faster_rcnn/weight/'

In [58]:
# Get the model using helper function
model = get_model(num_classes)
model.to(device = device)

# Construct the optimizer over the parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr = learning_rate, momentum = 0.9, weight_decay = 0.0005)

# Learning rate scheduler decreases the learning rate by 10x every 3 epochs
# NOTE(review): over num_epochs epochs this keeps shrinking the rate by 10x
# every third epoch - confirm the step size suits the epoch count.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)

# Create the output directory (including parents) before training, so that a
# bad path fails immediately instead of after the last epoch
os.makedirs(output_dir, exist_ok = True)

for epoch in range(num_epochs):

    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq = 10)
    lr_scheduler.step()
    # Evaluate on the validation dataset
    evaluate(model, valid_data_loader, device = device)

# Save the model state
# torch.save needs a file path; output_dir itself is a directory
torch.save(model.state_dict(), os.path.join(output_dir, "model"))

RuntimeError: CUDA out of memory. Tried to allocate 800.00 MiB (GPU 0; 11.91 GiB total capacity; 8.31 GiB already allocated; 735.00 MiB free; 10.57 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

: 

# New code: Faster R-CNN training with IceVision

Reference notebook: https://www.kaggle.com/code/mariazorkaltseva/vinbigdata-eda-faster-rcnn-icevision-training/notebook

In [18]:
# import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import pytorch_lightning as pl
from pytorch_lightning.loggers import CSVLogger

from sklearn.model_selection import GroupKFold
# from icevision.all import *

warnings.filterwarnings('ignore')

In [19]:
import random

In [20]:
# Configuration for the IceVision / PyTorch Lightning cells.
# Only SEED, LR and WDECAY are read by the visible cells; the remaining
# meanings are inferred from the names - TODO confirm against the cells that use them.
SEED = 2021          # value handed to seed_everything()
DEBUG = False        # not referenced in the visible cells
IMG_DIM = 512        # presumably the stored image side length in pixels
RESIZE_DIM = 384     # presumably the side length fed to the model
PRESIZE = 512        # presumably the size before the final resize
BATCH_SIZE = 16 # 48 (alternative value left by the author)
NUM_WORKERS = 4      # presumably the DataLoader worker count
N_FOLDS = 5          # presumably the number of GroupKFold splits
FOLDS_IDS = [0]      # presumably the fold indices to train
LR = 1e-5            # AdamW learning rate in PL_Model.configure_optimizers
WDECAY = 1e-4        # AdamW weight decay in PL_Model.configure_optimizers
NUM_EPOCHS = 60      # presumably the number of training epochs

def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random generators and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator; it is also exported as
            the ``PYTHONHASHSEED`` environment variable.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not only the current one
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick algorithms by timing them, which can vary
    # between runs; it has to be off for reproducible results
    torch.backends.cudnn.benchmark = False

# Seed all random generators once, before any model or data object is created
seed_everything(SEED)

In [21]:
# Torch - Torchvision - IceVision - IceData - MMDetection - YOLOv5 - EfficientDet Installation
# NOTE(review): the script is downloaded from the `master` branch, so what it
# installs can change between runs; the recorded output shows torch==1.10.0+cu111.
!wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

# Choose your installation target: cuda11 or cuda10 or cpu
!bash icevision_install.sh cuda11

--2022-05-11 15:42:36--  https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2766 (2.7K) [text/plain]
Saving to: ‘icevision_install.sh’


2022-05-11 15:42:36 (22.3 MB/s) - ‘icevision_install.sh’ saved [2766/2766]

Installing icevision + dependencices for cuda11
- Installing torch and its dependencies
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.10.0+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.10.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (2137.6 MB)
[K     |████████████████████████████████| 2137.6 MB 504 bytes/s a 0:00:01   |█▎                              | 82.7 MB 13.1 MB/s eta 0:02:38     |█████████████████▏           

In [21]:
# Restart kernel after installation
# NOTE: the restarted kernel starts with an empty namespace, so the imports and
# variables of the cells above must be re-run before the cells below.
import IPython
IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

: 

In [None]:
class PL_Model(faster_rcnn.lightning.ModelAdapter):
    """Model adapter trained with AdamW and a reduce-on-plateau learning-rate schedule.

    NOTE(review): ``faster_rcnn`` is not imported in the visible cells (the
    ``icevision.all`` import is commented out) - confirm where it comes from.
    """

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler_config])`` for the trainer."""
        adamw = torch.optim.AdamW(self.parameters(), lr=LR, weight_decay=WDECAY)

        # Cut the learning rate by 10x after 10 epochs without a lower 'val_loss'
        # (a CosineAnnealingLR alternative was tried by the author and disabled)
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            adamw, mode='min', factor=0.1, patience=10
        )

        scheduler_config = {
            "scheduler": plateau,
            "interval": 'epoch',
            'monitor': 'val_loss',
        }
        return [adamw], [scheduler_config]