In [6]:
# Imports
import os
import torch.cuda, torch.utils.data, torch.nn, torch.optim, torch
import torchvision.transforms, torchvision.datasets.folder, torchvision.utils, torchvision.models
import numpy as np
import json
import pickle
import matplotlib.pyplot as plt
import time

In [7]:
# Addresses

class Address:
    def __init__(self):
        '''
        Stores all the addresses used in project.

        Only builds path strings; nothing is created or touched on disk here.
        '''
        # Inputs
        self.data = "../input/mammography"
        self.processed_data = "data.pkl"

        # Coco
        self.coco = os.path.join(self.data, 'coco_1k')
        self.coco_annot = os.path.join(self.coco, 'annotations')
        self.coco_annot_train = os.path.join(self.coco_annot, 'instances_train2017.json')
        self.coco_annot_val = os.path.join(self.coco_annot, 'instances_val2017.json')
        self.coco_img_train = os.path.join(self.coco, 'train2017')
        self.coco_img_val = os.path.join(self.coco, 'val2017')

        # Test
        self.test = os.path.join(self.data, 'test')
        self.test_img = os.path.join(self.test, 'images')
        self.test_label = os.path.join(self.test, 'labels')
        self.predictions = os.path.join(self.test, 'predictions')

        # Yolo
        self.yolo = os.path.join(self.data, 'yolo_1k')
        self.yolo_train = os.path.join(self.yolo, 'train')
        self.yolo_train_img = os.path.join(self.yolo_train, 'images')
        self.yolo_train_label = os.path.join(self.yolo_train, 'labels')
        self.yolo_val = os.path.join(self.yolo, 'val')
        self.yolo_val_img = os.path.join(self.yolo_val, 'images')
        self.yolo_val_labels = os.path.join(self.yolo_val, 'labels')

        # Models
        self.result = "results/"
        self.model_frcnn = os.path.join(self.result, 'frcnn')

        # Temp
        self.temp = "temp/"

    def create_dir(self, dir_list = None):
        '''
        Function to create directories in dir_list. If dir_list is None then create all directories of address.

        Missing parent directories are created as well. Paths that already exist are left alone.
        '''
        if dir_list is None:
            dir_list = [self.temp, self.result, self.model_frcnn]
        for address in dir_list:
            if not os.path.exists(address):
                # makedirs (not mkdir) so nested targets work even when the parent is missing
                os.makedirs(address, exist_ok=True)

    def _delete_folder_content(self, folder_addr):
        '''
        Deletes all the content of folder_addr, keeping folder_addr itself.

        Does nothing when folder_addr does not exist.
        '''
        if not os.path.exists(folder_addr):
            return
        for name in os.listdir(folder_addr):
            address = os.path.join(folder_addr, name)
            # Symlinks are unlinked rather than followed, so data outside folder_addr is never deleted
            if os.path.isdir(address) and not os.path.islink(address):
                self._delete_folder_content(address)
                # os.rmdir removes only this directory. os.removedirs would also prune every
                # parent that became empty, i.e. folder_addr itself and possibly its ancestors.
                os.rmdir(address)
            else:
                os.remove(address)

    def clean(self, file_list = None):
        '''
        Deletes all the content of every folder in file_list (default: only the temp folder).
        The folders themselves are kept.
        '''
        if file_list is None:
            file_list = [self.temp]
        for address in file_list:
            self._delete_folder_content(address)
# Shared path registry used by the rest of the notebook
addr = Address()
# With no argument, clean() empties the temp folder (its default list is [addr.temp])
addr.clean()
# With no argument, create_dir() creates the temp, result and frcnn result folders if they are absent
addr.create_dir()

In [8]:
class HyperParameters:
    def __init__(self):
        '''
        Stores all Hyperparameters used for training of model
        '''
        # Training
        self.batch_size = 2
        self.num_epoch = 10
        self.grad_clip = 1.0

        # Data
        self.num_train = 40000
        self.num_val = 10000
        # NOTE(review): train_step is derived from num_train, not from the length of the actual
        # train loader, while the LR scheduler is stepped once per batch - confirm they match.
        self.train_step = self.num_epoch*(self.num_train//self.batch_size)
        # NOTE(review): DataSet.beautify scales box x by resolution[0] and box y by resolution[1] -
        # confirm the intended axis order.
        self.resolution = (512, 1024)

        # Learning Rate
        self.lr = 1e-5
        self.warmup_step = self.train_step//40
        self.decay_step = self.train_step//2
        self.decay_rate = 0.5

    def lr_schedule(self, step):
        '''
        Getting learning rate as function of train steps completed (Exponential decay with linear warmup)

        Returns the multiplier applied to self.lr:
            * step <= warmup_step: linear ramp step/warmup_step (0 at step 0, 1 at warmup_step)
            * afterwards:          decay_rate ** ((step - warmup_step) / decay_step)

        Short runs can make warmup_step or decay_step equal to 0 (train_step < 40 / train_step < 2).
        In that case the corresponding phase is skipped instead of dividing by zero.
        '''
        if self.warmup_step > 0 and step <= self.warmup_step:
            return step/self.warmup_step
        if self.decay_step <= 0:
            # No decay horizon configured: keep the base learning rate
            return 1.0
        return self.decay_rate**((step-self.warmup_step)/self.decay_step)

    def create_report(self, addr):
        '''
        Writes all hyperparameters as a human readable text file `param.txt` inside folder addr.
        '''
        with open(os.path.join(addr, 'param.txt'), 'w') as file:
            file.writelines([
                f'Training:',
                f'\n\tBatch Size:       {self.batch_size}',
                f'\n\tNum Epoch:        {self.num_epoch}',
                f'\n\tGrad Clip:        {self.grad_clip}',
                f'\n\nData:',  
                f'\n\tNum Train:        {self.num_train}',
                f'\n\tNum Val:          {self.num_val}',
                f'\n\tTrain Step:       {self.train_step}',
                f'\n\tResolution:       {self.resolution}',
                f'\n\nLearning Rate:',  
                f'\n\tlr:               {self.lr}',
                f'\n\tWarmup Step:      {self.warmup_step}',
                f'\n\tDecay Step:       {self.decay_step}',
                f'\n\tDecay Rate:       {self.decay_rate}',
            ])

param = HyperParameters()

In [9]:
# Reproducibility (fixed seeds) and compute device

random_seed = 68
np.random.seed(random_seed)         # NumPy global RNG
torch.manual_seed(random_seed)      # PyTorch RNG

# GPU selection is currently switched off, so everything runs on the CPU.
# To turn it back on, restore the following lines:
#     if torch.cuda.is_available():
#         torch.cuda.manual_seed_all(random_seed)
#         device = "cuda"
device = "cpu"
print(f"Working with device {device}")

Working with device cpu


In [10]:
class DataSet(torch.utils.data.Dataset):
    def __init__(self, resolution, address_img, address_annot=None):
        '''
        Creates Dataset of images on given address.

        resolution:     target image size, interpreted as (width, height). This matches how
                        beautify() rescales the boxes (x by resolution[0], y by resolution[1]).
        address_img:    folder holding the image files
        address_annot:  COCO style json file (keys 'images' and 'annotations'). When None the
                        dataset yields images only and the target of every item is None.
        '''
        self.address_img = address_img
        self.resolution = resolution
        width, height = resolution
        # torchvision's Resize takes (height, width), so the order is swapped here. Passing
        # `resolution` unchanged produced images whose axes disagreed with the rescaled boxes.
        self.transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                                        torchvision.transforms.Resize((height, width), antialias=False)])
        self.img_list = None            # In-RAM cache of transformed images, filled by load()
        self.img_name_list = sorted(os.listdir(self.address_img))
        # NOTE(review): images and annotations are paired purely by position after sorting both by
        # file name - this assumes the folder holds exactly the files listed in the json.
        if address_annot is None:
            self.annotation = None
        else:
            with open(address_annot, 'rb') as file:
                self.annotation = json.load(file)
            self.beautify()

    def __len__(self):
        return len(self.img_name_list)
    
    def __getitem__(self, idx):
        '''
        Returns (image tensor, annotation of that image). The image comes from the RAM cache when
        load() was called, otherwise it is read from disk and transformed on the fly.
        '''
        target = None if self.annotation is None else self.annotation[idx]
        if self.img_list is not None:
            return self.img_list[idx], target
        img_addr = os.path.join(self.address_img, self.img_name_list[idx])
        img = torchvision.datasets.folder.default_loader(img_addr)
        return self.transform(img).to(torch.float), target
    
    def load(self):
        '''
        Loads all the data in RAM
        '''
        if self.img_list is not None:
            return
        # img_list is still None while the list is being built, so every item is read from disk
        self.img_list = [self[idx][0] for idx in range(len(self))]

    def clean(self):
        '''
        Removes Data from RAM
        '''
        self.img_list = None

    def beautify(self):
        '''
        Removes Redundant data from json file

        Turns the raw COCO dict into a list with one entry per image (sorted by 'file_name').
        Each entry is the original image record plus a 'bbox' list holding that image's boxes,
        converted from [x, y, w, h] to [x1, y1, x2, y2] and rescaled to self.resolution.
        '''
        label_data = {}
        for img_data in self.annotation['images']:
            img_data['bbox'] = []
            label_data[img_data['id']] = img_data
        for annot in self.annotation['annotations']:
            # An annotation's own 'id' identifies the annotation; the image it belongs to is
            # 'image_id'. Fall back to 'id' only for files that carry no 'image_id' at all.
            img_id = annot['image_id'] if 'image_id' in annot else annot['id']
            img_data = label_data[img_id]
            x, y, w, h = annot['bbox']
            img_data['bbox'].append([
                x*self.resolution[0]/img_data['width'],
                y*self.resolution[1]/img_data['height'],
                (x + w)*self.resolution[0]/img_data['width'],
                (y + h)*self.resolution[1]/img_data['height'],
            ])
        self.annotation = sorted(label_data.values(), key = lambda data: data['file_name'])

class Data:
    # torchvision detection models reserve class index 0 for background, so the single
    # foreground class of this project has to be labelled 1.
    FOREGROUND_LABEL = 1

    def __init__(self, address: 'Address', param: 'HyperParameters'):
        '''
        Creates DataLoader and DataSet for both train and val split
        '''
        self.address = address
        self.param = param

        # Dataset
        self.dataset_train = DataSet(param.resolution, address.coco_img_train, address.coco_annot_train)
        self.dataset_val = DataSet(param.resolution, address.coco_img_val, address.coco_annot_val)

        # DataLoader
        self.loader_train = torch.utils.data.DataLoader(self.dataset_train, batch_size=param.batch_size, collate_fn=self.collate, shuffle=True)
        self.loader_val = torch.utils.data.DataLoader(self.dataset_val, batch_size=param.batch_size, collate_fn=self.collate, shuffle=False)
    
    def collate(self, batch):
        '''
        Turns a list of (image, annotation) pairs into the (images, targets) pair of lists that is
        fed to the detection model. Every tensor is placed on the notebook-level `device`.

        Each target is {'boxes': float tensor (N, 4), 'labels': int64 tensor (N,)}. Images without
        boxes get empty tensors of shape (0, 4) and (0,).
        '''
        label = []
        img = []
        for image, annot in batch:
            img.append(image.to(device))
            num_box = len(annot['bbox'])
            if num_box:
                boxes = torch.tensor(annot['bbox'], dtype=torch.float, device=device)
            else:
                boxes = torch.zeros((0, 4), dtype=torch.float, device=device)
            # Label 0 would mark every ground-truth box as background and the detector would
            # never see a positive sample.
            labels = torch.full((num_box,), self.FOREGROUND_LABEL, dtype=torch.int64, device=device)
            label.append({'boxes': boxes, 'labels': labels})
    
        return img, label

    def load(self):
        '''
        Loads all the data in RAM
        '''
        start_time = time.time()
        self.dataset_train.load()
        self.dataset_val.load()
        print('Data Loaded in time:', time.time()-start_time)

    def clean(self):
        '''
        Removes all the data from RAM
        '''
        self.dataset_train.clean()
        self.dataset_val.clean()

# Builds the train/val datasets and loaders; the annotation json files are read here
data = Data(addr, param)
# Optional: cache every transformed image in RAM (currently disabled, images are read per item)
# data.load()

In [11]:
# Learning Model
class LearnModel:
    def __init__(self, model: torch.nn.Module, mtype, model_addr, data=data, param=param, device=device):
        '''
        Train, Evaluate and Predict

        model:      detection model. Called as model(images, targets) it must return a dict of
                    losses (this is how train_epoch and validate_epoch use it).
        mtype:      free-form tag of the model type (only stored)
        model_addr: folder receiving checkpoints, the loss history and the loss curve
        data:       object exposing loader_train and loader_val
        param:      hyperparameters (lr, lr_schedule, num_epoch, grad_clip)
        device:     device the model is moved to and checkpoints are mapped to
        '''
        
        self.data = data
        self.param = param
        self.device = device
        self.model = model.to(device)
        self.model_addr = model_addr
        self.mtype = mtype

        # Addresses
        self.loss_addr = os.path.join(self.model_addr, 'loss.npz')
        self.epoch_addr = lambda epoch: os.path.join(self.model_addr, f'model/{epoch}.pth')
        self.scheduler_addr = lambda epoch: os.path.join(self.model_addr, f'scheduler/{epoch}.pth')
        self.optimizer_addr = lambda epoch: os.path.join(self.model_addr, f'optimizer/{epoch}.pth')
        self._create_checkpoint_dirs()

    def _create_checkpoint_dirs(self):
        '''
        Creates the checkpoint sub folders of model_addr (including missing parents).
        '''
        for sub_dir in ('model', 'scheduler', 'optimizer'):
            os.makedirs(os.path.join(self.model_addr, sub_dir), exist_ok=True)

    def train(self, epoch_log = True, batch_log = True, overwrite = False):
        '''
        Trains for param.num_epoch epochs, validating and checkpointing after every epoch.

        Epochs whose model and scheduler checkpoints already exist are not retrained; their state
        is loaded instead, so an interrupted run resumes where it stopped.

        epoch_log:  print train / val loss after every epoch
        batch_log:  print the running train loss every 25 batches
        overwrite:  delete everything stored in model_addr first and train from scratch
        '''
        optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.param.lr)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda = self.param.lr_schedule)
        start_time = time.time()

        # Wipe BEFORE reading the loss history, otherwise the old losses survive in memory and get
        # written back. Cleaning also removes the checkpoint folders, so they are recreated.
        if overwrite:
            addr.clean([self.model_addr])
            self._create_checkpoint_dirs()

        # Loss arr
        train_loss_arr = []
        val_loss_arr = []
        if os.path.exists(self.loss_addr):
            with np.load(self.loss_addr) as loss_arr:
                train_loss_arr = list(loss_arr['train'])
                val_loss_arr = list(loss_arr['val'])
        
        for epoch in range(self.param.num_epoch):
            epoch_addr = self.epoch_addr(epoch)
            scheduler_addr = self.scheduler_addr(epoch)
            optimizer_addr = self.optimizer_addr(epoch)

            # Loading Model if present
            if os.path.exists(epoch_addr) and os.path.exists(scheduler_addr):
                self.model.load_state_dict(torch.load(epoch_addr, map_location=self.device), strict=False)
                # Optimizer checkpoints are optional so that older runs (saved without them) still resume
                if os.path.exists(optimizer_addr):
                    optimizer.load_state_dict(torch.load(optimizer_addr, map_location=self.device))
                scheduler.load_state_dict(torch.load(scheduler_addr, map_location=self.device))
                print(f"Loaded model and scheduler at epoch {epoch}")
                continue

            # Training Model
            train_loss = self.train_epoch(optimizer, scheduler, batch_log=batch_log)
            if epoch_log:
                print(f'Epoch: {epoch}\tTrain Loss: {train_loss}\tTime: {time.time()-start_time}')

            # Validating Model
            val_loss = self.validate_epoch(self.data.loader_val, batch_log=False)
            if epoch_log:
                print(f'Epoch: {epoch}\tVal Loss: {val_loss}\tTime: {time.time()-start_time}')

            # Saving data
            train_loss_arr.append(train_loss)
            val_loss_arr.append(val_loss)
            np.savez_compressed(self.loss_addr, train=np.array(train_loss_arr), val=np.array(val_loss_arr))     # Saving Loss Array
            torch.save(self.model.state_dict(), epoch_addr)     # Saving Model
            torch.save(optimizer.state_dict(), optimizer_addr)  # Saving Optimizer (Adam moments, current lr)
            torch.save(scheduler.state_dict(), scheduler_addr)  # Saving Scheduler

            # Printing blank line between each epoch in Log
            if epoch_log:
                print()
        
    def train_epoch(self, optimizer, scheduler, batch_log):
        '''
        Trains model for one epoch and returns the mean loss per batch.
        The scheduler is stepped once per batch.
        '''
        epoch_loss = 0
        batch_ct = 0
        dataloader = self.data.loader_train
        start_time = time.time()

        self.model.train()          # Set Model to train Mode

        for x, y in dataloader:
            # Forward Propagation (in train mode the model returns a dict of losses)
            loss_dict = self.model(x, y)

            # Computing Loss
            loss = sum(loss_dict.values())
            epoch_loss += loss.item()

            # Back Propagation
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.param.grad_clip)
            optimizer.step()
            scheduler.step()

            # Update batch count
            batch_ct += 1

            if batch_log and batch_ct%25 == 0:
                print(f"\tBatch {batch_ct}\tLoss: {epoch_loss/batch_ct}\tTime: {time.time()-start_time}")

        return epoch_loss/batch_ct
    
    def validate_epoch(self, dataloader, batch_log):
        '''
        Calculates Loss on data in given dataloader and returns the mean loss per batch.

        torchvision detection models return the loss dict only in train mode (in eval mode they
        return predictions). The model is therefore kept in train mode, but gradients are
        disabled and BatchNorm / Dropout layers are switched to eval so that validation neither
        updates running statistics nor adds dropout noise. The model is left in eval mode.
        '''
        epoch_loss = 0
        batch_ct = 0
        start_time = time.time()

        self.model.train()
        for module in self.model.modules():
            if isinstance(module, (torch.nn.modules.batchnorm._BatchNorm, torch.nn.modules.dropout._DropoutNd)):
                module.eval()

        try:
            with torch.no_grad():
                for x, y in dataloader:
                    # Forward Propagation
                    loss_dict = self.model(x, y)

                    # Computing Loss
                    epoch_loss += sum(loss_dict.values()).item()

                    # Update batch count
                    batch_ct += 1

                    if batch_log and batch_ct%25 == 0:
                        print(f"\tBatch {batch_ct}\tLoss: {epoch_loss/batch_ct}\tTime: {time.time()-start_time}")
        finally:
            self.model.eval()       # Set Model to eval mode

        return epoch_loss/batch_ct

    def plot_loss(self, addr = None):
        '''
        Plots Loss vs number of epochs and saves the figure.

        addr: output path of the figure, defaults to 'loss_curve' inside model_addr
        '''
        if not os.path.exists(self.loss_addr):
            raise Exception("No Loss Array")
        with np.load(self.loss_addr) as loss_arr:
            train_arr, val_arr = loss_arr['train'], loss_arr['val']
        num_epoch = train_arr.shape[0]
        x_arr = np.linspace(1, num_epoch, num_epoch)

        if addr is None:
            addr = os.path.join(self.model_addr, 'loss_curve')

        # A fresh figure per call, so repeated calls do not draw on top of each other
        fig, ax = plt.subplots()
        ax.set_title("Loss Curve")
        ax.set_xlabel("Number of Epochs")
        ax.set_ylabel("Loss")       # Sum of the model's loss terms, not an MSE
        ax.plot(x_arr, train_arr, label='Train')
        ax.plot(x_arr, val_arr, label='Val')
        ax.legend()
        fig.savefig(addr)

    def best_model(self):
        '''
        Returns Best Model as well as changes self.model in place to best model
        (the epoch with the lowest stored validation loss).
        '''
        if not os.path.exists(self.loss_addr):
            raise Exception("No Loss Array")
        with np.load(self.loss_addr) as loss_arr:
            best_epoch = int(np.argmin(loss_arr['val']))
        self.model.load_state_dict(torch.load(self.epoch_addr(best_epoch), map_location=self.device), strict=False)
        return self.model


In [12]:
# Model: Faster R-CNN with a ResNet-50 FPN backbone, two classes, all five backbone stages trainable
model_conv = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    num_classes=2,
    trainable_backbone_layers=5,
).to(device)

# Learner: owns training, validation and checkpointing under the frcnn result folder
learner_conv = LearnModel(
    model=model_conv,
    mtype='frcnn',
    model_addr=addr.model_frcnn,
    data=data,
    param=param,
    device=device,
)

# Store the hyperparameters next to the checkpoints, then start (or resume) training
learner_conv.param.create_report(learner_conv.model_addr)
learner_conv.train()

KeyboardInterrupt: 