# Model Introduction
This model is an implementation of the FastRCNNPredictor Object detection model for the tensorflow Kaggle competition.

This model is done using Pytorch and is interfaced using Tensorboard

In [1]:
import torch
import torchvision
# from torchvision import transforms
from torch.utils import data
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torch.utils.tensorboard import SummaryWriter

%matplotlib inline

enable_gpu = True

DEBUG = False

device = torch.device("cpu")
if enable_gpu:
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


Launch the Tensorboard Environment for monitoring the preformance of the model and the output states
To open it run the following command

``` bash
  tensorboard --log_dir runs 
```


In [2]:
writer = SummaryWriter()

## Dataset

The dataset shown below contains the metadata for each image and the annotations

In [3]:
train_data = pd.read_csv("train.csv")
train_data

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations
0,0,40258,0,0,0-0,[]
1,0,40258,1,1,0-1,[]
2,0,40258,2,2,0-2,[]
3,0,40258,3,3,0-3,[]
4,0,40258,4,4,0-4,[]
...,...,...,...,...,...,...
23496,2,29859,10755,2983,2-10755,[]
23497,2,29859,10756,2984,2-10756,[]
23498,2,29859,10757,2985,2-10757,[]
23499,2,29859,10758,2986,2-10758,[]


### Splitting the Dataset

These models are not designed to be trained on empty annotation sets. To filter these out and create a dataset that is useable, the annotation lists that are not empty are taken out.
This does tamper with the index so the index is reset for easy indexing

In [4]:
anno = train_data[train_data.annotations!='[]'].reset_index(drop=True)
anno

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations
0,0,40258,16,16,0-16,"[{'x': 559, 'y': 213, 'width': 50, 'height': 32}]"
1,0,40258,17,17,0-17,"[{'x': 558, 'y': 213, 'width': 50, 'height': 32}]"
2,0,40258,18,18,0-18,"[{'x': 557, 'y': 213, 'width': 50, 'height': 32}]"
3,0,40258,19,19,0-19,"[{'x': 556, 'y': 214, 'width': 50, 'height': 32}]"
4,0,40258,20,20,0-20,"[{'x': 555, 'y': 214, 'width': 50, 'height': 32}]"
...,...,...,...,...,...,...
4914,2,29859,10628,2856,2-10628,"[{'x': 92, 'y': 532, 'width': 40, 'height': 37}]"
4915,2,29859,10629,2857,2-10629,"[{'x': 78, 'y': 569, 'width': 40, 'height': 37}]"
4916,2,29859,10630,2858,2-10630,"[{'x': 65, 'y': 606, 'width': 41, 'height': 37}]"
4917,2,29859,10631,2859,2-10631,"[{'x': 51, 'y': 643, 'width': 44, 'height': 37}]"


After that is done, we need to create our three dataset from this dataset in order to check the overall performance of the model. 
To do this, the dataset is converted to a numpy array and split using train test split

In [5]:
numpy_index = anno.to_numpy()

In [6]:
X = np.array([numpy_index[:,0],numpy_index[:,2]]).transpose()
Y = numpy_index[:,-1]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.30)
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5)

### Pytorch Dataset

This is the Pytorch Dataset for the which is then used for intergrating Pytorch's dataloader
This class is made with the variblity needed for this dataset

In [7]:
class VisionDataset(data.Dataset):

    def __init__(self, data_x, data_y, transforms = None):
        self.images = data_x
        self.anntations = data_y
        self.transforms = transforms
            
    def __getitem__(self, index):
        video_num = self.images[index][0]
        frame_num = self.images[index][1]
        img = Image.open(f"./train_images/video_{video_num}/{frame_num}.jpg")
        annotations_input = list(eval(self.anntations[index]))
        annotation_list = []
        area_list = []
        for annotation in annotations_input:
            xmin = annotation['x']
            ymin = annotation['y']
            xmax = xmin + annotation['width']
            ymax = ymin + annotation['height']
            area_list.append((ymax - ymin)*(xmax - xmin))
            annotation_list.append([xmin, ymin, xmax, ymax])
        
        boxes = torch.as_tensor(annotation_list, dtype=torch.float16)
        labels = torch.ones((len(annotation_list), ), dtype=torch.int64)
        img_id = torch.tensor([frame_num])
        areas = torch.as_tensor(area_list, dtype=torch.float32)
        iscrowd = torch.zeros((len(annotation_list)), dtype=torch.int32)

        annotations = {}
        annotations['boxes'] = boxes
        annotations['labels'] = labels
        annotations['image_id'] = img_id
        annotations['area'] = areas
        annotations['iscrowd'] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)
        return img, annotations

    def __len__(self) -> int:
        return len(self.images)

Here is a minimal list of transforms.

THe batch size is four at max due to memory limitations ( 6GB of VRAM is not enough for this model)
The debug size is used when debugging the model

In [8]:
transform_list = [ 
                  torchvision.transforms.ToTensor()]
transforms = torchvision.transforms.Compose(transform_list)

batch_size = 8

if DEBUG:
  batch_size = 2 #

train_dataset = VisionDataset(x_train, y_train, transforms)

val_dataset = VisionDataset(x_val, y_val, transforms)

test_dataset = VisionDataset(x_test, y_test, transforms)

### Building the Dataloaders

Using the datasets above we create a dataloaders using Pytorch's built in dataloader

In [9]:
def collate_fn(batch):
  return tuple(zip(*batch))

train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=6, collate_fn=collate_fn)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=6, collate_fn=collate_fn)


## Model

THe Resnet50 Model is the one in Pytorch's Library.

In [10]:
def get_model_instance_segmentation(num_classes):

  model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)

  in_features = model.roi_heads.box_predictor.cls_score.in_features

  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

  return model

## Train Functions

These functions consolidates the train and validate code into a single function inside each training loop

The debug versions were made in order to find errors with the each step for training

In [11]:
def train(model, dataloader : data.DataLoader, tenosrboard_writer : SummaryWriter, max_iters = None):
  i = 0
  losses = 0
  for imgs, annotations in dataloader:
    i += 1
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    loss_dict = model(imgs, annotations)
    losses = sum(loss for loss in loss_dict.values())
    optimizer.zero_grad()
    losses.backward()
    optimizer.step()
    print("\r", end="")
    print(f'Iteration: {i}/{len(train_dataloader)}, Loss: {losses}', end="")
    tenosrboard_writer.add_scalar("Training Batch Losses", losses)
    if max_iters is not None:
      if i > max_iters:
        print("Exiting Validation Loop")
        break
  
  return losses

In [12]:
def train_debug(model, dataloader : data.DataLoader, tenosrboard_writer : SummaryWriter,  num_iters = 99999999):
  i = 0
  losses = 0
  for imgs, annotations in dataloader:
    i += 1
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    loss_dict = model(imgs, annotations)
    losses = sum(loss for loss in loss_dict.values())
    optimizer.zero_grad()
    losses.backward()
    optimizer.step()
    print("\r", end="")
    print(f'Iteration: {i}/{len(train_dataloader)}, Loss: {losses}', end="")
    tenosrboard_writer.add_scalar("Training Batch Losses", losses)
    if i > num_iters:
      print("Exiting Training Loop")
      break
  
  return losses

## Validation Functions

These functions check the current accuracy of the model and publish it to tensorboard.
It also take the first validation batch and checks puts those images onto the tensorboard to see how the model is progressing.

In [13]:
def validate(model, dataloader : data.DataLoader, tenosrboard_writer : SummaryWriter, img_count = 0, max_iters = None):
  val_loss = 0
  with torch.no_grad():
    j = 0
    i = 0
    model.train()
    for imgs, annotations in dataloader:
      i += 1
      imgs = list(img.to(device) for img in imgs)
      annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
      loss_dict = model(imgs, annotations)
      losses = sum(loss for loss in loss_dict.values())
      if j == 0:
        j = 1
        model.eval()
        pre_annotations = model(imgs)
        imgs = list(img.to("cpu") for img in imgs)
        act_annotations = [{k: v.to("cpu") for k, v in t.items()} for t in annotations]
        act_imgs = []
        pre_imgs = []
        for i in range(len(imgs)):
          act_imgs.append(torchvision.utils.draw_bounding_boxes(imgs[i].type(torch.uint8),
                                                                act_annotations[i]['boxes']))
          pre_imgs.append(torchvision.utils.draw_bounding_boxes(imgs[i].type(torch.uint8), 
                                                                pre_annotations[i]['boxes']))
        act_grid = torchvision.utils.make_grid(act_imgs)
        pre_grid = torchvision.utils.make_grid(pre_imgs)
        tenosrboard_writer.add_image("Actual Labels", act_grid, img_count)
        tenosrboard_writer.add_image("Predicted Labels", pre_grid, img_count)
        model.train()
        if max_iters is not None:
          if i > max_iters:
            print("Exiting Validation Loop")
            break
        
  return val_loss


In [14]:
def validate_debug(model, dataloader : data.DataLoader, tenosrboard_writer : SummaryWriter, img_count = 0, ):
  val_loss = 0
  with torch.no_grad():
    j = 0
    i = 0
    model.train()
    for imgs, annotations in dataloader:
      i += 1
      imgs = list(img.to(device) for img in imgs)
      annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
      loss_dict = model(imgs, annotations)
      val_loss = sum(loss for loss in loss_dict.values())
      if j == 0:
        model.eval()
        j = 1
        pre_annotations = model(imgs)
        imgs = list(img.to("cpu")*255 for img in imgs)
        act_annotations = [{k: v.to("cpu") for k, v in t.items()} for t in annotations]
        act_imgs = []
        pre_imgs = []
        for i in range(len(imgs)):
          act_imgs.append(torchvision.utils.draw_bounding_boxes(imgs[i].type(torch.uint8),
                                                                act_annotations[i]['boxes']))
          pre_imgs.append(torchvision.utils.draw_bounding_boxes(imgs[i].type(torch.uint8), 
                                                                pre_annotations[i]['boxes']))
        act_grid = torchvision.utils.make_grid(act_imgs)
        pre_grid = torchvision.utils.make_grid(pre_imgs)
        tenosrboard_writer.add_image("Actual Labels", act_grid, img_count)
        tenosrboard_writer.add_image("Predicted Labels", pre_grid, img_count)
        model.train()
        
      if i > max_iters:
        print("Exiting Validation Loop")
        break
  return val_loss

## Hyperparameters (Sans Batch Size)



In [15]:
num_classes = 2
num_epochs = 40
if DEBUG:
  num_epochs = 2

model = get_model_instance_segmentation(num_classes)

model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr = 0.005, momentum=0.9, weight_decay=0.005)
losses = 0
val_loss = 0
num_batches_run = 0

In [16]:
for epoch in range(num_epochs):
  train_losses = 0
  val_loss = 0
  if DEBUG:
    train_losses = train_debug(model, train_dataloader, writer, 10)
    val_loss = validate_debug(model, val_dataloader, writer, epoch, 10)
  else:
    train_losses = train(model, train_dataloader, writer)
    val_loss = validate(model, val_dataloader, writer, epoch)
  print(f'Epoch Length: {len(train_dataloader)}, Train Loss: {losses}, Val Loss: {val_loss}')
  writer.add_scalars("Epoch Losses", {"Test":train_losses, "Validation":val_loss}, epoch)

Iteration: 431/431, Loss: 0.23095811903476715Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.49440270662307743Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.18438373506069183Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.22802816331386566Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.16689980030059814Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.17525008320808413Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.36067140102386475Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.44644123315811167Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.30061116814613343Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.29209059476852417Epoch Length: 431, Train Loss: 0, Val Loss: 0
Iteration: 431/431, Loss: 0.45992541313171387Epoch Length: 431, Train Loss: 0, Val Loss: 0

In [17]:
test_dataloader = data.DataLoader(test_dataset, 1, num_workers=12, collate_fn=collate_fn)

test_loss_values = []

i = 0

model.train()

with torch.no_grad():
  for imgs, annotations in test_dataloader:
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    loss_dict = model(imgs, annotations)
    test_loss = sum(loss for loss in loss_dict.values())
    test_loss_values.append(test_loss)

print(f'Test Losses: {sum(test_loss_values)/len(test_dataset)}')

Test Losses: 0.2644938826560974


In [18]:
torch.save(model, "Model_Result.pt")

In [19]:
writer.close()