In [3]:
!pip install -qq wandb
!pip install -qq transformers
!pip install -qq pytorch-lightning

In [4]:
import sys
import json
import wandb
import torchvision
import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
from collections import Counter
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from torchvision import transforms
from torchvision.datasets import CocoDetection
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from transformers import AutoFeatureExtractor, AutoImageProcessor, AutoModelForObjectDetection

**Project Paths**

In [5]:
# Every project folder lives under the same Google Drive directory.
PROJECT_ROOT = '/content/drive/My Drive/MLDS/MIPTDataScience/FinalProject'

DATA_PATH = f'{PROJECT_ROOT}/data'        # image patches and annotation files
MODELS_PATH = f'{PROJECT_ROOT}/models'    # saved model weights
MODULES_PATH = f'{PROJECT_ROOT}/modules'  # helper .py modules imported below

**Import modules from .py files**

Some functions are also used by the Streamlit app, so I moved them from Google Colab into .py files.

In [8]:
# Put MODULES_PATH first on the import search path so the helper .py files
# stored there can be imported in the next cell.
sys.path.insert(0, MODULES_PATH)

In [9]:
from custom_dataset import CustomDataset
from image_helper import annotate_image, annotate_image_predicted

In [10]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline

In [11]:
# Show every column when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
# White grid background; hide the left, right and top axis spines.
sns.set_style("whitegrid", {'axes.grid' : True, 'axes.spines.left': False, 'axes.spines.right': False, 'axes.spines.top': False})

# Project colour palette (hex codes). Note that '#8064A2' is listed twice.
COLORS = ['#1F497D', '#4F81BD',  '#C0504D', '#9BBB59', '#8064A2', '#4BACC6', '#F79646', '#6B7C87', '#8064A2', '#00728C', '#6a1635', '#8EC3D8']
# As the last expression of the cell this only displays the palette.
# NOTE(review): it does not register COLORS as the default palette — if that
# was the intent, sns.set_palette(COLORS) would be needed; confirm.
sns.color_palette(COLORS)

In [12]:
import warnings
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)

## Project Context

**The Streamlit application is available at:**

https://julijjegorov-streamlitcloud-oiltankersdetectionapp-g90gtu.streamlit.app/

Dataset from Kaggle: https://www.kaggle.com/datasets/towardsentropy/oil-storage-tanks?sort=most-comments

From Kaggle:

*Oil storage tanks play an important role in the global economy. Crude oil is stored in tanks at many points between extraction and sale. Storage tanks are also used by nations to stockpile oil reserves. The volume of oil in storage is an important economic indicator. It indicates which oil producing nations are increasing or decreasing production and gives a window into the global demand for energy. At the same time, oil storage information is not transparent. Nations may hide information about oil production, consumption and storage for economic or military reasons. For this reason, companies like Planet and Orbital Insight have made a business of collecting satellite imagery of oil storage tanks and estimating reserve volumes.*

*Tank volume estimation is possible because oil is typically stored in floating head tanks. This particular tank type has a head that sits directly on top of the crude oil to prevent buildup of fumes. As a result, the height of the tank head rises and falls with the volume of oil in the tank. The relative sizes of the exterior shadow cast by the tank itself and the interior shadow cast by the height of the tank head can be used to estimate the tank volume.*

In this project I am going to train a model to detect **floating head tanks** and leave the tank volume estimation part for another day.

## Train / Validation split

Dataset contains images and two annotation files: **labels.json** and 
**labels_coco.json**.

**labels.json** contains labels for all images. Labels are stored as a list of dictionaries, one for each image. Images that do not contain any floating head tanks are given a label of 'skip'. Bounding box labels are in the format of (x, y) coordinate pairs of the four corners of the bounding box.

**labels_coco.json** contains the same labels as the previous file, converted into COCO label format. Here bounding boxes are formatted as [x_min, y_min, width, height].

Note that we can convert bounding boxes to coco format as follows:

x_min, y_min, x_max - x_min, y_max - y_min

where *x_min* and *x_max* are the smallest and largest of two x coordinates, *y_min* and *y_max* are the smallest and largest of two y coordinates, *x_max - x_min* is the width and *y_max - y_min* is the height of the bounding box.

First, let us use **labels.json** file to make a stratified train/validation split.

Let us also check class-balances.


In [13]:
def check_balance(files):
  """Summarise how the rows of ``files`` are spread over ``clean_label``.

  Args:
    files: DataFrame with a ``clean_label`` column.

  Returns:
    A pair of dicts keyed by label in sorted order: the absolute row count
    per label, and the share of all rows per label rounded to two decimals.
  """
  n_rows = len(files)
  counts, shares = {}, {}
  # Labels are unique in value_counts(), so sorting the (label, count) pairs
  # orders them by label.
  for label, count in sorted(files['clean_label'].value_counts().items()):
    counts[label] = count
    shares[label] = round(count / n_rows, 2)
  return counts, shares

For our purpose we can load data directly to pandas dataframe.

In [14]:
def _first_label_or_skip(label):
  """Return the first key of a non-empty label dict, otherwise 'Skip'."""
  if isinstance(label, dict) and len(label) > 0:
    return next(iter(label))
  return 'Skip'


labels_json = pd.read_json(f'{DATA_PATH}/labels.json')
# Collapse each raw label entry into a single class name per image.
labels_json['clean_label'] = labels_json['label'].apply(_first_label_or_skip)

labels_count, labels_count_pct = check_balance(labels_json)
print(labels_count)
print(labels_count_pct)

{'Floating Head Tank': 1476, 'Skip': 8172, 'Tank': 341, 'Tank Cluster': 11}
{'Floating Head Tank': 0.15, 'Skip': 0.82, 'Tank': 0.03, 'Tank Cluster': 0.0}


We are only interested in the largest object class: **Floating Head Tank**.

Let us filter only for *Floating Head Tank* and *Skip* classes. 

In [15]:
# Keep only the class we want to detect plus the object-free 'Skip' images.
is_floating_head = labels_json['clean_label'] == 'Floating Head Tank'
is_skip = labels_json['clean_label'] == 'Skip'
labels_json = labels_json[is_floating_head | is_skip].copy()

labels_count, labels_count_pct = check_balance(labels_json)
print(labels_count)
print(labels_count_pct)

{'Floating Head Tank': 1476, 'Skip': 8172}
{'Floating Head Tank': 0.15, 'Skip': 0.85}


From Ultralytics, developer of YOLOv5:

https://github.com/ultralytics/yolov5/issues/3913

To get the best results on their YOLO trainings:


*   **Images per class.** ≥ 1500 images per class recommended
*   **Instances per class.** ≥ 10000 instances (labeled objects) per class recommended
*   **Image variety.** Must be representative of deployed environment. For real-world use cases we recommend images from different times of day, different seasons, different weather, different lighting, different angles, different sources (scraped online, collected locally, different cameras) etc.
*   **Label consistency.** All instances of all classes in all images must be labelled. Partial labelling will not work.
*   **Label accuracy.** Labels must closely enclose each object. No space should exist between an object and it's bounding box. No objects should be missing a label.
*   **Background images.** Background images are images with no objects that are added to a dataset to reduce False Positives (FP). We recommend about 0-10% background images to help reduce FPs (COCO has 1000 background images for reference, 1% of the total). No labels are required for background images.


We are at the lower limit of the number of images for the *floating head tank* class. Moreover, we should leave at least 25% for the validation set. This would leave us with 1107 observations in train and 369 in validation.

We are also going to add background images amounting to 10% of the floating head tank observations (i.e. 147 background images).

**Undersample 'Skip' class:**

In [16]:
label_skip = labels_json[labels_json['clean_label'] == 'Skip']
label_flht = labels_json[labels_json['clean_label'] == 'Floating Head Tank']

# Background ('Skip') images are capped at 10% of the floating head tank images.
n_background = int(0.1 * len(label_flht))
# A fixed random_state makes the sampled background images identical on every
# run; without it the dataset changes each time the notebook is re-executed.
label_skip_sample = label_skip.sample(n_background, random_state=42)
labels_json_undersampled = pd.concat([label_flht, label_skip_sample], axis=0)

labels_count, labels_count_pct = check_balance(labels_json_undersampled)
print(labels_count)
print(labels_count_pct)

{'Floating Head Tank': 1476, 'Skip': 147}
{'Floating Head Tank': 0.91, 'Skip': 0.09}


In [17]:
# Stratified 75/25 split on the class label.
# random_state is fixed so the split is reproducible: otherwise every re-run
# writes different train/valid annotation files, and weights trained on an
# earlier split could be evaluated on images they were trained on.
train_labels, valid_labels = train_test_split(
    labels_json_undersampled[['id', 'clean_label']],
    test_size=0.25,
    stratify=labels_json_undersampled['clean_label'],
    shuffle=True,
    random_state=42,
)

In [18]:
# Report the absolute size of each split and its share of the undersampled set.
n_total = len(labels_json_undersampled)
n_train = len(train_labels)
n_valid = len(valid_labels)
print(f'NUMBER OF IMAGES IN TRAIN SET: {n_train:,} | {n_train / n_total:.2f}')
print(f'NUMBER OF IMAGES IN VALID SET: {n_valid:,} | {n_valid / n_total:.2f}')

NUMBER OF IMAGES IN TRAIN SET: 1,217 | 0.75
NUMBER OF IMAGES IN VALID SET: 406 | 0.25


In [19]:
# Compare the class balance before undersampling with the balance of each split.
balance_initial = check_balance(labels_json)
balance_train = check_balance(train_labels)
balance_valid = check_balance(valid_labels)
print(f'BALANCE INITIAL SET: {balance_initial}')
print(f'BALANCE TRAIN SET:   {balance_train}')
print(f'BALANCE VALID SET:   {balance_valid}')

BALANCE INITIAL SET: ({'Floating Head Tank': 1476, 'Skip': 8172}, {'Floating Head Tank': 0.15, 'Skip': 0.85})
BALANCE TRAIN SET:   ({'Floating Head Tank': 1107, 'Skip': 110}, {'Floating Head Tank': 0.91, 'Skip': 0.09})
BALANCE VALID SET:   ({'Floating Head Tank': 369, 'Skip': 37}, {'Floating Head Tank': 0.91, 'Skip': 0.09})


At this step, we need to split the **labels_coco.json** file into two files, **labels_coco_train.json** and **labels_coco_valid.json**, in accordance with *train_labels* and *valid_labels*.

In [20]:
# Read the COCO-format annotations shipped with the dataset.
labels_coco_path = f'{DATA_PATH}/labels_coco.json'
with open(labels_coco_path) as coco_file:
  labels_coco_json = json.load(coco_file)

Note that YOLO expects annotations in the COCO format.
To get a compatible json file we need to include the following estimates:



*   Annotation ID: simple running index from 0 to N - 1
*   Bounding Box: we already have it in the labels_coco.json file
*   Area of the Bounding Box: box height * box width
*   Segmentation of the Bounding Box: [x1, y1, x1, y2, x2, y2, x2, y1]
*   Is Crowd Boolean: tells if there are multiple objects that we cannot segment separately and are within one bounding box







In [21]:
def filter_labels_coco(labels_coco: dict, labels_ids: np.ndarray):
  """Build a COCO label dict restricted to the images listed in ``labels_ids``.

  Each kept annotation is completed with the fields added here: ``id``,
  ``segmentation`` (the four bbox corners), ``area`` and ``iscrowd``.

  Args:
    labels_coco: parsed COCO label dict with 'categories', 'annotations' and
      'images' entries; every annotation carries 'image_id' and a
      [x_min, y_min, width, height] 'bbox'.
    labels_ids: ids of the images to keep.

  Returns:
    A new dict with 'categories', 'annotations' and 'images', where images and
    annotations are limited to ``labels_ids``. ``labels_coco`` itself is not
    modified.
  """
  # Set lookup instead of scanning the id array once per image/annotation.
  selected_ids = set(np.asarray(labels_ids).tolist())

  annotations = []
  for idx, annotation in enumerate(labels_coco['annotations']):
    if annotation['image_id'] not in selected_ids:
      continue  # annotation belongs to an image outside this subset
    annotation = dict(annotation)  # copy, so the caller's data stays untouched
    # The id is the position in the full annotation list, so it stays unique
    # and identical across the train and validation subsets.
    annotation['id'] = idx
    bbox = annotation['bbox']
    x1, x2, y1, y2 = bbox[0], bbox[0] + bbox[2], bbox[1], bbox[1] + bbox[3]
    annotation['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
    annotation['area'] = bbox[2] * bbox[3]
    annotation['iscrowd'] = 0
    annotations.append(annotation)

  filter_labels = {'categories': labels_coco['categories'], 'annotations': annotations}
  filter_labels['images'] = [image for image in labels_coco['images'] if image['id'] in selected_ids]
  return filter_labels

In [22]:
# Build one COCO label dict per split from the image ids chosen above.
train_ids = train_labels['id'].values
valid_ids = valid_labels['id'].values
labels_coco_train = filter_labels_coco(labels_coco_json, train_ids)
labels_coco_valid = filter_labels_coco(labels_coco_json, valid_ids)

n_train_images = len(labels_coco_train['images'])
n_valid_images = len(labels_coco_valid['images'])
print(f"NUMBER OF IMAGES IN TRAIN SET: {n_train_images:,}")
print(f"NUMBER OF IMAGES IN VALID SET: {n_valid_images:,}")

NUMBER OF IMAGES IN TRAIN SET: 1,217
NUMBER OF IMAGES IN VALID SET: 406


Let's save filtered labels to disk.

In [23]:
# Persist both label subsets next to the original annotation files.
train_json_path = f'{DATA_PATH}/labels_coco_train.json'
with open(train_json_path, 'w') as train_file:
    json.dump(labels_coco_train, train_file)

valid_json_path = f'{DATA_PATH}/labels_coco_valid.json'
with open(valid_json_path, 'w') as valid_file:
    json.dump(labels_coco_valid, valid_file)

In [24]:
# Reload the label subsets from disk.
train_json_path = f'{DATA_PATH}/labels_coco_train.json'
with open(train_json_path) as train_file:
  labels_coco_train = json.load(train_file)

valid_json_path = f'{DATA_PATH}/labels_coco_valid.json'
with open(valid_json_path) as valid_file:
  labels_coco_valid = json.load(valid_file)

## Custom Dataset




We are going to use Huggingface model 'hustvl/yolos-tiny'.
I tried using 'hustvl/yolos-small', but it runs out of memory in Colab even with the high-RAM runtime.

Note that CustomDataset class is imported from the custom_dataset.py file.

In [25]:
# Preprocessor that matches the pretrained checkpoint.
feature_extractor = AutoFeatureExtractor.from_pretrained('hustvl/yolos-tiny')

# Both splits read from the same image folder; only the annotation file differs.
image_patches_dir = f'{DATA_PATH}/image_patches'
train_dataset = CustomDataset(
    imgage_folder=image_patches_dir,
    annotation_file=f'{DATA_PATH}/labels_coco_train.json',
    feature_extractor=feature_extractor,
)
valid_dataset = CustomDataset(
    imgage_folder=image_patches_dir,
    annotation_file=f'{DATA_PATH}/labels_coco_valid.json',
    feature_extractor=feature_extractor,
)

print("NUMBER OF EXAMPLES IN TRAIN SET:", len(train_dataset))
print("NUMBER OF EXAMPLES IN VALID SET:", len(valid_dataset))

Downloading:   0%|          | 0.00/275 [00:00<?, ?B/s]

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
NUMBER OF EXAMPLES IN TRAIN SET: 1217
NUMBER OF EXAMPLES IN VALID SET: 406


Let us plot random images and check bounding box annotations.
Note that we exclude background images.

*annotate_image* function is imported from *image_helper.py* file.

In [26]:
def plot_random_detection_images(dataset: CocoDetection):
  """Plot up to three randomly chosen annotated images with their boxes drawn.

  Images without annotations (background images) are never selected. Images
  are drawn without replacement, so the same picture cannot appear twice.

  Args:
    dataset: dataset exposing a ``coco`` index (``cats``, ``getImgIds``,
      ``imgToAnns``, ``loadImgs``); image files are read from
      ``{DATA_PATH}/image_patches``.

  Raises:
    ValueError: if the dataset has no annotated image (the previous
      implementation looped forever in that case).
  """
  n_panels = 3
  categories = {k: v['name'] for k, v in dataset.coco.cats.items()}

  # Candidate pool: only images that carry at least one annotation.
  annotated_ids = [image_id for image_id in dataset.coco.getImgIds()
                   if len(dataset.coco.imgToAnns[image_id]) > 0]
  if not annotated_ids:
    raise ValueError('dataset contains no annotated images to plot')

  n_selected = min(n_panels, len(annotated_ids))
  image_idxs = np.random.choice(annotated_ids, size=n_selected, replace=False)

  fig, axes = plt.subplots(1, n_panels, figsize=(50.5, 50.5), sharex=False)
  fig.subplots_adjust(hspace=0.3, wspace=0.3)

  for idx, image_idx in enumerate(image_idxs):
    image_idx = int(image_idx)  # plain int for the COCO lookups
    image_name = dataset.coco.loadImgs(image_idx)[0]['file_name']
    image_path = f'{DATA_PATH}/image_patches/{image_name}'
    annotations = dataset.coco.imgToAnns[image_idx]
    image = annotate_image(image_path, annotations, categories)
    axes[idx].imshow(image)

In [35]:
# Visual sanity check of the training annotations (random images on each run).
plot_random_detection_images(train_dataset)

Output hidden; open in https://colab.research.google.com to view.

## YOLO Transfer Learning

I decided to implement the model within the PyTorch Lightning framework.

While the model is training, the logs and model weights are saved to wandb.ai (Weights & Biases).

In [28]:
def collate_fn(batch):
  """Collate dataset items into one batch dict for the detection model.

  Args:
    batch: sequence of dataset items; element 0 of each item is the image
      tensor and element 1 its target.

  Returns:
    Dict with 'pixel_values' (the images padded together by the module-level
    ``feature_extractor``) and 'labels' (the list of per-image targets).
  """
  images, targets = [], []
  for sample in batch:
    images.append(sample[0])
    targets.append(sample[1])

  padded = feature_extractor.pad(images, return_tensors="pt")
  return {'pixel_values': padded['pixel_values'], 'labels': targets}

# Settings shared by both loaders; only the training loader shuffles and pins memory.
loader_kwargs = dict(collate_fn=collate_fn, batch_size=4, num_workers=4)
train_dataloader = DataLoader(train_dataset, pin_memory=True, shuffle=True, **loader_kwargs)
valid_dataloader = DataLoader(valid_dataset, **loader_kwargs)

In [29]:
class YoloNet(pl.LightningModule):
  """LightningModule that fine-tunes the pretrained 'hustvl/yolos-tiny' detector.

  The data loaders are the module-level ``train_dataloader`` and
  ``valid_dataloader`` objects defined in the notebook.
  """

  def __init__(self, lr, weight_decay):
    """Load the pretrained detector and store the optimizer settings.

    Args:
      lr: learning rate passed to AdamW.
      weight_decay: weight decay passed to AdamW.
    """
    super().__init__()
    # num_labels differs from the checkpoint, so mismatched head weights are
    # allowed to be re-initialised instead of raising.
    self.model = AutoModelForObjectDetection.from_pretrained(
        'hustvl/yolos-tiny',
        num_labels=1,
        ignore_mismatched_sizes=True,
    )
    self.lr = lr
    self.weight_decay = weight_decay
    self.save_hyperparameters()

  def forward(self, pixel_values):
    """Run the wrapped model on ``pixel_values`` and return its outputs."""
    return self.model(pixel_values=pixel_values)

  def common_step(self, batch, batch_idx):
    """Compute the loss for one batch; shared by training and validation.

    Returns:
      Tuple ``(loss, loss_dict)`` taken from the model outputs.
    """
    # Move every target tensor to the module's device before the forward pass.
    targets = []
    for target in batch["labels"]:
      targets.append({name: tensor.to(self.device) for name, tensor in target.items()})

    outputs = self.model(pixel_values=batch['pixel_values'], labels=targets)
    return outputs.loss, outputs.loss_dict

  def _log_loss_components(self, prefix, loss_dict):
    """Log each entry of ``loss_dict`` as a scalar under ``prefix + name``."""
    for name, value in loss_dict.items():
      self.log(prefix + name, value.item())

  def training_step(self, batch, batch_idx):
    """Log the training loss and its components, then return the loss."""
    loss, loss_dict = self.common_step(batch, batch_idx)
    self.log('train_loss', loss)
    self._log_loss_components('train_', loss_dict)
    return loss

  def validation_step(self, batch, batch_idx):
    """Log the validation loss and its components, then return the loss."""
    loss, loss_dict = self.common_step(batch, batch_idx)
    self.log('validation_loss', loss)
    self._log_loss_components('validation_', loss_dict)
    return loss

  def configure_optimizers(self):
    """Create the AdamW optimizer over all parameters of the module."""
    return torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

  def train_dataloader(self):
    """Return the module-level training DataLoader."""
    return train_dataloader

  def val_dataloader(self):
    """Return the module-level validation DataLoader."""
    return valid_dataloader

In [30]:
# Instantiate the detector with the AdamW learning rate and weight decay used for fine-tuning.
model = YoloNet(lr=2.5e-5, weight_decay=1e-4)

Downloading:   0%|          | 0.00/4.13k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0M [00:00<?, ?B/s]

Some weights of YolosForObjectDetection were not initialized from the model checkpoint at hustvl/yolos-tiny and are newly initialized because the shapes did not match:
- class_labels_classifier.layers.2.weight: found shape torch.Size([92, 192]) in the checkpoint and torch.Size([2, 192]) in the model instantiated
- class_labels_classifier.layers.2.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


**Initialize wandb logger, create model checkpoint to save weights and train the model.**

In [32]:
wandb_logger = WandbLogger(project='tanker-volume', log_model='all')
# Checkpoint selection is driven by the lowest logged 'validation_loss'.
checkpoint_callback = ModelCheckpoint(monitor='validation_loss', mode='min')

trainer = Trainer(
    accelerator='gpu',
    devices=1,
    max_epochs=1,
    gradient_clip_val=0.1,
    accumulate_grad_batches=8,
    log_every_n_steps=5,  # set to 5 to generate better charts
    logger=wandb_logger,
    callbacks=[checkpoint_callback],  # used to log the model to W&B at the end of training
)
trainer.fit(model)

Hint: Upgrade with `pip install --upgrade wandb`.
  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                    | Params
--------------------------------------------------
0 | model | YolosForObjectDetection | 6.5 M 
--------------------------------------------------
6.5 M     Trainable params
0         Non-trainable params
6.5 M     Total params
25.885    Total estimated model params size (MB)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.


Below are helper cells to load model weights from wandb artifacts and to save the model to Drive.

In [None]:
# Optionally close a run that is still active before starting a new one:
#wandb.finish()
checkpoint_reference = 'julij-jegorov/tanker-volume/model-2iteycrt:v10'

# Download the checkpoint artifact through a W&B run.
run = wandb.init(project='tanker-volume')
artifact = run.use_artifact(checkpoint_reference, type='model')
artifact_dir = artifact.download()

# Rebuild the LightningModule from the downloaded checkpoint file.
checkpoint_path = Path(artifact_dir) / 'model.ckpt'
model = YoloNet.load_from_checkpoint(checkpoint_path)

In [33]:
# Uncomment to persist the current weights to Drive:
#torch.save(model.state_dict(), f'{MODELS_PATH}/yolonet.pt')

# map_location='cpu' lets weights saved from a GPU session load on a machine
# without CUDA; load_state_dict then copies them into the model's parameters.
model.load_state_dict(torch.load(f'{MODELS_PATH}/yolonet.pt', map_location='cpu'))

<All keys matched successfully>

## Display Results

*annotate_image* and *annotate_image_predicted* 
functions are imported from *image_helper.py* file. 

We know that bounding boxes must be square. To reduce noise, all rectangles whose side lengths differ by more than the Maximum Sides Difference in % (*slider_sides_diff*) can be removed by setting the *remove_rectangles* boolean to True.

In [36]:
# One row of ground-truth images and one row of predictions for the same examples.
n_examples = 5
fig_real, axes_real = plt.subplots(1, n_examples, figsize=(30.5, 5.5), sharex=False)
fig_pred, axes_pred = plt.subplots(1, n_examples, figsize=(30.5, 5.5), sharex=False)

fig_real.suptitle('Annotated Bounding Boxes', fontsize=16)
fig_pred.suptitle('Predicted Bounding Boxes', fontsize=16)

remove_rectangles = True
slider_sides_diff = 0.1

# Sample without replacement so the same validation image is not shown twice.
random_idxs = np.random.choice(len(valid_dataset), n_examples, replace=False)
categories = {k: v['name'] for k, v in valid_dataset.coco.cats.items()}
image_ids = valid_dataset.coco.getImgIds()  # looked up once instead of per iteration
for idx, random_idx in enumerate(random_idxs):
  image_idx = image_ids[random_idx]
  image_name = valid_dataset.coco.loadImgs(int(image_idx))[0]['file_name']
  image_path = f'{DATA_PATH}/image_patches/{image_name}'

  # Ground truth: the stored annotations drawn on the image.
  annotations = valid_dataset.coco.imgToAnns[image_idx]
  image = annotate_image(image_path, annotations, categories)
  axes_real[idx].imshow(image)

  # Prediction: run the model on the preprocessed tensor of the same example.
  # NOTE(review): assumes valid_dataset[i] is the image at getImgIds()[i] —
  # confirm against the ordering used in custom_dataset.py.
  pixel_values, _ = valid_dataset[random_idx]
  pixel_values = pixel_values.unsqueeze(0)  # add a leading batch dimension
  # NOTE(review): 0.0 is presumably the confidence threshold — confirm in image_helper.py.
  image = annotate_image_predicted(model, pixel_values, image_path, 0.0, remove_rectangles, slider_sides_diff)
  axes_pred[idx].imshow(image)

Output hidden; open in https://colab.research.google.com to view.