The NVIDIA System Management Interface (nvidia-smi) is a command-line utility, built on top of the NVIDIA Management Library (NVML), intended to aid in the management and monitoring of NVIDIA GPU devices. 

In [None]:
!nvidia-smi

Thu Aug  6 15:01:07 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.57       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# install dependencies: 

In [None]:
# install dependencies: 
# !pip install cython pyyaml==5.1
# !pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# import torch, torchvision
# print(torch.__version__, torch.cuda.is_available())
# !gcc --version


!pip install pyyaml==5.1 pycocotools>=2.0.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab
assert torch.__version__.startswith("1.6")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html

# Import section 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import random
import cv2


import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()



from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer

from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog

import os
import json
from detectron2.structures import BoxMode


# Mount Google Drive at /content/gdrive; the dataset and checkpoint paths used
# in the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


#  Path Define + Load csv file 

In [None]:
# Folder holding the training images; get_wheat_dicts() joins "<image_id>.jpg" onto it.
train_path = "/content/gdrive/My Drive/Global Wheat Detection/train"

# Annotation table: one row per bounding box, keyed by image_id.
train_df = pd.read_csv('/content/gdrive/My Drive/Global Wheat Detection/train.csv')

## Show data frame : 

In [None]:
train_df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [None]:
# train_df['width'].unique()
# train_df['height'].unique()

## Show boxes example for one random image 



In [None]:
# Inspect the raw `bbox` column for the first image group only (the loop exits
# after one iteration).
for image_id, group in train_df.groupby('image_id'):
    b = group['bbox'].values
    print(type(b), b)
    break

<class 'numpy.ndarray'> ['[0, 654, 37, 111]' '[0, 817, 135, 98]' '[0, 192, 22, 81]'
 '[4, 342, 63, 38]' '[82, 334, 82, 81]' '[30, 296, 48, 49]'
 '[176, 316, 70, 54]' '[176, 126, 69, 51]' '[203, 38, 42, 85]'
 '[3, 142, 89, 58]' '[236, 0, 60, 25]' '[329, 0, 75, 57]'
 '[796, 0, 69, 96]' '[659, 24, 59, 90]' '[540, 81, 140, 80]'
 '[233, 152, 89, 51]' '[422, 159, 58, 50]' '[462, 153, 205, 64]'
 '[468, 210, 108, 53]' '[417, 235, 136, 88]' '[287, 257, 56, 51]'
 '[283, 322, 117, 76]' '[393, 329, 174, 100]' '[606, 346, 47, 57]'
 '[611, 286, 70, 56]' '[718, 305, 54, 69]' '[709, 179, 102, 80]'
 '[813, 191, 120, 65]' '[862, 121, 65, 52]' '[876, 400, 80, 104]'
 '[951, 422, 52, 55]' '[763, 414, 69, 54]' '[633, 462, 77, 45]'
 '[438, 436, 104, 51]' '[356, 448, 65, 50]' '[292, 418, 69, 79]'
 '[251, 528, 75, 62]' '[421, 501, 52, 49]' '[692, 487, 77, 66]'
 '[769, 474, 90, 101]' '[692, 685, 83, 58]' '[611, 710, 72, 92]'
 '[417, 635, 70, 71]' '[706, 768, 103, 57]' '[820, 755, 127, 56]'
 '[899, 730, 83, 52]'

# Data Preprocessing
Grouping all the boxes according to the name of the image

In [None]:
# One entry per image: its id (used later as the file stem) and the array of
# raw bbox strings that belong to it.
total_data = [
    {'filename': image_id, 'bbox': group['bbox'].values}
    for image_id, group in tqdm_notebook(train_df.groupby('image_id'))
]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=3373.0), HTML(value='')))




In total there are 3373 images

In [None]:
len(total_data)

3373

# Arrange all the images in dictionary - Create Dataset


---


For standard tasks, we load the original dataset into list[dict] with a specification similar to COCO’s json annotations. This is our standard representation for a dataset.

`file_name`: the full path to the image file. Rotation or flipping may be applied if the image has EXIF metadata.

`height, width`: integer. The shape of the image.

`image_id `(str or int): a unique id that identifies this image. Required by evaluation to identify the images, but a dataset may use it for different purposes.

`bbox (list[float])`: list of 4 numbers representing the bounding box of the instance.

`bbox_mode` (int): the format of bbox. It must be a member of structures.BoxMode. Currently supports: BoxMode.XYXY_ABS, BoxMode.XYWH_ABS.

`category_id` (int): an integer in the range [0, num_categories-1] representing the category label. The value num_categories is reserved to represent the “background” category, if applicable.

`annotations (list[dict])`: each dict corresponds to annotations of one instance in this image. Required by instance detection/segmentation or keypoint detection tasks, but can be an empty list. 

[standard-dataset-dicts](https://detectron2.readthedocs.io/tutorials/datasets.html#standard-dataset-dicts)

In [None]:

def get_wheat_dicts(total_data, image_dir=None, default_size=(1024, 1024)):
    """Convert grouped wheat annotations into detectron2's standard dataset dicts.

    Args:
        total_data: iterable of dicts with the keys ``'filename'`` (image id
            without extension) and ``'bbox'`` (iterable of boxes; each box is
            either a JSON string such as ``"[x, y, w, h]"`` or an
            already-parsed sequence of four numbers). Optional ``'height'`` /
            ``'width'`` keys override ``default_size`` for that image.
        image_dir: folder that contains the ``<filename>.jpg`` images. When
            omitted, the notebook-level ``train_path`` is used.
        default_size: ``(height, width)`` applied to entries that do not carry
            their own size.

    Returns:
        list[dict]: one record per entry with ``file_name``, ``image_id`` (the
        entry's position in ``total_data``), ``height``, ``width`` and
        ``annotations``. Every annotation has a ``bbox`` of floats,
        ``bbox_mode`` set to ``BoxMode.XYWH_ABS`` and ``category_id`` 0.
    """
    if image_dir is None:
        # Looked up at call time so the notebook global can be (re)defined later.
        image_dir = train_path
    default_height, default_width = default_size

    dataset_dicts = []
    for idx, v in enumerate(total_data):
        annotations = []
        for raw_box in v['bbox']:
            # The CSV stores each box as a JSON list string; also accept boxes
            # that were parsed upstream.
            coords = json.loads(raw_box) if isinstance(raw_box, str) else raw_box
            annotations.append({
                # Plain Python floats, matching the documented list[float] format.
                'bbox': [float(c) for c in coords],
                'bbox_mode': BoxMode.XYWH_ABS,
                'category_id': 0,  # single class
            })

        dataset_dicts.append({
            "file_name": os.path.join(image_dir, v["filename"] + '.jpg'),
            "image_id": idx,
            "height": v.get("height", default_height),
            "width": v.get("width", default_width),
            "annotations": annotations,
        })
    return dataset_dicts




#  Register a Dataset and “Metadata” for Datasets for Train and Val datasets

***Register a Dataset - DatasetCatalog***

The registered function can do arbitrary things, but it should return the data in the following format:

Detectron2’s standard dataset dict, This will make it work with many other builtin features in detectron2, so it’s recommended to use it when it’s sufficient.


---


***Metadata - MetadataCatalog***

Metadata is a key-value mapping that contains information that’s shared among the entire dataset, and usually is used to interpret what’s in the dataset, e.g., names of classes, colors of classes, root of files, etc.

If you register a new dataset through DatasetCatalog.register, you may also want to add its corresponding metadata through MetadataCatalog.get(dataset_name).some_key = some_value, to enable any features that need the metadata.

In [None]:

# Split the per-image records: the first 90% for training, the remaining 10% for validation.
index = int(0.9 * len(total_data))
train_data = total_data[:index]
val_data = total_data[index:]

folders = ['train', 'val']
for i, d in enumerate([train_data, val_data]):
    dataset_name = "wheat_" + folders[i]
    # DatasetCatalog.register() refuses a name that is already registered, so
    # guard the call to keep this cell re-runnable in a live kernel.
    # NOTE: a re-run keeps the loader registered the first time; restart the
    # kernel if `total_data` changed and the split must be rebuilt.
    if dataset_name not in DatasetCatalog.list():
        # `d=d` binds the current split now; a plain closure would see only the
        # last value of the loop variable.
        DatasetCatalog.register(dataset_name, lambda d=d: get_wheat_dicts(d))
    MetadataCatalog.get(dataset_name).set(thing_classes=["wheat"])


# Display 3 random training images using Visualizer and Metadata

## Visualizer

Visualizer that draws data about detection/segmentation on images.

It contains methods like draw_{text,box,circle,line,binary_mask,polygon} that draw primitive objects to images


`visualizer.draw_dataset_dict`  - Draw annotations/segmentations in Detectron2 Dataset format.

Args:

    dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.  
Returns:

    output (VisImage): image object with visualizations. 

In [None]:
# Metadata of the registered "wheat_train" dataset (its thing_classes were set
# at registration); passed to the Visualizer in the plotting cells.
wheat_metadata = MetadataCatalog.get("wheat_train")

In [None]:
# train_data[0]['bbox'][0]

In [None]:
dataset_dicts = get_wheat_dicts(train_data)
# Draw the ground-truth boxes of (up to) three randomly chosen training images.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    img = cv2.imread(d["file_name"])  # OpenCV loads images as BGR
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image: " + d["file_name"])
    # Visualizer expects RGB input, hence the channel flip.
    visualizer = Visualizer(img[:, :, ::-1], metadata=wheat_metadata, scale=1)
    vis = visualizer.draw_dataset_dict(d)
    plt.figure(figsize=[10, 20])
    # get_image() is already RGB, which is what matplotlib expects; flipping
    # the channels again (as needed for cv2.imshow) would swap red and blue.
    plt.imshow(vis.get_image())
    plt.show()

Output hidden; open in https://colab.research.google.com to view.

# RetinaNet 



---

RetinaNet adopts the Feature Pyramid Network (FPN) proposed by Lin, Dollar, et al. (2017) as its backbone, which is in turn built on top of ResNet in a fully convolutional fashion. The fully convolutional nature enables the network to take an image of an arbitrary size and outputs proportionally sized feature maps at multiple levels in the feature pyramid.


![alt text](https://blog.zenggyu.com/post/2018-12-05/fig_1.jpg)

[RetinaNet Explained and Demystified](https://blog.zenggyu.com/en/post/2018-12-05/retinanet-explained-and-demystified/)




---

## RetinaNet Setup

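`cfg.merge_from_file` - load config values from a YAML file.

`cfg.MODEL.WEIGHTS` - the checkpoint to initialise the model from; here, the URL of the COCO-pretrained weights returned by `model_zoo.get_checkpoint_url`.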

```
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))

cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_101_FPN_3x.yaml")  
```



This is the number of foreground classes; we have only one class, wheat.

```
  cfg.MODEL.RETINANET.NUM_CLASSES = 1
```


Inference cls score threshold, only anchors with score > INFERENCE_TH are considered for inference (to improve speed)


```
  cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
  cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
  cfg.MODEL.RETINANET.NMS_THRESH_TEST = 0.5
```

 Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets

 ```
  cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
```

Loss parameters

![alt text](https://miro.medium.com/max/784/1*FEu_aqp-n1gQ0M-t-OkilQ.png)
  ```
  cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
  cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
  cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.3
```

In [None]:
def retinanet_setup():
  """Configure the notebook-level ``cfg`` for a single-class RetinaNet.

  Merges the model-zoo config "COCO-Detection/retinanet_R_101_FPN_3x.yaml"
  into ``cfg``, points ``cfg.MODEL.WEIGHTS`` at the matching model-zoo
  checkpoint URL, and then sets the RetinaNet head options (class count,
  inference thresholds, box-regression weights, loss parameters).

  Mutates ``cfg`` in place and returns None.
  """
  zoo_entry = "COCO-Detection/retinanet_R_101_FPN_3x.yaml"
  cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)

  retina = cfg.MODEL.RETINANET

  # Only one foreground class: wheat.
  retina.NUM_CLASSES = 1

  # Inference-time filtering.
  retina.SCORE_THRESH_TEST = 0.05
  retina.TOPK_CANDIDATES_TEST = 1000
  retina.NMS_THRESH_TEST = 0.5

  # Weights on (dx, dy, dw, dh) for the anchor regression targets.
  retina.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

  # Loss parameters.
  retina.FOCAL_LOSS_GAMMA = 2.0
  retina.FOCAL_LOSS_ALPHA = 0.25
  retina.SMOOTH_L1_LOSS_BETA = 0.3

In [None]:
def faster_rcnn_setup():
  """Configure the notebook-level ``cfg`` for a single-class Faster R-CNN.

  Merges the model-zoo config
  "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml" into ``cfg``, points
  ``cfg.MODEL.WEIGHTS`` at the matching model-zoo checkpoint URL, and sets
  the ROI-head batch size per image and class count.

  Mutates ``cfg`` in place and returns None.
  """
  zoo_entry = "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
  cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)

  roi_heads = cfg.MODEL.ROI_HEADS
  roi_heads.BATCH_SIZE_PER_IMAGE = 128  # 512 was tried as an alternative
  roi_heads.NUM_CLASSES = 1             # only the wheat class

General config for all models in detectron2

In [None]:


# Start from detectron2's default config, then layer the model-specific
# settings on top (swap in faster_rcnn_setup() to train the other model).
cfg = get_cfg()
retinanet_setup()

# To continue from a previously trained checkpoint instead of the model-zoo weights:
# cfg.MODEL.WEIGHTS =  '/content/gdrive/My Drive/Global Wheat Detection/yonatan_checkpoints/outputs/model_final.pth'

# --- Data ---------------------------------------------------------------
cfg.DATALOADER.NUM_WORKERS = 4
cfg.DATASETS.TRAIN = ("wheat_train",)
cfg.DATASETS.TEST = ()
# NOTE(review): DATASETS.TEST is empty here, so presumably no periodic
# evaluation actually runs despite this period — confirm.
cfg.TEST.EVAL_PERIOD = 1000

# --- Solver -------------------------------------------------------------
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MOMENTUM = 0.9
# NOTE(review): GAMMA presumably only applies at the SOLVER.STEPS milestones
# inherited from the merged config — confirm they fall below MAX_ITER.
cfg.SOLVER.GAMMA = 0.05
cfg.SOLVER.MAX_ITER = 30000
# Save a checkpoint after every this number of iterations
cfg.SOLVER.CHECKPOINT_PERIOD = 10000



Loading config /usr/local/lib/python3.6/dist-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


# Model training

In this section we use DefaultTrainer to create a trainer. The trainer trains the model according to the config defined in the section above.


In [None]:
# Write training output to a Drive folder (created if missing); the
# "Load Model" cell reads model_final.pth from this same folder.
cfg.OUTPUT_DIR = '/content/gdrive/My Drive/Global Wheat Detection/yonatan_checkpoints/outputs'
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
# NOTE(review): resume=False presumably starts from cfg.MODEL.WEIGHTS and
# ignores earlier checkpoints in OUTPUT_DIR — confirm before re-running after
# an interrupted session.
trainer.resume_or_load(resume=False)
trainer.train()

# Load Model 

load model with DATASETS.TEST = "wheat_val" 

In [None]:

# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Use the fine-tuned checkpoint from the training output folder on Drive. The
# path is spelled out so this cell also works when the training cell was skipped.
cfg.MODEL.WEIGHTS =  '/content/gdrive/My Drive/Global Wheat Detection/yonatan_checkpoints/outputs/model_final.pth'
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 
# Raise the inference score threshold from the 0.05 set in retinanet_setup() to 0.5.
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5
cfg.DATASETS.TEST = ("wheat_val", )
# Build the predictor from the updated config.
predictor = DefaultPredictor(cfg)

# View predictions on multiple images

In [None]:
from detectron2.utils.visualizer import ColorMode
dataset_dicts = get_wheat_dicts(val_data)
# wheat_metadata = MetadataCatalog.get("wheat_val")
# Run the predictor on (up to) three random validation images and draw its detections.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    im = cv2.imread(d["file_name"])  # OpenCV loads images as BGR
    if im is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image: " + d["file_name"])
    # NOTE(review): the predictor is fed the BGR array as loaded — presumably
    # matching cfg.INPUT.FORMAT; confirm.
    outputs = predictor(im)
    # print(outputs)
    # Visualizer expects RGB input, hence the channel flip.
    v = Visualizer(im[:, :, ::-1],
                   metadata=wheat_metadata,
                   scale=0.8,
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=[10, 20])
    # get_image() is already RGB, which is what matplotlib expects; flipping
    # the channels again (as needed for cv2.imshow) would swap red and blue.
    plt.imshow(v.get_image())
    plt.show()

Output hidden; open in https://colab.research.google.com to view.


# Understanding Evaluation Metrics


---

This competition is evaluated on the **mean average precision** at different intersection over union (IoU) thresholds.
 
To understand mAP, we will explain about precision and recall first.
* Recall is the true positive rate, i.e. of all the actual positives, how many were predicted as positive. 
* Precision is the positive predictive value, i.e. of all the positive predictions, how many are true positives. Read more in evaluation metrics for classification.



---
## mAP 

**mAP (mean average precision)** is the average of AP. In some context, we compute the AP for each class and average them. But in some context, they mean the same thing. For example, under the COCO context, there is no difference between AP and mAP.

# Evaluate AR + AP 

Evaluate AR for object proposals, AP for instance detection/segmentation, AP for keypoint detection outputs using COCO's metrics


AP (Average Precision) is a popular metric for measuring the accuracy of object detectors such as Faster R-CNN, SSD, etc. It is the average of the precision values over recall levels from 0 to 1. 

![alt text](https://miro.medium.com/max/1925/1*_IkyrFHlqt_xCovk7l0rQQ.png)



In [None]:
#//AP=46.032 BASE_LR = 0.01 TS=0.5 SOLVER.MOMENTUM=0.9 IMS_PER_BATCH = 8 FOCAL_LOSS_GAMMA = 0.9 FOCAL_LOSS_ALPHA = 0.6 SMOOTH_L1_LOSS_BETA = 0.2//


from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
# Evaluate the predictor's model on the registered "wheat_val" split with COCO
# metrics; the converted COCO-format annotations are cached under ./output/.
# NOTE(review): the third positional argument (False) is presumably the
# `distributed` flag — confirm against the installed detectron2 version.
evaluator = COCOEvaluator("wheat_val", cfg, False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "wheat_val")
# Last expression of the cell, so the returned metrics are shown as its output.
inference_on_dataset(predictor.model, val_loader, evaluator)

[32m[08/06 16:11:01 d2.evaluation.coco_evaluation]: [0m'wheat_val' is not registered by `register_coco_instances`. Therefore trying to convert it to COCO format ...
[32m[08/06 16:11:01 d2.data.datasets.coco]: [0mConverting annotations of dataset 'wheat_val' to COCO format ...)
[32m[08/06 16:11:01 d2.data.datasets.coco]: [0mConverting dataset dicts into COCO format
[32m[08/06 16:11:02 d2.data.datasets.coco]: [0mConversion finished, #images: 338, #annotations: 15116
[32m[08/06 16:11:02 d2.data.datasets.coco]: [0mCaching COCO format annotations at './output/wheat_val_coco_format.json' ...
[32m[08/06 16:11:03 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|   wheat    | 15116        |
|            |              |[0m
[32m[08/06 16:11:03 d2.data.common]: [0mSerializing 338 elements to byte tensors and concatenating them all ...
[32m[08/06 16:11:03 d2.data.common]: [0mSerialized dataset ta

OrderedDict([('bbox',
              {'AP': 47.35426124462239,
               'AP50': 83.98597983149305,
               'AP75': 48.13063738128962,
               'APl': 51.84472146634682,
               'APm': 46.614600928733616,
               'APs': 5.6988418622082})])

# Submission Section

In [None]:
from pathlib import Path
# Root folder of the competition data on the mounted Drive. It must be defined
# here: `sub_path` below is derived from it, and with the definition commented
# out the cell raised NameError on a fresh kernel.
data_dir = Path('/content/gdrive/My Drive/Global Wheat Detection')
test_img_dir = data_dir / 'test'

# The sample submission supplies the test image ids and the expected column layout.
sub_path = data_dir / 'sample_submission.csv'
sub_df = pd.read_csv(sub_path)

In [None]:
from tqdm import tqdm
def submit():
    """Predict boxes for every row of ``sub_df`` and fill in ``PredictionString``.

    For each row, the image ``<image_id>.jpg`` is read from ``test_img_dir``
    and passed to the notebook-level ``predictor``. Each predicted box is
    converted from corner form (x_min, y_min, x_max, y_max) to
    (x, y, width, height), truncated to integers, and written after its score
    (rounded to 4 decimals) as space-separated ``score x y w h`` groups.

    Returns:
        The notebook-level ``sub_df``, updated in place.
    """
    for row_idx, row in tqdm(sub_df.iterrows(), total=len(sub_df)):
        image = cv2.imread(os.path.join(test_img_dir, row.image_id + '.jpg'))
        instances = predictor(image)['instances']

        box_arrays = [box.cpu().detach().numpy() for box in instances.pred_boxes]
        confidences = instances.scores.cpu().detach().numpy()

        tokens = []
        for (x_min, y_min, x_max, y_max), confidence in zip(box_arrays, confidences):
            tokens.append(round(confidence, 4))
            # Corner form -> (x, y, w, h); int() truncates toward zero.
            tokens += [int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)]

        sub_df.loc[row_idx, 'PredictionString'] = ' '.join(str(token) for token in tokens)

    return sub_df

In [None]:
# Fill in the predictions for every test image, then write the submission
# file (without the DataFrame index column).
sub_df = submit()
sub_df.to_csv('submission.csv', index=False)

In [None]:
sub_df