# Sainfoin Seed Classification FasterRCNN Model

### For Colab Users
Run the code cell below to mount your Google Drive. Set the `BASE_DIR` variable to the location of the cloned GitHub repository folder inside your mounted Drive, given relative to `/content` (for example, 'drive/MyDrive/path/to/dir/repo_folder').

In [1]:
from google.colab import drive
drive.mount('/content/drive')
BASE_DIR = 'drive/MyDrive/github_repos/sainfoin_seed_RCNN'
%cd -q $BASE_DIR


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import datetime
# Show today's date as an ISO-formatted string (YYYY-MM-DD) in the cell output.
datetime.date.today().isoformat()

'2023-09-07'

In [3]:
!pip install -r colab_requirements.txt



### Module Import

In [4]:
import os
import sys
import cv2
import torch
import torchvision
import torchmetrics
import yaml
import time
import logging

import pandas as pd
import numpy as np
import albumentations as A

from glob import glob
from tqdm import tqdm
from random import randint
from albumentations.pytorch import ToTensorV2
from torchmetrics.classification import MulticlassAccuracy, MulticlassJaccardIndex
from torchmetrics.detection import IntersectionOverUnion, MeanAveragePrecision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import ToTensor
from torchvision.ops import nms
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

### Import Custom Code and Configuration Settings

In [5]:
# Make the project modules under ./src importable. The path is relative to the
# current working directory, so the notebook must be run from the repository root.
sys.path.append('./src')
from config import parse_config
from loggers import create_logger
from model import create_model
from transforms import train_transforms, val_transforms, collate_fn
from datasets import SeedDataset, dir_sampler
# NOTE: the names `train` and `val` imported here are rebound to DataFrames
# inside the training loop further down in this notebook.
from data_splitting import train, val, test
from train_val import train_model
# Configuration values are pulled in as module-level names from config.
from config import train_dir, val_dir, test_dir, annotation_dir, chkpt_dir, tensorboard_dir, log_dir
from config import device, cores, classes, n_classes, resize_to, n_epochs, batch_size
from config import base_name, lr, momentum, gamma



# Disabled alternative: unpack the settings from parse_config('config.yml')
# instead of importing them as module-level names.
# train_dir, val_dir, test_dir, annot_dir,\
# chkpt_dir, tb_dir, log_dir, device, cores,\
# classes, n_classes, resize_to, n_epochs, batch_size,\
# base_name, lr, momentum, gamma = parse_config('config.yml')

### Set up Loggers
Run this cell only once per kernel session. If you run it multiple times, you will end up with duplicate copies of every logging message.

In [6]:
# Build the application logger with the project's create_logger helper
# and record the start of this notebook run.
logger = create_logger()
logger.info('Training notebook started')

2023-09-07 21:31:57,220:main_app:INFO: - Training notebook started


### Log System Information

In [7]:
os_details = !lsb_release -a
os_details = '\n'.join(os_details)

cpu_info = !lscpu
cpu_info = '\n'.join(cpu_info)

sys_info = 'Python ' + sys.version

headings = ['OS_DETAILS:\n', 'CPU_INFO:\n', 'PYTHON KERNEL:\n']
details = [os_details, cpu_info, sys_info]

for h, d in zip(headings, details):
  logger.info(h + d)

2023-09-07 21:31:57,455:main_app:INFO: - OS_DETAILS:
No LSB modules are available.
Distributor ID:	Ubuntu
Description:	Ubuntu 22.04.2 LTS
Release:	22.04
Codename:	jammy
2023-09-07 21:31:57,457:main_app:INFO: - CPU_INFO:
Architecture:            x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Address sizes:         46 bits physical, 48 bits virtual
  Byte Order:            Little Endian
CPU(s):                  8
  On-line CPU(s) list:   0-7
Vendor ID:               GenuineIntel
  Model name:            Intel(R) Xeon(R) CPU @ 2.00GHz
    CPU family:          6
    Model:               85
    Thread(s) per core:  2
    Core(s) per socket:  4
    Socket(s):           1
    Stepping:            3
    BogoMIPS:            4000.29
    Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc
                         a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal
                         l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo
                     

In [8]:
# Validate torch device settings.
# Any CUDA spelling is accepted ('cuda', 'cuda:0', or a torch.device), not only
# the exact string 'cuda', so the check is not skipped for those forms.
if str(device).startswith('cuda'):
  if torch.cuda.is_available():
    logger.info(f"CUDA device name: {torch.cuda.get_device_name()}")
    logger.info(f"CUDA device capabilities: {torch.cuda.get_device_capability()}")
    logger.info(f"CUDA device properties: {torch.cuda.get_device_properties(0)}")
  else:
    # Explicit availability check instead of `assert`, which is removed when
    # Python runs with -O and should not drive runtime control flow.
    logger.warning('No CUDA device is available.')
    logger.info("Overwriting model config and setting device to 'cpu'.")
    device = torch.device('cpu')


2023-09-07 21:31:57,481:main_app:INFO: - CUDA device name: Tesla V100-SXM2-16GB
2023-09-07 21:31:57,482:main_app:INFO: - CUDA device capabilities: (7, 0)
2023-09-07 21:31:57,483:main_app:INFO: - CUDA device properties: _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16150MB, multi_processor_count=80)


### Model Training and Evaluation
We will build individual models for each of a range of training sample sizes. The total image dataset is 500 images, thus an 80/20 split on the data results in 400 training images and 100 validation images. The training sizes will be split up as follows:
```
[20, 40, 80, 200, 400]
```
with corresponding validation sizes of
```
[5, 10, 20, 50, 100]
```
That is, each scenario uses 5%, 10%, 20%, 50%, or 100% of the data available in each group (training and validation).

In [9]:
# Set proportion sizes for subsampling the data: one training scenario is run per entry.
# Full sweep: [0.05, 0.1, 0.2, 0.5, 1.0]; shorter sweeps used previously:
# [0.05, 0.5, 1.0], [0.5, 1.0], [0.5].
sizes = [1.0]
# Read in all of the image metadata
img_data = pd.read_csv('./data/power_analysis/results.csv')
# Keep the preview as the cell's last expression so the notebook renders it as a
# rich table instead of the wrapped plain text produced by print().
img_data.head()


   Unnamed: 0 img_id   img_name  class                variety     method  \
0           0  12600  12600.jpg  train  Rocky Mountain Remont  HLDP7x35s   
1           1  9adc6  9adc6.JPG  train               Shoshone       BT3X   
2           2  1cd49  1cd49.jpg  train       AAC Mountainview  HLDP7x35s   
3           3  6eb3d  6eb3d.JPG  train               Shoshone       BT3X   
4           4  7b24d  7b24d.JPG  train               Shoshone       BT3X   

   sample_mass_g  rep  legume_fruit_mass_g  legume_seed_mass_g comments  
0              2    7               2.0189               1.569      NaN  
1              2    5               2.0131               1.677      NaN  
2              4    1               4.0014               2.921      NaN  
3              1   10               1.0075               0.887      NaN  
4              3    8               3.0066               2.736      NaN  


In [10]:
# Dimensions (rows, columns) of the metadata rows whose 'class' label is 'train'.
img_data[img_data['class'].eq('train')].shape

(400, 11)

In [None]:
# Run one training scenario per entry in `sizes`: subsample the train/val image
# metadata, build a fresh model, and train it while logging to TensorBoard.

# Record the hyperparameters used for this run in the log.
model_config = [
    "model_name:\tfasterrcnn",
    "pretrained:\tTrue",
    f"classes:\t{classes}",
    f"n_classes:\t{n_classes}",
    f"lr:\t{lr}",
    f"momentum:\t{momentum}",
    f"n_epochs:\t{n_epochs}",
    f"batch_size:\t{batch_size}",
    f"lr_scheduler_gamma:\t{gamma}"
]
logger.info("MODEL_CONFIG\n"+"\n".join(model_config))

# Partition the metadata by the split label stored in its 'class' column.
train_imgs = img_data.loc[img_data['class']=='train']
val_imgs = img_data.loc[img_data['class']=='val']

logger.info("Starting training scenarios.")
for scenario_number, size in enumerate(sizes, start=1):
  today = str(datetime.date.today())
  # One TensorBoard run directory per scenario.
  writer = SummaryWriter(os.path.join(tensorboard_dir, f"{base_name}_{size}_{lr}_{today}"))
  # The try/finally guarantees the writer is closed even when a scenario is
  # skipped or training raises part-way through.
  try:
    logger.info("Starting Tensorboard Summary Writer.")
    logger.info(f"Scenario {scenario_number}: training on imageset of {img_data.shape[0]*size}")
    logger.info(f"Creating train/validation splits for {size*100}% of the imageset in an 80/20 train/val split")
    if size < 1:
      # Stratify on the 'method' column; the fixed random_state keeps the subsets reproducible.
      # NOTE: train_size and test_size are both `size`, so fractions above 0.5
      # are rejected by train_test_split (their sum would exceed 1).
      train, _ = train_test_split(train_imgs,
                                  test_size=size,
                                  train_size=size,
                                  stratify=train_imgs[['method']],
                                  random_state=345)
      _, val = train_test_split(val_imgs,
                                test_size=size,
                                train_size=size,
                                stratify=val_imgs[['method']],
                                random_state=345)
    else:
      train = train_imgs.copy()
      val = val_imgs.copy()

    logger.info(f"Sampled images.\nTraining set: {train.shape[0]} images.\nVal set: {val.shape[0]} images")

    try:
      model_name = base_name + f"_{size}"
      model = create_model(n_classes, n_obj_det=500)
      # optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
      optimizer = torch.optim.Adam(model.parameters(), lr=lr)
      scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)
      # Create the checkpoint folder (and any missing parents); no error if it already exists.
      os.makedirs(f"./model_chkpt/{model_name}", exist_ok=True)
    except Exception:
      # Without a freshly built model/optimizer there is nothing valid to train:
      # log the failure and skip this scenario instead of continuing with
      # undefined or stale objects left over from a previous iteration.
      logger.exception('An exception occurred.')
      continue

    # Create train dataset
    train_dataset = SeedDataset(image_dir=train_dir,
                                annot_dir=annotation_dir,
                                resize_dims=(resize_to, resize_to),
                                classes=classes,
                                transforms=train_transforms(),
                                subset=list(train['img_name'].unique()))

    # Create val dataset
    val_dataset = SeedDataset(image_dir=val_dir,
                              annot_dir=annotation_dir,
                              resize_dims=(resize_to, resize_to),
                              classes=classes,
                              transforms=val_transforms(),
                              subset=list(val['img_name'].unique()))

    # Only the training loader shuffles; validation keeps a fixed order.
    train_loader = DataLoader(
      train_dataset,
      batch_size=batch_size,
      shuffle=True,
      num_workers=cores,
      collate_fn=collate_fn,
    )

    val_loader = DataLoader(
      val_dataset,
      batch_size=batch_size,
      shuffle=False,
      num_workers=cores,
      collate_fn=collate_fn
    )

    train_model(model=model,
                optimizer=optimizer,
                scheduler=scheduler,
                n_epochs=n_epochs,
                device=device,
                train_loader=train_loader,
                val_loader=val_loader,
                logger=logger,
                writer=writer,
                model_name=model_name)
  finally:
    writer.close()

2023-09-07 21:31:57,517:main_app:INFO: - MODEL_CONFIG
model_name:	fasterrcnn
pretrained:	True
classes:	{'0': 'background', '1': 'split', '2': 'seed', '3': 'pod'}
n_classes:	4
lr:	0.01
momentum:	0.9
n_epochs:	50
batch_size:	2
lr_scheduler_gamma:	0.9
2023-09-07 21:31:57,520:main_app:INFO: - Starting training scenarios.
2023-09-07 21:31:57,527:main_app:INFO: - Starting Tensorboard Summary Writer.
2023-09-07 21:31:57,528:main_app:INFO: - Scenario 1: training on imageset of 500.0
2023-09-07 21:31:57,530:main_app:INFO: - Creating train/validation splits for 100.0% of the imageset in an 80/20 train/val split
2023-09-07 21:31:57,531:main_app:INFO: - Sampled images.
Training set: 400 images.
Val set: 100 images
2023-09-07 21:32:01,329:main_app:INFO: - Initializing training sequence
2023-09-07 21:32:01,331:main_app:INFO: - Epoch: 1
LR: [0.01]
Train Loss:  0.9476: 100%|██████████| 200/200 [04:06<00:00,  1.23s/it]
Val Loss: 0.9275: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s]
2023-09-07 21:36:5

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-5ad851e0df6a>", line 85, in <cell line: 19>
    train_model(model=model,
  File "/content/drive/MyDrive/github_repos/sainfoin_seed_RCNN/./src/train_val.py", line 73, in train_model
  File "/content/drive/MyDrive/github_repos/sainfoin_seed_RCNN/./src/train_val.py", line 17, in _train
  File "/usr/local/lib/python3.10/dist-packages/tqdm/std.py", line 1182, in __iter__
    for obj in iterable:
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 633, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1345, in _next_data
    return self._process_data(data)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1371, in _process_data
  

In [11]:
# Scratch check: compare summing the four loss components against averaging them.
loss_dict = {
    'loss_classifier': torch.Tensor([0.1050]),
    'loss_box_reg': torch.Tensor([0.1940]),
    'loss_objectness': torch.Tensor([6.9332e-05]),
    'loss_rpn_box_reg': torch.Tensor([0.0159]),
}
loss_terms = list(loss_dict.values())

# Total: add the one-element tensors together in dictionary order.
loss = sum(loss_terms)
print(loss)

# Mean: pack the components into a single tensor and average it.
loss = torch.mean(torch.Tensor(loss_terms))
print(loss)

tensor([0.3150])
tensor(0.0787)


In [12]:
# Scratch check: build a one-element tensor from a Python float and display it.
torch.Tensor([34.5])

tensor([34.5000])