In [1]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import json
import cv2
from PIL import Image
import random

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# import albumentations as A
# from albumentations.pytorch.transforms import ToTensorV2
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.8.1+cpu
Torchvision Version:  0.7.0


In [2]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
insect_classes=["Araneae","Coleoptera","Diptera","Hemiptera","Hymenoptera","Lepidoptera","Odonata"]

def make_samples(root_path, classes):
    train_annotations=[]
    train_each_length=[]
    valid_annotations=[]
    valid_each_length=[]
    
    for c in classes:
        train_last_length=len(train_annotations)
        valid_last_length=len(valid_annotations)
        path=root_path+"/"+c+"/annotations"
        ld=os.listdir(path)
        i=0
        for js_file in ld:
            if os.path.splitext(js_file)[1] == '.json':
                with open(path+"/"+js_file) as f:
                    js_dic=json.load(f)
                    if i>0.7*len(ld):
                        valid_annotations.append(js_dic)
                    else:
                        train_annotations.append(js_dic)
                i=i+1
        train_current_length=len(train_annotations)
        train_each_length.append(train_current_length-train_last_length)
        valid_current_length=len(valid_annotations)
        valid_each_length.append(valid_current_length-valid_last_length)
    
    return train_annotations, train_each_length,valid_annotations,valid_each_length

root="ArTaxOr"
train_anno, train_each_l, valid_anno, valid_each_l=make_samples(root,insect_classes)

print(train_each_l,valid_each_l)

class InsectsData(Dataset):
    def __init__(self,root_path,dic_names,samples,data_length,transforms):
        self.root=root_path
        self.classes=dic_names
        self.samples=samples
        self.data_length=data_length
        self.transforms = transforms

    def __getitem__(self, index):
        if index<self.data_length[0]:
            dic_name=self.classes[0]
        elif index<sum(self.data_length[0:2]):
            dic_name=self.classes[1]
        elif index<sum(self.data_length[0:3]):
            dic_name=self.classes[2]
        elif index<sum(self.data_length[0:4]):
            dic_name=self.classes[3]
        elif index<sum(self.data_length[0:5]):
            dic_name=self.classes[4]
        elif index<sum(self.data_length[0:6]):
            dic_name=self.classes[5]
        elif index<sum(self.data_length[0:7]):
            dic_name=self.classes[6]
        
        target={}
        annotation=self.samples[index]
        asset=annotation["asset"]
        regions=annotation["regions"]
        image_name=asset["name"]
        bboxes = np.zeros((len(regions), 4))
        areas=np.zeros(len(regions),)
        labels=np.zeros(len(regions),)
        i=0
        for region in regions:
            bboxes[i,0]=region["points"][0]["x"]
            bboxes[i,1]=region["points"][0]["y"]
            bboxes[i,2]=region["points"][2]["x"]
            bboxes[i,3]=region["points"][2]["y"]
            labels[i]=self.classes.index(region["tags"][0])
            box = region["boundingBox"]
            height, width= box["height"], box["width"]
            areas[i]=height*width
            i=i+1
        
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        areas = torch.as_tensor(areas,dtype=torch.float32)
        iscrowd = torch.zeros((len(regions),), dtype=torch.int64)
        target["boxes"] = bboxes
        target["image_id"]=torch.tensor([index])
        target["labels"]=labels
        target["area"]=areas
        target["iscrowd"] = iscrowd
        image=cv2.imread(self.root+"/"+dic_name+"/"+image_name,cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
#         gray=cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
        if self.transforms:
            sample = {
            'image': image,
            'bboxes': target['boxes'],
            'labels': labels,
            }
            sample = self.transforms(**sample)
            image = sample['image']
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
        return image, target
    
    
    def __len__(self):
        return len(self.samples)
    
    @staticmethod
    def get_train_transform():
        return A.Compose([
        A.Flip(0.5),
        ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

    @staticmethod
    def get_valid_transform():
        return A.Compose([
        ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


[1695, 1478, 1422, 1672, 1435, 1475, 1595] [723, 632, 608, 715, 613, 631, 682]


In [3]:
# i=0
# for dsa in a:
#     regions=dsa["regions"]
#     for region in regions:
#         if len(region["tags"])==1:
#             i=i+1

# print(i)

In [4]:
train_dataset=InsectsData(root,insect_classes,train_anno,train_each_l,InsectsData.get_train_transform())
valid_dataset=InsectsData(root,insect_classes,valid_anno,valid_each_l,InsectsData.get_valid_transform())
print(train_dataset.__len__())
img, gt = train_dataset.__getitem__(4769) # get the 34th sample
print(type(img))
print(gt)

10772
<class 'torch.Tensor'>
{'boxes': tensor([[ 474.0048,  429.3199, 1267.4021, 1087.9502]], dtype=torch.float64), 'image_id': tensor([4769]), 'labels': tensor([3]), 'area': tensor([522555.5000]), 'iscrowd': tensor([0])}


In [None]:
for pic,targ in train_dataset:
    for label in targ["labels"]:
        print(label)

tensor(2)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(4)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)


In [6]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

In [7]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
print(model.roi_heads.box_predictor)

FastRCNNPredictor(
  (cls_score): Linear(in_features=1024, out_features=91, bias=True)
  (bbox_pred): Linear(in_features=1024, out_features=364, bias=True)
)


In [8]:
num_classes = 7 

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained model's head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
print(model.roi_heads.box_predictor)

FastRCNNPredictor(
  (cls_score): Linear(in_features=1024, out_features=7, bias=True)
  (bbox_pred): Linear(in_features=1024, out_features=28, bias=True)
)


In [None]:
from torch.utils.data import DataLoader

def collate_fn(batch):
    return tuple(zip(*batch))

train_data_loader = DataLoader(
  train_dataset,
  batch_size=4,
  shuffle=False,
  num_workers=4,
  collate_fn=collate_fn
)

valid_data_loader = DataLoader(
  valid_dataset,
  batch_size=4,
  shuffle=False,
  num_workers=4,
  collate_fn=collate_fn
)

In [None]:
# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# move model to the right device
model.to(device)

# create an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.00001, momentum=0.9, weight_decay=0.0005)

# create a learning rate scheduler
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None

# train it for 10 epochs
num_epochs = 10

In [None]:
import time
from tqdm import tqdm
#from tqdm.notebook import tqdm as tqdm

itr = 1

total_train_loss = []
total_valid_loss = []

losses_value = 0

for epoch in range(num_epochs):

    start_time = time.time()

    # train ------------------------------

    model.train()
    train_loss = []

    pbar = tqdm(train_data_loader, desc='let\'s train')
    for images, targets in pbar:

        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        losses_value = losses.item()
        train_loss.append(losses_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        pbar.set_description(f"Epoch: {epoch+1}, Batch: {itr}, Loss: {losses_value}")
        itr += 1

    epoch_train_loss = np.mean(train_loss)
    total_train_loss.append(epoch_train_loss)

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    # valid ------------------------------

    with torch.no_grad():
        valid_loss = []

        for images, targets in valid_data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())
            loss_value = losses.item()
            valid_loss.append(loss_value)

    epoch_valid_loss = np.mean(valid_loss)
    total_valid_loss.append(epoch_valid_loss)

    # print ------------------------------

    print(f"Epoch Completed: {epoch+1}/{num_epochs}, Time: {time.time()-start_time}, "
        f"Train Loss: {epoch_train_loss}, Valid Loss: {epoch_valid_loss}")

In [None]:
plt.figure(figsize=(8, 5))
sns.set_style(style="whitegrid")
sns.lineplot(x=range(1, len(total_train_loss)+1), y=total_train_loss, label="Train Loss")
sns.lineplot(x=range(1, len(total_train_loss)+1), y=total_valid_loss, label="Valid Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()

In [None]:
torch.save(model.state_dict(), 'fasterrcnn_resnet50_fpn.pth')