In [1]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
from torch.optim import lr_scheduler 
import torch.nn.functional as F
import numpy as np 

import torchvision 

from torchvision import datasets, models, transforms
from torch.cuda.amp import autocast, GradScaler 

import time 
import os 

In [2]:
import sys

In [3]:
from copy import deepcopy

In [4]:
import cv2
from random import uniform

In [5]:
from detection.engine import evaluate

In [6]:
# Filesystem anchors, both given relative to the notebook's working directory.
weights_file_path = r'..'
project_path = r'..'

In [7]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# ---- Runtime switches --------------------------------------------------
# Passed to the DataLoader as `pin_memory`.
# NOTE: this name is assigned again (to False) in a later configuration cell,
# and that later value is the one in effect when the loaders are built.
pin_memory_bool = True
# Automatic Mixed Precision flag.
use_amp = True

# ---- Optimizer settings ------------------------------------------------
# Placeholder; get_optimizer() replaces it with the real optimizer.
optimizer_ft = None
weight_decay_setting = 1e-08
eps_setting = 1e-08

In [9]:
# Weight decay applied to the "no decay" parameter group built by
# group_weight() (biases and batch-norm affine parameters).
BNorm_decay_setting = 0.0

In [10]:
def my_get_detection_model_NViT_lite(num_classes, pretrained=True):
    """Assemble a Faster R-CNN detector on a NextViT-small backbone with an FPN.

    The NextViT ``stem`` layers, ``features`` layers and final ``norm`` are
    flattened into one ``nn.Sequential`` so that individual layers can be
    addressed by position through ``return_layers``.

    Args:
        num_classes: Number of classes handed to ``FasterRCNN``.
        pretrained: Accepted for interface compatibility; it is not read
            anywhere in this function.

    Returns:
        The ``FasterRCNN`` model moved to the module-level ``device``.

    Reads the module-level settings ``FPN_out_channels``, ``using_dropout``,
    ``anchor_sizes``, ``sample_min_size``, ``sample_max_size`` and ``device``,
    and (re)binds the module-level ``return_layers`` mapping.
    """
    from detection.my_faster_rcnn import FasterRCNN
    from torchvision.models.detection.rpn import AnchorGenerator
    from detection.backbone_utils import BackboneWithFPN

    from NextViT.nextvit_lite import nextvit_small

    global return_layers

    nextvit = nextvit_small()

    # Flat, position-indexed view of the network: stem, then features, then norm.
    flat_layers = nn.Sequential(*nextvit.stem, *nextvit.features, nextvit.norm)

    # Flat-layer index -> name of the feature map handed to the FPN.
    return_layers = {'6': '0', '10': '1', '20': '2', '23': '3'}
    # Channel count given to BackboneWithFPN for each returned layer, in order.
    in_channels_list = [96, 256, 512, 256]

    backbone = BackboneWithFPN(
        flat_layers,
        return_layers,
        in_channels_list,
        FPN_out_channels,
        dropout=using_dropout,
    )

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2,
    )

    # The same three aspect ratios are used for every anchor size level.
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    detector = FasterRCNN(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        min_size=sample_min_size,
        max_size=sample_max_size,
    )

    return detector.to(device)

In [11]:
def get_transforms():
    """Create the validation transform pipeline and publish it as a global.

    Binds the module-level ``my_transforms_val`` to a ``v2.Compose`` that
    converts the sample to an image tensor, resizes it to a square of side
    ``sample_min_size`` and casts it to ``float32`` without rescaling values.

    ``my_transforms_train`` is declared global but is not assigned here.
    """
    from torchvision.transforms import v2

    global my_transforms_train, my_transforms_val, sample_min_size, data_dir

    # The training-time augmentation pipeline (my_transforms_train) is disabled
    # in this notebook. When enabled it composed, in order: ToImage, Resize to
    # (sample_min_size, sample_min_size), random ColorJitter(0.5, 0.5, 0.5)
    # with p=Cjit_prob, RandomHorizontalFlip(p=RHF_prob),
    # RandomVerticalFlip(p=RVF_prob), RandomGrayscale(p=Rgray_prob),
    # RandomAutocontrast(p=Rcontrast_prob), random GaussianBlur(kernel_size=5)
    # with p=Gausblur_prob, and ToDtype(torch.float32, scale=False).

    square_size = (sample_min_size, sample_min_size)
    val_steps = [
        v2.ToImage(),
        v2.Resize(size=square_size, antialias=True),
        # scale=False: only the dtype changes, pixel values are not rescaled.
        v2.ToDtype(torch.float32, scale=False),
    ]
    my_transforms_val = v2.Compose(val_steps)

In [12]:
def get_datasets():
    """Build the validation ``CocoDetection`` dataset into ``image_datasets``.

    Calls ``get_transforms()`` first, then rebinds the module-level
    ``image_datasets`` to a fresh dict containing only the ``'val'`` split.
    Construction of the training split is disabled in this notebook.

    When ``data_dir`` contains 'DIOR20', 'MAR20' or 'ShipRS50', validation
    images are read from the ``val`` folder of the *parent* directory of
    ``data_dir``; otherwise from ``data_dir/val``. The annotation file is
    always ``data_dir/anno_dir/val-annotation.json``.
    """
    global image_datasets

    get_transforms()
    image_datasets = {}

    # Only consumed by the (currently disabled) training-split construction.
    train_anno_json = f'train-annotation-{data_dir[-1]}.json'

    shares_parent_val = any(
        tag in data_dir for tag in ('DIOR20', 'MAR20', 'ShipRS50')
    )
    data_dir_val = os.path.dirname(data_dir) if shares_parent_val else data_dir

    image_datasets['val'] = datasets.CocoDetection(
        root=os.path.join(data_dir_val, 'val'),
        annFile=os.path.join(data_dir, 'anno_dir', 'val-annotation.json'),
        transforms=my_transforms_val,
    )


In [13]:
def wrap_datasets():
    """Wrap the validation dataset for use with torchvision v2 transforms.

    Replaces ``image_datasets['val']`` in place with the result of
    ``datasets.wrap_dataset_for_transforms_v2``. Wrapping of the training
    split is disabled in this notebook.
    """
    global image_datasets

    val_target_keys = ["boxes", "labels", 'area', 'image_id', 'iscrowd']
    image_datasets['val'] = datasets.wrap_dataset_for_transforms_v2(
        image_datasets['val'],
        target_keys=val_target_keys,
    )


def get_datasets_fix_resize():
    """Build and wrap the validation dataset into ``image_datasets_fix``.

    Same directory rules as ``get_datasets``: for 'DIOR20', 'MAR20' or
    'ShipRS50' paths the images come from the parent directory's ``val``
    folder, otherwise from ``data_dir/val``; annotations always come from
    ``data_dir/anno_dir/val-annotation.json``. The dataset is stored under
    ``image_datasets_fix['val']`` and then wrapped for v2 transforms.
    """
    global image_datasets_fix

    get_transforms()
    image_datasets_fix = {}

    shares_parent_val = any(
        tag in data_dir for tag in ('DIOR20', 'MAR20', 'ShipRS50')
    )
    data_dir_val = os.path.dirname(data_dir) if shares_parent_val else data_dir

    # Store the raw dataset first, then swap in the wrapped version.
    image_datasets_fix['val'] = datasets.CocoDetection(
        root=os.path.join(data_dir_val, 'val'),
        annFile=os.path.join(data_dir, 'anno_dir', 'val-annotation.json'),
        transforms=my_transforms_val,
    )
    image_datasets_fix['val'] = datasets.wrap_dataset_for_transforms_v2(
        image_datasets_fix['val'],
        target_keys=["boxes", "labels", 'area', 'image_id', 'iscrowd'],
    )

In [14]:
def get_dataloaders():
    """Create the validation ``DataLoader`` and publish it in ``dataloaders``.

    Calls ``wrap_datasets()`` first, then rebinds the module-level
    ``dataloaders`` to a dict holding only the ``'val'`` loader. The training
    loader is disabled in this notebook.

    Batches are collated into a tuple of per-field tuples
    (``tuple(zip(*batch))``) instead of being stacked.
    """
    global dataloaders, pin_memory_bool, persistent_workers

    wrap_datasets()

    dataloaders = {}
    val_loader_options = dict(
        batch_size=val_Bsize,
        shuffle=True,
        pin_memory=pin_memory_bool,
        persistent_workers=persistent_workers,
        num_workers=loader_workers,
        collate_fn=lambda batch: tuple(zip(*batch)),
    )
    dataloaders['val'] = torch.utils.data.DataLoader(
        image_datasets['val'], **val_loader_options
    )

In [15]:
def group_weight(module):
    """Split a model's parameters into weight-decay and no-decay groups.

    Linear and convolution weights go into the first group, which inherits the
    optimizer's default weight decay. Linear/convolution biases and batch-norm
    weights and biases go into the second group, whose weight decay is the
    module-level ``BNorm_decay_setting``.

    Args:
        module: The ``nn.Module`` whose sub-modules are scanned.

    Returns:
        A two-element list of parameter-group dicts suitable for a
        ``torch.optim`` optimizer: ``[decay_group, no_decay_group]``.

    Raises:
        AssertionError: If some parameter of ``module`` belongs to a layer type
            that is not handled here (anything other than Linear, convolution
            or batch-norm), so it would silently be left out of the optimizer.
    """
    group_decay = []
    group_no_decay = []
    for m in module.modules():
        if isinstance(m, (nn.Linear, torch.nn.modules.conv._ConvNd)):
            group_decay.append(m.weight)
            if m.bias is not None:
                group_no_decay.append(m.bias)
        elif isinstance(m, torch.nn.modules.batchnorm._BatchNorm):
            # Each affine parameter is checked on its own: the weight used to
            # be guarded by the *bias* check, which could append a None weight.
            if m.weight is not None:
                group_no_decay.append(m.weight)
            if m.bias is not None:
                group_no_decay.append(m.bias)

    n_total = len(list(module.parameters()))
    n_grouped = len(group_decay) + len(group_no_decay)
    assert n_total == n_grouped, (
        f"group_weight covered {n_grouped} of {n_total} parameters; "
        "the model contains parameters outside Linear/Conv/BatchNorm layers"
    )
    groups = [
        dict(params=group_decay),
        dict(params=group_no_decay, weight_decay=BNorm_decay_setting),
    ]
    return groups

In [16]:
def get_optimizer():
    """Create the AdamW optimizer and cosine LR scheduler for ``model_ft``.

    Binds the module-level ``optimizer_ft`` to an ``AdamW`` instance built on
    the parameter groups returned by ``group_weight(model_ft)``, and the
    module-level ``exp_lr_scheduler`` to a ``CosineAnnealingLR`` driven by
    ``T_max_setting`` and ``eta_min_setting``.
    """
    global model_ft, optimizer_ft, lr_init_RCNN, exp_lr_scheduler

    optimizer_ft = optim.AdamW(
        group_weight(model_ft),
        lr=lr_init_RCNN,
        betas=(0.9, 0.999),
        eps=eps_setting,
        weight_decay=weight_decay_setting,
    )

    exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(
        optimizer_ft,
        T_max=T_max_setting,
        eta_min=eta_min_setting,
    )

In [17]:
def get_model_ft():
    """Pick dataset-specific settings from ``data_dir`` and build ``model_ft``.

    The first dataset tag found in ``data_dir`` (checked in the order DIOR20,
    NWPU-VHR10, MAR20, ShipRS50) sets the module-level ``num_classes`` and
    makes both ``sample_min_size`` and ``sample_max_size`` equal to
    ``image_input_size``. If no tag matches, those globals are left untouched.

    ``model_ft`` is only (re)built when ``model_selection`` is
    'Faster-RCNN-NViT-fpn-lite'.
    """
    global model_ft
    global num_classes, sample_min_size, sample_max_size
    global loader_workers, train_Bsize, val_Bsize, image_input_size

    # (tag searched for in data_dir, value assigned to num_classes)
    classes_per_dataset = (
        ('DIOR20', 21),
        ('NWPU-VHR10', 11),
        ('MAR20', 21),
        ('ShipRS50', 51),
    )
    for dataset_tag, class_count in classes_per_dataset:
        if dataset_tag in data_dir:
            num_classes = class_count
            sample_min_size = sample_max_size = image_input_size
            break

    if model_selection == 'Faster-RCNN-NViT-fpn-lite':
        model_ft = my_get_detection_model_NViT_lite(num_classes)


In [18]:
def init_for_training():
    """Run every setup step in order: model, dataset, loader, COCO GT, optimizer.

    Binds the module-level ``val_in_coco`` to the COCO-API view of the
    validation dataset. Record-file and settings-log creation, and the
    assignment of ``logfile_path``, are disabled in this notebook.
    """
    global val_in_coco, logfile_path

    # Order matters: the datasets read sizes chosen by get_model_ft(), and the
    # optimizer needs the model to exist.
    get_model_ft()
    get_datasets()
    get_dataloaders()

    val_dataset = dataloaders['val'].dataset
    val_in_coco = convert_to_coco_api(val_dataset)

    get_optimizer()

    print('model is ready for training & testing!!!')

In [19]:
@torch.inference_mode()
def evaluate_resize(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` with COCO metrics.

    Ground truth comes from the module-level ``val_in_coco`` object rather
    than being rebuilt from ``data_loader``.

    Args:
        model: Detection model; it is switched to ``eval()`` mode here.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            each target exposes an ``"image_id"`` entry.
        device: Device the images are moved to before the forward pass.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.

    The PyTorch intra-op thread count is forced to 1 for the duration of the
    evaluation and is restored afterwards, even if evaluation raises.
    """
    from detection.engine import _get_iou_types
    from detection.coco_eval import CocoEvaluator
    from detection.utils import MetricLogger

    global val_in_coco

    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = MetricLogger(delimiter="  ")
        header = "Test:"

        coco = val_in_coco
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)

            # Drain pending GPU work so it is not charged to model_time.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            res = {target["image_id"]: output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Always give the kernel its original thread count back; without this
        # an interrupted evaluation leaves the whole session single-threaded.
        torch.set_num_threads(n_threads)
    return coco_evaluator

In [20]:
def convert_to_coco_api(ds):
    """Build an in-memory pycocotools ``COCO`` object from a detection dataset.

    Args:
        ds: Indexable dataset; ``ds[i]`` returns ``(img, targets)`` where
            ``img.shape[-2:]`` is (height, width) and ``targets`` is a dict
            with at least ``"image_id"``. ``"boxes"`` are read as
            (x1, y1, x2, y2) and converted to COCO (x, y, width, height).

    Returns:
        A ``COCO`` instance whose ``dataset`` holds the images, annotations and
        categories found in ``ds``, with its index already created.

    Notes:
        * Every image is registered; a sample whose target has no ``"boxes"``
          entry simply contributes no annotations.
        * ``area`` is recomputed from the box width and height instead of
          being read from ``targets["area"]`` — presumably because the boxes
          have been resized by the dataset transforms while the stored area
          has not (TODO confirm).
        * A missing ``"iscrowd"`` entry defaults to 0 for every object.
    """
    from pycocotools.coco import COCO
    from pycocotools import mask as coco_mask

    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    ann_id = 1
    dataset = {"images": [], "categories": [], "annotations": []}
    categories = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        # targets = ds.get_annotations(img_idx)
        img, targets = ds[img_idx]

        image_id = targets["image_id"]
        dataset["images"].append(
            {"id": image_id, "height": img.shape[-2], "width": img.shape[-1]}
        )

        if "boxes" not in targets:
            # Image without annotations: keep the image entry, nothing more.
            continue

        # Convert (x1, y1, x2, y2) to (x, y, width, height) on a copy so the
        # dataset's own tensor is left untouched.
        bboxes = targets["boxes"].clone()
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets["labels"].tolist()
        num_objs = len(bboxes)

        if "iscrowd" in targets:
            iscrowd = targets["iscrowd"]
            if hasattr(iscrowd, "tolist"):
                # Store plain Python values rather than 0-d tensors.
                iscrowd = iscrowd.tolist()
        else:
            # Never reuse the previous image's flags.
            iscrowd = [0] * num_objs

        has_masks = "masks" in targets
        has_keypoints = "keypoints" in targets
        if has_masks:
            masks = targets["masks"]
            # make masks Fortran contiguous for coco_mask
            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
        if has_keypoints:
            keypoints = targets["keypoints"]
            keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()

        for i in range(num_objs):
            box_width = bboxes[i][2]
            box_height = bboxes[i][3]
            ann = {}
            ann["image_id"] = image_id
            ann["bbox"] = bboxes[i]
            ann["category_id"] = labels[i]
            categories.add(labels[i])
            ann["area"] = box_width * box_height
            ann["iscrowd"] = iscrowd[i]
            ann["id"] = ann_id
            if has_masks:
                ann["segmentation"] = coco_mask.encode(masks[i].numpy())
            if has_keypoints:
                ann["keypoints"] = keypoints[i]
                ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3])
            dataset["annotations"].append(ann)
            ann_id += 1
    dataset["categories"] = [{"id": i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    return coco_ds

In [21]:
# DataLoader runtime options used by get_dataloaders().
loader_workers = 2  # passed as num_workers
# Replaces the True value assigned in the earlier settings cell.
pin_memory_bool = False
persistent_workers = False


In [22]:
# Random seed candidate in [0, 999); drawn fresh on every run of this cell.
torch_seed_value = np.random.randint(low=0, high=999)

# ---- Batch sizes and gradient accumulation ----
train_Bsize = 8
val_Bsize = 32
accum_step = 4

# ---- Schedule length ----
train_epochs = 60 * 4
T_max_setting = train_epochs  # T_max of the cosine annealing scheduler
accum_step_lr_scheduler = train_epochs // T_max_setting

# ---- Learning rate ----
lr_init_RCNN = 5e-5
eta_min_setting = lr_init_RCNN * 0.2  # eta_min of the cosine annealing scheduler

# ---- Model options ----
FPN_out_channels = 256
using_dropout = None

In [23]:
# Candidate dataset roots. NOTE(review): this is a machine-specific absolute
# path; adjust it to the local dataset location before running.
data_dir_list = [
    r'/home/jason/data/DIOR20_dataset/25%-Train-ratio-A',
]

# Dataset root used by the dataset and model builders.
data_dir = data_dir_list[0]

In [24]:
# Architecture key checked by get_model_ft().
model_selection = 'Faster-RCNN-NViT-fpn-lite'

Roi_align_layers = ['0', '1', '2', '3']

# One single-size tuple per anchor level.
anchor_sizes = tuple((size,) for size in (24, 48, 96, 144, 192))

# Side length used for both the minimum and maximum sample size.
image_input_size = 640

In [25]:
# Build the model, the validation dataset and loader, the COCO ground-truth
# object and the optimizer/scheduler, all published as module-level globals.
init_for_training()

faster rcnn using custom SmoothL1Loss is activated!
loading annotations into memory...
Done (t=0.42s)
creating index...
index created!
creating index...
index created!
model is ready for training & testing!!!


In [26]:
# Load the trained checkpoint onto the CPU first. Without map_location a
# checkpoint saved from a GPU run cannot be loaded on a CPU-only machine;
# load_state_dict() later copies the tensors onto the model's own device.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (or pass weights_only=True if the file is a plain state dict).
model_weights = torch.load(
    os.path.join('weights',
                 'DIOR20_TR25%A_20250204075831_Faster-RCNN-NViT-fpn-lite.pth'),
    map_location='cpu',
)

In [27]:
# Copy the checkpoint into the model and print the key-matching report
# returned by load_state_dict().
# NOTE(review): the printed label says "LHViT" while model_selection is
# 'Faster-RCNN-NViT-fpn-lite' — confirm which name is intended.
print(f'Faster-RCNN-LHViT-fpn {model_ft.load_state_dict(model_weights)}')

Faster-RCNN-LHViT-fpn <All keys matched successfully>


In [28]:
# Evaluate the loaded model on the validation loader; the COCO summary is
# printed and the evaluator object is returned as the cell value.
evaluate_resize(model_ft, dataloaders['val'], device=device)

Test:  [  0/367]  eta: 0:10:30  model_time: 0.8741 (0.8741)  evaluator_time: 0.2818 (0.2818)  time: 1.7181  data: 0.5379  max mem: 5792
Test:  [100/367]  eta: 0:03:42  model_time: 0.6631 (0.6644)  evaluator_time: 0.0881 (0.1177)  time: 0.8400  data: 0.0255  max mem: 5792
Test:  [200/367]  eta: 0:02:19  model_time: 0.6640 (0.6643)  evaluator_time: 0.1377 (0.1216)  time: 0.8511  data: 0.0252  max mem: 5792
Test:  [300/367]  eta: 0:00:56  model_time: 0.6822 (0.6686)  evaluator_time: 0.1090 (0.1234)  time: 0.8470  data: 0.0258  max mem: 5792
Test:  [366/367]  eta: 0:00:00  model_time: 0.6643 (0.6684)  evaluator_time: 0.0958 (0.1274)  time: 0.8527  data: 0.0253  max mem: 5792
Test: Total time: 0:05:10 (0.8453 s / it)
Averaged stats: model_time: 0.6643 (0.6684)  evaluator_time: 0.0958 (0.1274)
Accumulating evaluation results...
DONE (t=2.46s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.538
 Average Precision  (AP) @[ IoU=0.50      | area=   a

<detection.coco_eval.CocoEvaluator at 0x7f5cd8654220>

In [29]:
# Record the PyTorch version used for this run.
torch.__version__

'2.1.0+cu118'

In [30]:
# Record the torchvision version used for this run.
torchvision.__version__

'0.16.0+cu118'

In [31]:
# Notebook magic: autosave this notebook every 10 seconds.
%autosave 10

Autosaving every 10 seconds


In [32]:
#### 