Splitting the data into train, validation, and test sets

In [None]:
import json
import random

# Load the label list, keeping only the first 6000 entries (the samples
# without a background image). `with` closes the file even if parsing fails.
with open('train.json') as f:
    data = json.load(f)[:6000]

# NOTE: the shuffle is unseeded, so every run produces a different split.
random.shuffle(data)

# 70 % / 20 % / 10 % split. The boundaries are computed once and shared so the
# three subsets are disjoint and together cover all of `data`. Slicing the
# test set as `data[-k:]` would return the *whole* list when k rounds down to 0
# and could skip samples when len(data) is not a multiple of 10.
n_samples = len(data)
train_end = int(n_samples * 0.7)
validation_end = int(n_samples * 0.9)

train = data[:train_end]
validation = data[train_end:validation_end]
test = data[validation_end:]


def key_return(elem):
    """Return the sort key of a label entry: its image file name."""
    return elem['filename']


# Restore a deterministic, file-name based order inside each subset.
train.sort(key=key_return)
test.sort(key=key_return)
validation.sort(key=key_return)

# Write every subset to its own JSON file; each handle is closed (and
# flushed) as soon as its dump finishes or fails.
for split_name, split_data in (('train', train),
                               ('test', test),
                               ('validation', validation)):
    with open(f"Datasets/speed/synthetic/{split_name}.json", "w") as out_file:
        json.dump(split_data, out_file)

Preprocessing

In [None]:
'''
Copyright (c) 2022 SLAB Group
Licensed under MIT License (see LICENSE.md)
Author: Tae Ha Park (tpark94@stanford.edu)
'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import shutil
import zipfile


import easydict
import json
import argparse
import os
import numpy as np
import cv2
from tqdm import tqdm

import _init_paths

from core.config import cfg, update_config
from core.utils.utils import load_camera_intrinsics, load_tango_3d_keypoints
from core.utils.postprocess import project_keypoints


def main(args):
    """Resize one dataset split and export its labels to a CSV file.

    Reads the label list from ``args.jsonfile``, writes a resized copy of
    every image (and, when ``args.load_masks`` is set, of every mask) below
    ``cfg.DATASET.ROOT`` and writes one CSV row per image. When
    ``args.load_labels`` is set the row holds the file name, a bounding box,
    the pose labels and the projected keypoints; otherwise only the file name.

    Args:
        args: namespace-like object providing ``jsonfile``, ``load_masks`` and
            ``load_labels``, plus whatever ``update_config`` reads from it.

    Raises:
        NotImplementedError: if ``cfg.DATASET.DATANAME`` is not supported.
        FileNotFoundError: if an image or a mask cannot be read.
    """
    update_config(cfg, args)

    datadir = os.path.join(cfg.DATASET.ROOT)

    # Read labels from JSON file
    jsonfile = args.jsonfile
    print(f'Reading JSON file from {jsonfile}...')
    with open(jsonfile, 'r') as f:
        labels = json.load(f) # list

    # Read camera
    camera = load_camera_intrinsics(cfg.DATASET.CAMERA)

    # Read Tango 3D keypoints.
    # NOTE(review): the axis=1 concatenation with a (3, 1) column further down
    # requires this array to have 3 rows -- confirm the loader's layout.
    keypts3d = load_tango_3d_keypoints(cfg.DATASET.KEYPOINTS)

    # Derive the output sub-directory (domain) and CSV name (split) from the
    # JSON path; the layout depends on the dataset.
    if cfg.DATASET.DATANAME == 'speed':
        domain, split = jsonfile.split('/')[-2:]
    elif cfg.DATASET.DATANAME == 'prisma25':
        domain, split = '', jsonfile
    elif 'shirt' in cfg.DATASET.DATANAME:
        traj, domain, split = jsonfile.split('/')
        domain = traj + '/' + domain
    else:
        raise NotImplementedError(
            'Only accepting speed, prisma25 and shirt datasets')

    # Where to save CSV?
    outdir = os.path.join(datadir, domain, 'labels')
    os.makedirs(outdir, exist_ok=True)
    csvfile = os.path.join(outdir, split.replace('json', 'csv'))
    print(f'Label CSV file will be saved to {csvfile}')

    # Where to save resized image?
    input_size = tuple(cfg.DATASET.INPUT_SIZE)
    imagedir = os.path.join(datadir, domain,
            f'images_{cfg.DATASET.INPUT_SIZE[0]}x{cfg.DATASET.INPUT_SIZE[1]}_RGB')
    os.makedirs(imagedir, exist_ok=True)
    print(f'Resized images will be saved to {imagedir}')

    # Where to save resized masks? Both dimensions are divided by
    # OUTPUT_SIZE[0]; the same size names the directory and drives the resize.
    if args.load_masks:
        mask_size = tuple(int(s / cfg.DATASET.OUTPUT_SIZE[0])
                          for s in cfg.DATASET.INPUT_SIZE)
        maskdir = os.path.join(datadir, domain,
            f'masks_{mask_size[0]}x{mask_size[1]}')
        os.makedirs(maskdir, exist_ok=True)
        print(f'Resized masks will be saved to {maskdir}')

    # The context manager closes the CSV even if a sample fails half-way.
    with open(csvfile, 'w') as csv_out:
        for label in tqdm(labels):

            # ---------- Read image & resize & save
            filename = label['filename']
            image_path = os.path.join(datadir, cfg.DATASET.DATANAME, domain,
                                      'images', filename)
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising
                raise FileNotFoundError(f'Could not read image: {image_path}')
            image = cv2.resize(image, input_size)
            cv2.imwrite(os.path.join(imagedir, filename), image)

            # ---------- Read mask & resize & save
            if args.load_masks:
                mask_path = os.path.join(datadir, cfg.DATASET.DATANAME, domain,
                                         'masks', filename)
                mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                if mask is None:
                    raise FileNotFoundError(f'Could not read mask: {mask_path}')
                mask = cv2.resize(mask, mask_size)
                cv2.imwrite(os.path.join(maskdir, filename), mask)

            # CSV row always starts with the file name
            row = [filename]

            if args.load_labels:
                # ---------- Read labels
                q_vbs2tango = np.array(label['q_vbs2tango'], dtype=np.float32)
                r_Vo2To_vbs = np.array(label['r_Vo2To_vbs_true'], dtype=np.float32)

                # ---------- Project keypoints & origin
                # Prepend the origin as an extra first column
                keypts3d_origin = np.concatenate(
                    (np.zeros((3, 1), dtype=np.float32), keypts3d), axis=1)

                keypts2d = project_keypoints(q_vbs2tango,
                                             r_Vo2To_vbs,
                                             camera['cameraMatrix'],
                                             camera['distCoeffs'],
                                             keypts3d_origin)

                # Normalize row 0 by camera['Nu'] and row 1 by camera['Nv']
                keypts2d[0] = keypts2d[0] / camera['Nu']
                keypts2d[1] = keypts2d[1] / camera['Nv']

                # Into vector (x0, y0, kx1, ky1, ..., kx11, ky11)
                keypts2d_vec = np.reshape(np.transpose(keypts2d), (24,))

                # ---------- Bounding box labels
                # Always taken from the projected points (origin included),
                # even when masks are loaded.
                xmin = np.min(keypts2d[0])
                ymin = np.min(keypts2d[1])
                xmax = np.max(keypts2d[0])
                ymax = np.max(keypts2d[1])

                row = row + [xmin, ymin, xmax, ymax] \
                          + q_vbs2tango.tolist() \
                          + r_Vo2To_vbs.tolist() \
                          + keypts2d_vec.tolist()

            # Write
            csv_out.write(', '.join([str(e) for e in row]) + '\n')




if __name__ == '__main__':
    # Pre-process the three splits one after the other, in this fixed order.
    for split_json in ('validation.json', 'train.json', 'test.json'):
        split_options = {
            'cfg': 'experiments/offline_train_full_config_phi3_BN_speed.yaml',
            'jsonfile': f'Datasets/speed/synthetic/{split_json}',
            'no_masks': False,
            'no_labels': False,
            'load_masks': True,
            'load_labels': True,
            'opts': [],
        }
        main(easydict.EasyDict(split_options))

    # Once all splits are done, copy the style archive next to the dataset.
    shutil.copy2(
        'drive/MyDrive/Datasets/styles_768x512_RGB.zip',
        'Datasets/synthetic/styles_768x512_RGB.zip',
    )

    print('done\n\n')


Reading JSON file from Datasets/speed/synthetic/validation.json...
Label CSV file will be saved to Datasets/synthetic/labels/validation.csv
Resized images will be saved to Datasets/synthetic/images_768x512_RGB


100%|██████████| 600/600 [00:17<00:00, 35.01it/s]


done


Reading JSON file from Datasets/speed/synthetic/train.json...
Label CSV file will be saved to Datasets/synthetic/labels/train.csv
Resized images will be saved to Datasets/synthetic/images_768x512_RGB


100%|██████████| 5400/5400 [02:34<00:00, 34.92it/s]

done







Train Code

In [None]:
'''
Copyright (c) 2022 SLAB Group
Licensed under MIT License (see LICENSE.md)
Author: Tae Ha Park (tpark94@stanford.edu)
'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import os.path as osp
import argparse
import warnings

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

import easydict 


import _init_paths
# dist.dist_backend = 'gloo'
from core.config import cfg, update_config
from core.nets import build_spnv2
from core.dataset import get_dataloader
from core.solver import get_optimizer, adjust_learning_rate, get_scaler
from core.engine.trainer import do_train
from core.engine.inference import do_valid
from core.utils.checkpoints import load_checkpoint, save_checkpoint
from core.utils.utils import set_seeds_cudnn, setup_logger, create_logger_directories, \
    write_model_info, load_camera_intrinsics, load_tango_3d_keypoints


def parse_args():
    """Return the fixed training options.

    Nothing is read from the command line; the values are hard-coded and
    wrapped in an ``EasyDict`` so they can be accessed as attributes.
    """
    training_options = {
        'cfg': 'experiments/offline_train_full_config_phi3_BN_speed.yaml',
        'opts': [],
        'gpu': None,
        'world_size': 1,
        'rank': 0,
        'dist_url': 'tcp://127.0.0.1:23456',
    }
    return easydict.EasyDict(training_options)


# Per-epoch history, appended to by main_worker(): the value returned by
# do_train() and the validation score returned by do_valid().
train_results, valid_results = [], []

def main(cfg, overfit_limit):
    """Prepare the configuration and directories, then start training.

    Args:
        cfg: configuration object; updated from ``parse_args()`` and given
            the rank from those options.
        overfit_limit: forwarded unchanged to ``main_worker``.
    """
    args = parse_args()
    update_config(cfg, args)

    # The config has to be unlocked to record the process rank.
    cfg.defrost()
    cfg.DIST.RANK = args.rank
    cfg.freeze()

    _unused, output_dir, log_dir = create_logger_directories(
        cfg, phase='train', write_cfg_to_file=True
    )

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    gpu_count = torch.cuda.device_count()
    main_worker(args.gpu, gpu_count, args, output_dir, log_dir, overfit_limit)


def main_worker(gpu, ngpus_per_node, args, output_dir, log_dir, overfit_limit):
    """Train in a single process, stopping early when validation stalls.

    Args:
        gpu: CUDA device index to pin to, or None for the default CUDA device.
        ngpus_per_node: accepted for interface compatibility; not used here.
        args: options object; ``gpu`` and ``distributed`` are set on it and
            ``rank`` is read from it.
        output_dir: directory where checkpoints are looked up and saved.
        log_dir: directory for the logger, model summary and training logs.
        overfit_limit: training stops once this many consecutive epochs have
            failed to beat the best validation score.
    """
    # Seeds & cuDNN first, before anything is built
    set_seeds_cudnn(cfg, seed=cfg.SEED)

    # Remember which GPU (if any) was requested
    args.gpu = gpu
    if args.gpu is not None:
        print(f'Use GPU: {args.gpu} for training')

    update_config(cfg, args)
    args.distributed = False  # this worker never runs distributed

    # Logger is configured for its side effects; the handle is kept alive
    logger = setup_logger(log_dir, args.rank, 'train', to_console=False)

    # Network
    model = build_spnv2(cfg)

    # Device selection
    if args.gpu is None:
        device = torch.device('cuda')
    else:
        torch.cuda.set_device(args.gpu)
        device = torch.device('cuda', args.gpu)

    model = model.to(device)

    # Model summary goes to the log directory
    write_model_info(model.module if args.distributed else model, log_dir)

    # Data
    train_loader = get_dataloader(
        cfg, split='train', distributed=args.distributed, load_labels=True
    )
    val_loader = get_dataloader(
        cfg, split='val', distributed=args.distributed, load_labels=True
    )

    # Optimizer & scaler (the scaler may be None, see the checkpoint below)
    optimizer = get_optimizer(cfg, model)
    scaler = get_scaler(cfg)

    # Resume from an existing checkpoint when allowed, else start fresh
    checkpoint_file = osp.join(output_dir, 'checkpoint.pth.tar')
    if cfg.AUTO_RESUME and osp.exists(checkpoint_file):
        begin_epoch, best_score = load_checkpoint(
            checkpoint_file, model, optimizer, scaler, device
        )
    else:
        begin_epoch = cfg.TRAIN.BEGIN_EPOCH
        best_score = 1e20

    # Needed by the validation step
    camera = load_camera_intrinsics(cfg.DATASET.CAMERA)
    keypts_true_3D = load_tango_3d_keypoints(cfg.DATASET.KEYPOINTS)

    # ---------------------------------------
    # Main loop
    # ---------------------------------------
    is_best = False
    stale_epochs = 0  # consecutive epochs without a new best score
    for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
        print('')
        adjust_learning_rate(optimizer, epoch, cfg)

        # One epoch of training; its return value is recorded
        epoch_result = do_train(
            epoch,
            cfg,
            model,
            train_loader,
            optimizer,
            log_dir=log_dir,
            device=device,
            scaler=scaler,
            rank=args.rank,
        )
        train_results.append(epoch_result)

        # Validation; a lower score counts as better
        score = do_valid(
            epoch,
            cfg,
            model,
            val_loader,
            camera,
            keypts_true_3D,
            log_dir=None,
            device=device,
        )

        if score < best_score:
            best_score = score
            is_best = True
            stale_epochs = 0
        else:
            is_best = False
            stale_epochs += 1
        valid_results.append(score)

        # The checkpoint is flagged final on the last epoch or on early stop
        out_of_patience = stale_epochs == overfit_limit
        is_last = epoch + 1 == cfg.TRAIN.END_EPOCH or out_of_patience

        states = {
            'epoch': epoch + 1,
            'backbone': cfg.MODEL.BACKBONE.NAME,
            'heads': cfg.MODEL.HEAD.NAMES,
            'state_dict': model.state_dict(),
            'best_state_dict': model.module.state_dict() if args.distributed else model.state_dict(),
            'best_score': best_score,
            'optimizer': optimizer.state_dict(),
            'scaler': scaler.state_dict() if scaler is not None else None
        }
        save_checkpoint(states, is_best, is_last, output_dir)

        if out_of_patience:
            break
if __name__ == '__main__':
    # Stop after 3 consecutive epochs without a better validation score.
    main(cfg, overfit_limit=3)


Test Code

In [None]:
'''
Copyright (c) 2022 SLAB Group
Licensed under MIT License (see LICENSE.md)
Author: Tae Ha Park (tpark94@stanford.edu)
'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path as osp
import argparse

import torch

import _init_paths

from core.config import cfg, update_config
from core.nets   import build_spnv2
from core.dataset import get_dataloader
from core.engine.inference  import do_valid
from core.utils.utils import set_seeds_cudnn, create_logger_directories, \
                        load_camera_intrinsics, load_tango_3d_keypoints

def parse_args():
    """Parse the command line of the test script.

    Returns:
        argparse.Namespace with ``cfg`` (required experiment configuration
        file name) and ``opts`` (list of all remaining command-line tokens).
    """
    cli = argparse.ArgumentParser(description='Test on SPNv2')

    # general
    cli.add_argument(
        '--cfg',
        type=str,
        required=True,
        help='experiment configure file name',
    )

    # everything after the known options is collected verbatim
    cli.add_argument(
        'opts',
        nargs=argparse.REMAINDER,
        default=None,
        help="Modify config options using the command-line",
    )

    return cli.parse_args()

def main(cfg):
    """Evaluate a trained model on the test split.

    The weights are read from ``cfg.OUTPUT_DIR``/``cfg.TEST.MODEL_FILE``; when
    that path is missing or is a directory, a fixed fallback file is used.

    Args:
        cfg: configuration object; updated from the command line and with the
            resolved model file.
    """
    args = parse_args()
    update_config(cfg, args)

    # Resolve the weights file, falling back to the default best model
    model_path = osp.join(cfg.OUTPUT_DIR, cfg.TEST.MODEL_FILE)
    is_usable_file = osp.exists(model_path) and not osp.isdir(model_path)
    if not is_usable_file:
        model_path = 'outputs/efficientdet_d3/full_config/model_best.pth.tar'

    cfg.defrost()
    cfg.TEST.MODEL_FILE = model_path
    cfg.freeze()

    # Logger & directories
    logger, output_dir, _ = create_logger_directories(cfg, 'test')

    # Seeds & cuDNN
    set_seeds_cudnn(cfg, seed=cfg.SEED)

    # Use the first GPU only when CUDA is both requested and available
    if cfg.CUDA and torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    # Build the network, then load the weights on CPU before moving it
    model = build_spnv2(cfg)
    if cfg.TEST.MODEL_FILE:
        weights = torch.load(cfg.TEST.MODEL_FILE, map_location='cpu')
        model.load_state_dict(weights, strict=True)
        logger.info(f'   - Model loaded from {cfg.TEST.MODEL_FILE}')
    model = model.to(device)

    # Test data and the geometry needed for evaluation
    test_loader = get_dataloader(cfg, split='test', load_labels=True)
    camera = load_camera_intrinsics(cfg.DATASET.CAMERA)
    keypts_true_3D = load_tango_3d_keypoints(cfg.DATASET.KEYPOINTS)

    # ---------------------------------------
    # Main Test
    # ---------------------------------------
    do_valid(
        0,
        cfg,
        model,
        test_loader,
        camera,
        keypts_true_3D,
        valid_fraction=None,
        log_dir=output_dir,
        device=device,
    )

if __name__ == '__main__':
    # Script entry point: run the evaluation with the imported config object.
    main(cfg)


Testing 001 [599/600] [  69.7 (  73.0) ms]	effi_iou   0.93 (  0.85) 	effi_eR  10.47 ( 17.02) deg	effi_eT   0.11 (  0.62) m	effi_pose   0.22 (  0.35) 

Online Domain Refinement

In [None]:
'''
Copyright (c) 2022 SLAB Group
Licensed under MIT License (see LICENSE.md)
Author: Tae Ha Park (tpark94@stanford.edu)
'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import time

import torch

import _init_paths

import easydict 

from core.config  import cfg, update_config
from core.nets    import build_spnv2
from core.dataset import get_dataloader
from core.solver  import get_optimizer, get_scaler
from core.engine.adapter    import do_adapt
from core.engine.inference  import do_valid
from core.utils.checkpoints import save_checkpoint
from core.utils.utils import set_seeds_cudnn, setup_logger, create_logger_directories, \
                        load_camera_intrinsics, load_tango_3d_keypoints, \
                        write_model_info, num_trainable_parameters

def parse_args():
    """Return the fixed options for online domain refinement.

    Nothing is read from the command line: the configuration file and the
    (empty) list of config overrides are hard-coded and wrapped in an
    ``EasyDict`` for attribute access. No ``ArgumentParser`` is created,
    because its result would never be used.
    """
    return easydict.EasyDict({
        'cfg': 'experiments/odr_phi3_B4_N1024.yaml',
        'opts': [],
    })

def main(cfg):
    """Prepare the configuration and directories, then run the refinement.

    Args:
        cfg: configuration object; updated from ``parse_args()`` and given a
            concrete seed when it has none.
    """
    args = parse_args()
    update_config(cfg, args)

    # Fall back to a time-based seed when the config does not provide one
    seed = cfg.SEED
    if seed is None:
        seed = int(time.time())

    cfg.defrost()
    cfg.SEED = seed
    cfg.freeze()

    _unused, output_dir, log_dir = create_logger_directories(
        cfg, phase='train', write_cfg_to_file=True
    )

    # No distributed training for TTDR
    main_worker(output_dir, log_dir)

def main_worker(output_dir, log_dir):
    """Run one adaptation pass, validate the result and save a checkpoint.

    The whole model is put in eval mode with gradients disabled; only the
    backbone's normalization layers (BatchNorm2d or GroupNorm, depending on
    ``cfg.MODEL.USE_GROUPNORM_BACKBONE``) get ``requires_grad`` re-enabled.
    For BatchNorm layers, forward hooks record each layer's input so that
    ``do_adapt`` can use it.

    Args:
        output_dir: directory passed to ``do_valid`` and ``save_checkpoint``.
        log_dir: directory passed to the logger, ``write_model_info`` and
            ``do_adapt``.
    """

    # Set all seeds & cudNN
    set_seeds_cudnn(cfg, seed=cfg.SEED)

    # setup logger
    logger = setup_logger(log_dir, 0, 'train', to_console=False)

    # build network
    model = build_spnv2(cfg)

    # GPU device (always CUDA here; there is no CPU fallback)
    device = torch.device('cuda')
    model  = model.to(device)

    # write model summary to file
    write_model_info(model, log_dir)

    # Disable gradients for the entire model; selected normalization layers
    # are re-enabled in the loop below
    model.eval()
    model.requires_grad_(False)

    # -------------------------------------------------------------
    # For gradient accumulation with BatchNorm layers
    # -------------------------------------------------------------
    # Here, we manually update BatchNorm's running stats
    def get_bn_features_from_name(name):
        # Build a forward hook that stores the layer's first input (detached)
        # in bn_features under the given module name
        def bn_feature_hook(module, input, output):
            bn_features[name] = input[0].detach()
        return bn_feature_hook

    bn_features = {}  # module name -> latest input recorded by its hook
    handles = []      # hook handles, kept so the hooks can be removed later
    for n, m in model.backbone.named_modules():
        if not cfg.MODEL.USE_GROUPNORM_BACKBONE and isinstance(m, torch.nn.BatchNorm2d):
            # (1) Set BatchNorm layers to eval mode, so that running stats are
            #     not updated by every forward() call
            m.requires_grad_(True)
            m.eval()

            # (2) Keep a record of the input features to BatchNorm layers
            h = m.register_forward_hook(get_bn_features_from_name(n))
            handles.append(h)

        elif cfg.MODEL.USE_GROUPNORM_BACKBONE and isinstance(m, torch.nn.GroupNorm):
            # GroupNorm layers -- simply allow requires_grad
            m.requires_grad_(True)

    logger.info(f'Total number of parameters with requires_grad=True')
    logger.info(f'   - {num_trainable_parameters(model):,d}')

    # Dataloaders
    train_loader = get_dataloader(cfg,
                                  split='train',
                                  distributed=False,
                                  load_labels=False) # No labels during TTDR
    val_loader   = get_dataloader(cfg,
                                  split='val',
                                  distributed=False,
                                  load_labels=True)

    # Optimizer & scaler for mixed-precision training
    optimizer = get_optimizer(cfg, model)
    scaler    = get_scaler(cfg)

    # For validation
    camera = load_camera_intrinsics(cfg.DATASET.CAMERA)
    keypts_true_3D = load_tango_3d_keypoints(cfg.DATASET.KEYPOINTS)

    # ---------------------------------------
    # Main ODR
    # ---------------------------------------
    # Single adaptation pass (epoch index 0); the hooks fill bn_features
    # while the model runs
    do_adapt(0,
             cfg,
             model,
             bn_features,
             train_loader,
             optimizer,
             log_dir=log_dir,
             device=device,
             scaler=scaler)

    # Remove hooks so validation no longer records BatchNorm inputs
    for h in handles:
        h.remove()

    # Validate on the whole validation set (valid_fraction=1.0)
    # score = 0
    score = do_valid(0,
                    cfg,
                    model,
                    val_loader,
                    camera,
                    keypts_true_3D,
                    valid_fraction=1.0,
                    log_dir=output_dir,
                    device=device)

    # Save. The scaler state is stored as None even though a scaler was
    # created above.
    # NOTE(review): the positional flags (False, True) presumably mean
    # "not best" / "final checkpoint" -- confirm against save_checkpoint.
    save_checkpoint({
        'epoch': 1,
        'backbone': cfg.MODEL.BACKBONE.NAME,
        'heads': cfg.MODEL.HEAD.NAMES,
        'state_dict': model.state_dict(),
        'best_state_dict': model.state_dict(),
        'best_score': score,
        'optimizer': optimizer.state_dict(),
        'scaler': None
    }, False, True, output_dir)

    logger.info('\n\n')

if __name__ == '__main__':
    # Script entry point: run the refinement with the imported config object.
    main(cfg)


ODR 001 [1024/1024] [ 134.5 ( 119.3) ms]	ent 7.79e-04 (2.31e-03) 
Testing 001 [599/600] [  61.1 (  62.1) ms]	effi_iou   0.90 (  0.77) 	effi_eR   1.44 ( 14.11) deg	effi_eT   0.02 (  2.41) m	effi_pose   0.03 (  0.38) 