In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import sys
import time
import datetime
import argparse
import tensorboardX
import torch
import yaml
import shutil
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
import torch.nn as nn
sys.path.append('../')

In [2]:
from pycode.dataset import RLBench_dataset, Softargmax_dataset, imageaug_full_transform, train_val_split, pose_aug
from pycode.config import _C as cfg
from pycode.model.Hourglass import stacked_hourglass_model
from pycode.model.VideoPrediction import VIDEO_HOURGLASS
from pycode.loss.mp_loss import Train_Loss_sequence_hourglass
from pycode.misc import save_outputs, build_model_MP, build_dataset_MP, build_dataset_VP, build_optimizer, str2bool, save_args, save_checkpoint, load_checkpoint

kornia requires version >= 3.6. your version 3.6


In [3]:
# parser
# There is no real command line in a notebook, so the options are handed to
# parse_args() as an explicit list.
parser = argparse.ArgumentParser(description='parser for image generator')
parser.add_argument('--config_file', type=str, default='', metavar='FILE', help='path to config file')
parser.add_argument('--log_step', type=int, default=10, help='')
parser.add_argument('--save_step', type=int, default=100, help='')
parser.add_argument('--eval_step', type=int, default=100, help='')
parser.add_argument('--output_dirname', type=str, default='', help='')
parser.add_argument('--checkpoint_path', type=str, default=None, help='')
parser.add_argument('--log2wandb', type=str2bool, default=True)
args = parser.parse_args(args=['--config_file','../configs/RLBench_MP.yaml','--output_dirname','hoge', '--log2wandb','False'])
# args = parser.parse_args(args=['--checkpoint_path','output/2020-04-02_18:28:18.736004/model_log/checkpoint_epoch9_iter11'])

# get cfg data
# The YAML file (when given) overrides the defaults held by the shared cfg object.
if len(args.config_file) > 0:
    print('Loaded configration file {}'.format(args.config_file))
    cfg.merge_from_file(args.config_file)

# define output dirname
# Falls back to a "<date>_<time>" name when no explicit directory name was passed.
if len(args.output_dirname) == 0:
    dt_now = datetime.datetime.now()
    output_dirname = str(dt_now.date()) + '_' + str(dt_now.time())
else:
    output_dirname = args.output_dirname

# The run directory is nested as <OUTPUT_DIR>/<dataset name>/<output_dirname>;
# note that this rewrites cfg.BASIC.OUTPUT_DIR in place.
cfg.BASIC.OUTPUT_DIR = os.path.join(cfg.BASIC.OUTPUT_DIR, cfg.DATASET.NAME, output_dirname)

# define save model path
model_path = os.path.join(cfg.BASIC.OUTPUT_DIR, 'model_log')

# make output dir
os.makedirs(cfg.BASIC.OUTPUT_DIR, exist_ok=True)
os.makedirs(model_path, exist_ok=True)

# copy config file
if len(args.config_file) > 0:
    shutil.copy(args.config_file,cfg.BASIC.OUTPUT_DIR)

# save args
argsfile_path = os.path.join(cfg.BASIC.OUTPUT_DIR, "args.txt")
save_args(args,argsfile_path)

# set seed and cuda
torch.manual_seed(cfg.BASIC.SEED)
cuda = torch.cuda.is_available()
device = torch.device(cfg.BASIC.DEVICE)

if cuda:
    # NOTE(review): benchmark=True lets cuDNN auto-tune its algorithm choice,
    # which the PyTorch reproducibility notes describe as a source of
    # run-to-run variation; confirm it is wanted together with deterministic=True.
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed(cfg.BASIC.SEED)

# Raw YAML contents of the config file. Guarded like every other use of
# args.config_file so that an empty path does not raise FileNotFoundError;
# obj stays None when no config file was supplied.
obj = None
if len(args.config_file) > 0:
    with open(args.config_file) as file:
        obj = yaml.safe_load(file)

Loaded configration file ../configs/RLBench_MP.yaml


In [11]:
# Motion-prediction datasets, built in train-then-val order without saving them.
train_dataset, val_dataset = (
    build_dataset_MP(cfg, save_dataset=False, mode=split_name)
    for split_name in ('train', 'val')
)

# Both loaders take batch size and worker count from cfg.BASIC; only the
# training loader shuffles.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=cfg.BASIC.BATCH_SIZE,
    shuffle=True,
    num_workers=cfg.BASIC.WORKERS,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=cfg.BASIC.BATCH_SIZE,
    shuffle=False,
    num_workers=cfg.BASIC.WORKERS,
)

# Build the model described by cfg and place it on the configured device.
model = build_model_MP(cfg).to(device)

length of future is 1 frame
load json data
length of future is 1 frame
load json data
use hourglass


In [12]:
from ptflops import get_model_complexity_info
from ptflops.flops_counter import add_flops_counting_methods

# Build a new model instance on the device and add ptflops' counting methods to it.
model = build_model_MP(cfg).to(device)
flops_model = add_flops_counting_methods(model)

# Switch to eval mode, then start recording; the per-layer statistics are
# written to stdout and no module type is excluded from the count.
flops_model.eval()
flops_model.start_flops_count(
    ost=sys.stdout,
    verbose=False,
    ignore_list=[],
)

use hourglass


In [13]:
# Take a single batch from the training loader to drive the FLOPs measurement.
# next(iter(...)) fails right here if the loader is empty, instead of leaving
# `data` undefined (or stale from an earlier run) for the cells below.
data = next(iter(train_dataloader))

In [14]:
# One forward pass so the counting hooks can record the cost of each layer.
# Only the hook side effects matter, so the output is discarded and the pass
# runs under no_grad() to avoid building an autograd graph for a
# measurement-only call.
with torch.no_grad():
    _ = flops_model(data)

In [15]:
from ptflops.flops_counter import print_model_with_flops
from ptflops.flops_counter import flops_to_string, params_to_string

# Read back the two values ptflops accumulated during the forward pass above
# (used below as the FLOPs/MAC count and the parameter count).
flops_count, params_count = flops_model.compute_average_flops_cost()
# Print the module tree annotated with per-layer parameter and MAC figures.
print_model_with_flops(flops_model, flops_count, params_count, ost=sys.stdout)
# Stop recording now that the report has been produced.
flops_model.stop_flops_count()

stacked_hourglass_model(
  25.49 M, 100.000% Params, 42.664 GMac, 100.000% MACs, 
  (initial_conv): ModuleList(
    2.238 M, 8.778% Params, 12.229 GMac, 28.664% MACs, 
    (0): ConvBlock(
      0.007 M, 0.028% Params, 0.039 GMac, 0.092% MACs, 
      (conv): Conv2d(0.007 M, 0.028% Params, 0.038 GMac, 0.090% MACs, 6, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act): ReLU(0.0 M, 0.000% Params, 0.001 GMac, 0.002% MACs, )
    )
    (1): ResidualBlock(
      0.623 M, 2.445% Params, 3.407 GMac, 7.986% MACs, 
      (conv1): ConvBlock(
        0.148 M, 0.579% Params, 0.807 GMac, 1.891% MACs, 
        (conv): Conv2d(0.148 M, 0.579% Params, 0.806 GMac, 1.889% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (act): ReLU(0.0 M, 0.000% Params, 0.001 GMac, 0.002% MACs, )
      )
      (conv2): ConvBlock(
        0.148 M, 0.579% Params, 0.807 GMac, 1.891% MACs, 
        (conv): Conv2d(0.148 M, 0.579% Params, 0.806 GMac, 1.889% MACs, 128, 128, kernel_size=(3

In [16]:
# Scale the value returned by compute_average_flops_cost() by the recorded
# batch counter (presumably turning a per-sample average into the total for
# the whole batch - TODO confirm) and print it in human-readable units.
print(flops_to_string(flops_count * flops_model.__batch_counter__))
# Parameter count in human-readable units.
print(params_to_string(params_count))

511.97 GMac
25.49 M


MP with depth trajectory result

658.75 GMac
27.73 M

MP without depth trajectory result

511.97 GMac
25.49 M

------------------------------

In [4]:
# Option parser for the video-prediction half of the notebook. The options
# are supplied as an explicit list because a notebook has no command line;
# no config file is passed here, so the merge below is skipped.
parser = argparse.ArgumentParser(description='parser for image generator')
parser.add_argument(
    '--config_file', type=str, default='', metavar='FILE', help='path to config file'
)
parser.add_argument('--log_step', type=int, default=10, help='')
parser.add_argument('--save_step', type=int, default=100, help='')
parser.add_argument('--eval_step', type=int, default=100, help='')
parser.add_argument('--output_dirname', type=str, default='', help='')
parser.add_argument('--checkpoint_path', type=str, default=None, help='')
parser.add_argument('--log2wandb', type=str2bool, default=True)
args = parser.parse_args(
    args=[
        '--output_dirname', 'hoge',
        '--log2wandb', 'False',
    ]
)
# Alternative invocation that resumes from a saved checkpoint:
# args = parser.parse_args(args=['--checkpoint_path','output/2020-04-02_18:28:18.736004/model_log/checkpoint_epoch9_iter11'])

# Merge the YAML overrides into cfg only when a config path was given.
if args.config_file:
    print('Loaded configration file {}'.format(args.config_file))
    cfg.merge_from_file(args.config_file)

# Seed the CPU generator and resolve the target device from cfg.
torch.manual_seed(cfg.BASIC.SEED)
device = torch.device(cfg.BASIC.DEVICE)

# When CUDA is present, seed it as well and set the cuDNN flags.
cuda = torch.cuda.is_available()
if cuda:
    torch.cuda.manual_seed(cfg.BASIC.SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [12]:
# get cfg data
# Overlay the video-prediction settings onto the shared cfg object.
cfg.merge_from_file('../configs/RLBench_VP.yaml')

# set dataset
train_dataset = build_dataset_VP(cfg, save_dataset=False, mode='train')
val_dataset = build_dataset_VP(cfg, save_dataset=False, mode='val')

# set dataloader
train_dataloader = DataLoader(train_dataset, batch_size=cfg.BASIC.BATCH_SIZE, shuffle=True, num_workers=cfg.BASIC.WORKERS)
# Validation batches keep dataset order (shuffle=False), consistent with the
# motion-prediction validation loader earlier in this notebook; shuffling a
# validation split only makes per-batch results harder to compare across runs.
val_dataloader = DataLoader(val_dataset, batch_size=cfg.BASIC.BATCH_SIZE, shuffle=False, num_workers=cfg.BASIC.WORKERS)

# set model
vp_model = VIDEO_HOURGLASS(cfg)

length of future is 1 frame
load json data
length of future is 1 frame
load json data


In [13]:
from ptflops import get_model_complexity_info
from ptflops.flops_counter import add_flops_counting_methods

# Move the video model to the configured device before instrumenting it.
vp_model = vp_model.to(device)
# Add ptflops' counting methods to the model.
flops_model = add_flops_counting_methods(vp_model)
# Switch to eval mode for the measurement pass.
flops_model.eval()
# Start recording; statistics go to stdout and no module type is ignored.
flops_model.start_flops_count(ost=sys.stdout, verbose=False, ignore_list=[])

In [14]:
# Take a single batch from the video-prediction training loader.
# next(iter(...)) fails right here if the loader is empty, instead of silently
# reusing a `data` batch left over from an earlier cell.
data = next(iter(train_dataloader))

In [15]:
def make_videomodel_input(inputs, device, sequence_id=0):
    '''
    Pick the entries of a batch that the video model consumes and move them
    to ``device``.

    The selection along dimension 1 depends on ``cfg.VIDEO_HOUR.MODE``:

    mode    rgb / depth index                                pose-like slice
    'pcf'   [sequence_id, sequence_id+1, sequence_id+3]      [:4]
    'pc'    [sequence_id, sequence_id+1]                     [:3]
    'c'     1 (integer index, so that dimension is dropped)  [1:3]

    "pose-like" means the 'pose', 'pose_xyz', 'rotation_matrix' and 'grasp'
    entries, which all share the same slice.

    NOTE(review): only the rgb/depth indices follow ``sequence_id``; the
    pose-like slices are fixed. Confirm this is intended for sequence_id != 0.

    input:
    inputs => dictionary with the keys 'rgb', 'pose', 'pose_xyz',
              'rotation_matrix', 'grasp' (and 'depth' when
              cfg.VIDEO_HOUR.INPUT_DEPTH is set); each value must support
              ``[:, index]`` indexing and ``.to(device)``
    device => forwarded unchanged to ``.to()``
    sequence_id => first frame index used by the 'pcf' and 'pc' modes

    output:
    dictionary{
    rgb => torch.Tensor, documented by the original author as shape=(B,S,C,H,W),
    pose => torch.Tensor, documented by the original author as shape=(B,S,C,H,W),
    pose_xyz, rotation_matrix, grasp => sliced like pose,
    depth => indexed like rgb, present only when cfg.VIDEO_HOUR.INPUT_DEPTH is set}

    raises:
    ValueError when cfg.VIDEO_HOUR.MODE is not 'pcf', 'pc' or 'c' (previously
    this surfaced as an UnboundLocalError further down).
    '''
    mode = cfg.VIDEO_HOUR.MODE
    if mode == 'pcf':
        frame_index = [sequence_id, sequence_id+1, sequence_id+3]
        pose_index = slice(None, 4)
    elif mode == 'pc':
        frame_index = [sequence_id, sequence_id+1]
        pose_index = slice(None, 3)
    elif mode == 'c':
        frame_index = 1
        pose_index = slice(1, 3)
    else:
        raise ValueError(
            "unsupported cfg.VIDEO_HOUR.MODE {!r}; expected 'pcf', 'pc' or 'c'".format(mode)
        )

    # Keys are read and inserted in the same order as before:
    # rgb, pose, pose_xyz, rotation_matrix, grasp, then the optional depth.
    input_dict = {}
    input_dict['rgb'] = inputs['rgb'][:, frame_index].to(device)
    for key in ('pose', 'pose_xyz', 'rotation_matrix', 'grasp'):
        input_dict[key] = inputs[key][:, pose_index].to(device)
    if cfg.VIDEO_HOUR.INPUT_DEPTH:
        # Depth uses the same index selection as rgb.
        input_dict['depth'] = inputs['depth'][:, frame_index].to(device)

    return input_dict

In [16]:
# One forward pass on the prepared video-model inputs so the counting hooks
# can record per-layer cost. The output is discarded, and no_grad() avoids
# building an autograd graph for a measurement-only call.
with torch.no_grad():
    _ = flops_model(make_videomodel_input(data,device))

In [17]:
from ptflops.flops_counter import print_model_with_flops
from ptflops.flops_counter import flops_to_string, params_to_string

# Read back the two values ptflops accumulated during the forward pass above
# (used below as the FLOPs/MAC count and the parameter count).
flops_count, params_count = flops_model.compute_average_flops_cost()
# Print the module tree annotated with per-layer parameter and MAC figures.
print_model_with_flops(flops_model, flops_count, params_count, ost=sys.stdout)
# Stop recording now that the report has been produced.
flops_model.stop_flops_count()

VIDEO_HOURGLASS(
  19.842 M, 100.000% Params, 28.981 GMac, 100.000% MACs, 
  (img_encoder): Encoder(
    3.647 M, 18.379% Params, 5.988 GMac, 20.662% MACs, 
    (res_blocks): ModuleList(
      3.64 M, 18.344% Params, 5.911 GMac, 20.397% MACs, 
      (0): ResSubsampleBlock(
        0.23 M, 1.158% Params, 2.447 GMac, 8.442% MACs, 
        (shortcut): Sequential(
          0.008 M, 0.042% Params, 0.024 GMac, 0.082% MACs, 
          (0): AvgPool2d(0.0 M, 0.000% Params, 0.001 GMac, 0.002% MACs, kernel_size=2, stride=2, padding=0)
          (1): ConvBlock(
            0.008 M, 0.042% Params, 0.023 GMac, 0.080% MACs, 
            (conv): Conv2d(0.008 M, 0.042% Params, 0.023 GMac, 0.078% MACs, 64, 128, kernel_size=(1, 1), stride=(1, 1))
            (act): ReLU(0.0 M, 0.000% Params, 0.0 GMac, 0.001% MACs, )
          )
        )
        (conv1): ConvBlock(
          0.074 M, 0.372% Params, 0.808 GMac, 2.788% MACs, 
          (conv): Conv2d(0.074 M, 0.372% Params, 0.807 GMac, 2.784% MACs, 64, 12

In [18]:
# Scale the value returned by compute_average_flops_cost() by the recorded
# batch counter (presumably turning a per-sample average into the total for
# the whole batch - TODO confirm) and print it in human-readable units.
print(flops_to_string(flops_count * flops_model.__batch_counter__))
# Parameter count in human-readable units.
print(params_to_string(params_count))

173.88 GMac
19.84 M


VP_pcf

173.88 GMac
19.84 M

VP_pc
173.88 GMac
19.84 M

In [13]:
# Inspect the ptflops batch counter, i.e. the multiplier applied to the
# averaged cost when the totals are printed.
flops_model.__batch_counter__

6

In [16]:
# Squared ratio of a 256-pixel side to a 224-pixel side, multiplied by 4.
# NOTE(review): presumably a sanity check relating the batch counter to a
# 224x224 reference input - the meaning of the factor 4 is not stated; confirm.
(256 / 224) ** 2 * 4

5.224489795918367

-------------------

In [17]:
# VGG-16 without pretrained weights, placed on the configured device, used
# below as a reference point for the complexity figures.
net = torchvision.models.vgg16(pretrained=False).to(device)

In [18]:
# Profile VGG-16 on a single 3x224x224 input; results come back as strings
# and the per-layer breakdown is printed along the way.
macs, params = get_model_complexity_info(
    net,
    (3, 224, 224),
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)
print(f"{'Computational complexity: ':<30}  {macs:<8}")
print(f"{'Number of parameters: ':<30}  {params:<8}")


VGG(
  138.358 M, 100.000% Params, 15.504 GMac, 100.000% MACs, 
  (features): Sequential(
    14.715 M, 10.635% Params, 15.38 GMac, 99.202% MACs, 
    (0): Conv2d(0.002 M, 0.001% Params, 0.09 GMac, 0.580% MACs, 3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0.0 M, 0.000% Params, 0.003 GMac, 0.021% MACs, inplace=True)
    (2): Conv2d(0.037 M, 0.027% Params, 1.853 GMac, 11.951% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0.0 M, 0.000% Params, 0.003 GMac, 0.021% MACs, inplace=True)
    (4): MaxPool2d(0.0 M, 0.000% Params, 0.003 GMac, 0.021% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(0.074 M, 0.053% Params, 0.926 GMac, 5.976% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(0.0 M, 0.000% Params, 0.002 GMac, 0.010% MACs, inplace=True)
    (7): Conv2d(0.148 M, 0.107% Params, 1.851 GMac, 11.941% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)

In [19]:
# Display the `conv` submodule of the video model's image encoder to read off
# its layer configuration.
vp_model.img_encoder.conv

ConvBlock(
  (conv): Conv2d(12, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (act): ReLU()
)

In [23]:
# New video model instance (not moved to `device` in this cell); only the
# image encoder's `conv` block is profiled, on a 12x256x256 input.
vp_model = VIDEO_HOURGLASS(cfg)
macs, params = get_model_complexity_info(
    vp_model.img_encoder.conv,
    (12, 256, 256),
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)
print(f"{'Computational complexity: ':<30}  {macs:<8}")
print(f"{'Number of parameters: ':<30}  {params:<8}")

ConvBlock(
  0.007 M, 100.000% Params, 0.461 GMac, 100.000% MACs, 
  (conv): Conv2d(0.007 M, 100.000% Params, 0.457 GMac, 99.091% MACs, 12, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (act): ReLU(0.0 M, 0.000% Params, 0.004 GMac, 0.909% MACs, )
)
Computational complexity:       0.46 GMac
Number of parameters:           6.98 k  


In [31]:
# New video model instance (not moved to `device` in this cell); only the
# first residual block of the image encoder is profiled, on a 64x256x256 input.
vp_model = VIDEO_HOURGLASS(cfg)
macs, params = get_model_complexity_info(
    vp_model.img_encoder.res_blocks[0],
    (64, 256, 256),
    as_strings=True,
    print_per_layer_stat=True,
    verbose=True,
)
print(f"{'Computational complexity: ':<30}  {macs:<8}")
print(f"{'Number of parameters: ':<30}  {params:<8}")

ResSubsampleBlock(
  0.23 M, 100.000% Params, 14.68 GMac, 100.000% MACs, 
  (shortcut): Sequential(
    0.008 M, 3.621% Params, 0.143 GMac, 0.971% MACs, 
    (0): AvgPool2d(0.0 M, 0.000% Params, 0.004 GMac, 0.029% MACs, kernel_size=2, stride=2, padding=0)
    (1): ConvBlock(
      0.008 M, 3.621% Params, 0.138 GMac, 0.943% MACs, 
      (conv): Conv2d(0.008 M, 3.621% Params, 0.136 GMac, 0.929% MACs, 64, 128, kernel_size=(1, 1), stride=(1, 1))
      (act): ReLU(0.0 M, 0.000% Params, 0.002 GMac, 0.014% MACs, )
    )
  )
  (conv1): ConvBlock(
    0.074 M, 32.145% Params, 4.849 GMac, 33.029% MACs, 
    (conv): Conv2d(0.074 M, 32.145% Params, 4.84 GMac, 32.971% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (act): ReLU(0.0 M, 0.000% Params, 0.008 GMac, 0.057% MACs, )
  )
  (conv2): Sequential(
    0.148 M, 64.234% Params, 9.689 GMac, 66.000% MACs, 
    (0): ConvBlock(
      0.148 M, 64.234% Params, 9.68 GMac, 65.943% MACs, 
      (conv): Conv2d(0.148 M, 64.234% Params,