In [None]:
# Mount Google Drive at /content/drive; the unzip cell reads the COCO archives from MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip "/content/drive/MyDrive/train2017.zip" -d "train2017"
!unzip "/content/drive/MyDrive/val2017.zip" -d "val2017"
!unzip "/content/drive/MyDrive/annotations_trainval2017.zip" -d "annotations"

In [3]:
# Sanity check on the extraction: count the entries in train2017/train2017.
import os 
path=os.listdir('train2017/train2017')
print(len(path))

118287


In [None]:
# Upgrade setuptools/pip, install nvidia-pyindex, then install nvidia-tensorrt
# pinned to 7.2.* from pypi.ngc.nvidia.com. The unpinned --upgrade variant is
# left disabled below.
!python3 -m pip install --upgrade setuptools pip
!python3 -m pip install nvidia-pyindex
#!python3 -m pip install --upgrade nvidia-tensorrt
!python3 -m pip install nvidia-tensorrt==7.2.* --index-url https://pypi.ngc.nvidia.com

In [None]:
# Smoke test for the TensorRT install: print the version and check that a
# Builder can be constructed from a Logger.
import tensorrt
print(tensorrt.__version__)
assert tensorrt.Builder(tensorrt.Logger())

In [None]:
# Fetch NVIDIA apex; the training cell imports apex.amp.
!git clone https://github.com/NVIDIA/apex.git

In [None]:
# Install apex with pip from inside the cloned checkout, then return to the
# parent directory so later cells run from where they started.
%cd apex
!pip install -v --disable-pip-version-check --no-cache-dir ./
%cd ..

In [None]:
!sudo apt install libnvinfer* libnvinfer-dev
!pip install torch2trt-unofficial
#%cd ..
!sudo pip3 install tqdm cython pycocotools
!sudo apt-get install python3-matplotlib
!git clone https://github.com/NVIDIA-AI-IOT/trt_pose
%cd trt_pose 
!sudo apt-get install libprotobuf* protobuf-compiler ninja-build
!sudo python3 setup.py install
!python3 setup.py build_ext --inplace


In [9]:
# Move the extracted annotations and image folders into /content/trt_pose.
# NOTE(review): the ../ sources assume the working directory is the trt_pose
# checkout entered by the earlier %cd — confirm before re-running. The moves
# cannot be repeated once the sources are gone or the targets already exist.
!mv ../annotations/annotations/ /content/trt_pose
!mv ../val2017/val2017 /content/trt_pose
!mv ../train2017/train2017 /content/trt_pose

mv: cannot move '../val2017/val2017' to '/content/val2017': Directory not empty


In [11]:
# Run trt_pose's preprocess_coco_person.py on the train keypoint annotations;
# the result is written to person_keypoints_train2017_modified.json.
!python3 tasks/human_pose/preprocess_coco_person.py annotations/person_keypoints_train2017.json annotations/person_keypoints_train2017_modified.json

Loading...
Preprocessing...
Saving...


In [14]:
# Same preprocessing for the val keypoint annotations; the experiment config's
# test_dataset points at the resulting person_keypoints_val2017_modified.json.
!python3 tasks/human_pose/preprocess_coco_person.py annotations/person_keypoints_val2017.json annotations/person_keypoints_val2017_modified.json

Loading...
Preprocessing...
Saving...


In [None]:
import argparse
import subprocess
import torch
import torchvision
import os
import torch.optim
import tqdm
import apex.amp as amp
import time
import json
import pprint
import torch.nn.functional as F
from trt_pose.coco import CocoDataset, CocoHumanPoseEval
from trt_pose.models import MODELS

# Optimizer classes selectable through the "name" field of the config's
# "optimizer" section.
OPTIMIZERS = dict(SGD=torch.optim.SGD, Adam=torch.optim.Adam)

# Small constant; not referenced by the training code in this cell.
EPS = 1e-6

def set_lr(optimizer, lr):
    """Set the ``'lr'`` entry of every parameter group of ``optimizer`` to ``lr``.

    The groups are modified in place; nothing is returned.
    """
    for group in optimizer.param_groups:
        group['lr'] = lr
        
        
def save_checkpoint(model, directory, epoch):
    """Write ``model.state_dict()`` to ``<directory>/epoch_<epoch>.pth``.

    Args:
        model: object exposing ``state_dict()``.
        directory: destination folder; it is created, including any missing
            parent folders, when it does not exist yet.
        epoch: integer embedded in the checkpoint file name.
    """
    # makedirs(exist_ok=True) copes with nested paths and removes the
    # check-then-create race of os.path.exists() followed by os.mkdir().
    os.makedirs(directory, exist_ok=True)
    filename = os.path.join(directory, 'epoch_%d.pth' % epoch)
    print('Saving checkpoint to %s' % filename)
    torch.save(model.state_dict(), filename)

    
def write_log_entry(logfile, epoch, train_loss, test_loss):
    """Append one ``epoch, train_loss, test_loss`` line to ``logfile`` and echo it.

    The file is opened in append mode, so earlier entries are kept. The same
    text that goes to the file is also printed to stdout.
    """
    with open(logfile, 'a+') as log:
        fields = (epoch, train_loss, test_loss)
        entry = '%d, %f, %f' % fields
        print(entry)
        print(entry, file=log)
        
# The model and every batch tensor are placed on the CUDA device.
device = torch.device('cuda')


def _masked_mse_loss(model, image, cmap, paf, mask, mask_unlabeled):
    """Run ``model`` on one batch and return the cmap MSE plus the paf MSE.

    The batch tensors are moved to ``device`` first. When ``mask_unlabeled``
    is false the dataset mask is replaced by an all-ones tensor of the same
    shape, so every element contributes to the loss.
    """
    image = image.to(device)
    cmap = cmap.to(device)
    paf = paf.to(device)

    if mask_unlabeled:
        mask = mask.to(device).float()
    else:
        mask = torch.ones_like(mask).to(device).float()

    cmap_out, paf_out = model(image)

    cmap_mse = torch.mean(mask * (cmap_out - cmap)**2)
    paf_mse = torch.mean(mask * (paf_out - paf)**2)
    return cmap_mse + paf_mse


if __name__ == '__main__':

    # The notebook has no command line, so the experiment config path is fixed
    # here instead of being parsed with argparse.
    argsconfig = 'tasks/human_pose/experiments/resnet18_baseline_att_224x224_A.json'
    with open(argsconfig, 'r') as f:
        config = json.load(f)
        pprint.pprint(config)

    # The log file and the checkpoint folder sit next to the config file.
    logfile_path = argsconfig + '.log'

    checkpoint_dir = argsconfig + '.checkpoints'
    if not os.path.exists(checkpoint_dir):
        print('Creating checkpoint directory % s' % checkpoint_dir)
        os.mkdir(checkpoint_dir)

    # LOAD DATASETS

    # NOTE(review): these look like the standard ImageNet channel mean/std —
    # confirm they match the backbone's pretraining.
    normalize = torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    # Training images additionally get colour jitter; test images are only
    # converted to tensors and normalized.
    train_dataset_kwargs = config["train_dataset"]
    train_dataset_kwargs['transforms'] = torchvision.transforms.Compose([
        torchvision.transforms.ColorJitter(**config['color_jitter']),
        torchvision.transforms.ToTensor(),
        normalize,
    ])

    test_dataset_kwargs = config["test_dataset"]
    test_dataset_kwargs['transforms'] = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        normalize,
    ])

    # The evaluator exists only when the config has an "evaluation" section;
    # the matching check at the end of each epoch guards its use.
    if 'evaluation' in config:
        evaluator = CocoHumanPoseEval(**config['evaluation'])

    train_dataset = CocoDataset(**train_dataset_kwargs)
    test_dataset = CocoDataset(**test_dataset_kwargs)

    # Normalized inverse-frequency weights derived from the test dataset's
    # counts. They are computed here but not used by the loss below; kept so
    # the names stay available in the notebook namespace.
    part_type_counts = test_dataset.get_part_type_counts().float().cuda()
    part_weight = 1.0 / part_type_counts
    part_weight = part_weight / torch.sum(part_weight)
    paf_type_counts = test_dataset.get_paf_type_counts().float().cuda()
    paf_weight = 1.0 / paf_type_counts
    paf_weight = paf_weight / torch.sum(paf_weight)
    paf_weight /= 2.0

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        **config["train_loader"]
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        **config["test_loader"]
    )

    model = MODELS[config['model']['name']](**config['model']['kwargs']).to(device)

    if "initial_state_dict" in config['model']:
        print('Loading initial weights from %s' % config['model']['initial_state_dict'])
        # map_location keeps the load working when the file was saved from a
        # different device than the one used here.
        model.load_state_dict(
            torch.load(config['model']['initial_state_dict'], map_location=device)
        )

    optimizer = OPTIMIZERS[config['optimizer']['name']](model.parameters(), **config['optimizer']['kwargs'])
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    mask_unlabeled = bool(config.get('mask_unlabeled', False))
    if mask_unlabeled:
        print('Masking unlabeled annotations')

    for epoch in range(config["epochs"]):

        # Both schedules are keyed by the epoch number as a string.
        if str(epoch) in config['stdev_schedule']:
            stdev = config['stdev_schedule'][str(epoch)]
            print('Adjusting stdev to %f' % stdev)
            train_dataset.stdev = stdev
            test_dataset.stdev = stdev

        if str(epoch) in config['lr_schedule']:
            new_lr = config['lr_schedule'][str(epoch)]
            print('Adjusting learning rate to %f' % new_lr)
            set_lr(optimizer, new_lr)

        # Saved before this epoch trains: epoch_N.pth holds the weights as they
        # are after N completed epochs.
        if epoch % config['checkpoints']['interval'] == 0:
            save_checkpoint(model, checkpoint_dir, epoch)

        train_loss = 0.0
        model = model.train()
        for image, cmap, paf, mask in tqdm.tqdm(train_loader):
            optimizer.zero_grad()
            loss = _masked_mse_loss(model, image, cmap, paf, mask, mask_unlabeled)

            # The backward pass goes through apex amp's loss-scaling context.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()
            train_loss += float(loss)

        train_loss /= len(train_loader)

        test_loss = 0.0
        model = model.eval()
        for image, cmap, paf, mask in tqdm.tqdm(test_loader):
            with torch.no_grad():
                loss = _masked_mse_loss(model, image, cmap, paf, mask, mask_unlabeled)
                test_loss += float(loss)
        test_loss /= len(test_loader)

        write_log_entry(logfile_path, epoch, train_loss, test_loss)

        if 'evaluation' in config:
            evaluator.evaluate(model, train_dataset.topology)

    # The in-loop checkpoints are written before an epoch trains, so without
    # this the weights produced by the last epoch(s) would never reach disk.
    save_checkpoint(model, checkpoint_dir, config["epochs"])

{'checkpoints': {'interval': 3},
 'color_jitter': {'brightness': 0.05,
                  'contrast': 0.05,
                  'hue': 0.01,
                  'saturation': 0.05},
 'epochs': 250,
 'lr_schedule': {'0': 0.001, '150': 1e-05, '75': 0.0001},
 'model': {'kwargs': {'cmap_channels': 18,
                      'num_upsample': 3,
                      'paf_channels': 42,
                      'upsample_channels': 256},
           'name': 'resnet18_baseline_att'},
 'optimizer': {'kwargs': {'lr': 0.001}, 'name': 'Adam'},
 'stdev_schedule': {'0': 0.025},
 'test_dataset': {'annotations_file': 'annotations/person_keypoints_val2017_modified.json',
                  'category_name': 'person',
                  'image_shape': [224, 224],
                  'images_dir': 'val2017',
                  'is_bmp': False,
                  'random_angle': [-0.0, 0.0],
                  'random_scale': [1.0, 1.0],
                  'random_translate': [-0.0, 0.0],
                  'stdev': 0.025,
 

2693it [00:07, 346.84it/s]


Saving to intermediate tensors to cache file...


  cpuset_checked))
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Adjusting stdev to 0.025000
Adjusting learning rate to 0.001000
Saving checkpoint to tasks/human_pose/experiments/resnet18_baseline_att_224x224_A.json.checkpoints/epoch_0.pth


100%|██████████| 1002/1002 [17:25<00:00,  1.04s/it]
100%|██████████| 43/43 [00:31<00:00,  1.36it/s]

0, 0.003695, 0.003446



100%|██████████| 1002/1002 [17:27<00:00,  1.05s/it]
100%|██████████| 43/43 [00:31<00:00,  1.38it/s]

1, 0.002796, 0.003201



100%|██████████| 1002/1002 [17:18<00:00,  1.04s/it]
100%|██████████| 43/43 [00:31<00:00,  1.37it/s]


2, 0.002598, 0.003029
Saving checkpoint to tasks/human_pose/experiments/resnet18_baseline_att_224x224_A.json.checkpoints/epoch_3.pth


100%|██████████| 1002/1002 [17:26<00:00,  1.04s/it]
100%|██████████| 43/43 [00:31<00:00,  1.38it/s]

3, 0.002454, 0.002777



100%|██████████| 1002/1002 [17:13<00:00,  1.03s/it]
100%|██████████| 43/43 [00:30<00:00,  1.39it/s]

4, 0.002357, 0.002708



100%|██████████| 1002/1002 [17:12<00:00,  1.03s/it]
100%|██████████| 43/43 [00:30<00:00,  1.41it/s]


5, 0.002279, 0.002657
Saving checkpoint to tasks/human_pose/experiments/resnet18_baseline_att_224x224_A.json.checkpoints/epoch_6.pth


100%|██████████| 1002/1002 [17:13<00:00,  1.03s/it]
100%|██████████| 43/43 [00:30<00:00,  1.41it/s]

6, 0.002228, 0.002580



100%|██████████| 1002/1002 [17:13<00:00,  1.03s/it]
100%|██████████| 43/43 [00:30<00:00,  1.40it/s]

7, 0.002170, 0.002521



100%|██████████| 1002/1002 [17:15<00:00,  1.03s/it]
100%|██████████| 43/43 [00:31<00:00,  1.38it/s]


8, 0.002129, 0.002519
Saving checkpoint to tasks/human_pose/experiments/resnet18_baseline_att_224x224_A.json.checkpoints/epoch_9.pth


100%|██████████| 1002/1002 [17:16<00:00,  1.03s/it]
100%|██████████| 43/43 [00:30<00:00,  1.40it/s]

9, 0.002097, 0.002486



100%|██████████| 1002/1002 [17:16<00:00,  1.03s/it]
100%|██████████| 43/43 [00:30<00:00,  1.39it/s]

10, 0.002065, 0.002522



 47%|████▋     | 466/1002 [08:09<12:41,  1.42s/it]