In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
sys.path.insert(0, "/home/islandz/Documents/Horse/HRNet/deep-high-resolution-net.pytorch")

In [31]:
import argparse
import os
import pprint
import shutil
import pandas as pd
from yacs.config import CfgNode as CN
import cv2

import torch
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

from tools import _init_paths
from lib.config import cfg
from lib.config import update_config
from lib.core.loss import JointsMSELoss
from lib.core.function import train
from lib.core.function import validate
from lib.utils.utils import get_optimizer
from lib.utils.utils import save_checkpoint
from lib.utils.utils import create_logger
from lib.utils.utils import get_model_summary

from lib.dataset.coco import COCODataset
from lib.models.pose_hrnet import get_pose_net

In [3]:
csv_data = pd.read_csv("horse.csv")

In [4]:
img_list = list(csv_data["image"])

In [5]:
root = "/home/islandz/Documents/Horse"

In [27]:
dataset = COCODataset(cfg, root, img_list, is_train=True, transform=None)

=> num_images: 142
=> num_images: 142
=> num_images: 142


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [7]:
STAGE2 = CN()
STAGE2.NUM_MODULES = 1
STAGE2.NUM_BRANCHES = 2
STAGE2.NUM_BLOCKS = [4, 4]
STAGE2.NUM_CHANNELS = [32, 64]
STAGE2.BLOCK = 'BASIC'
STAGE2.FUSE_METHOD = 'SUM'

STAGE3 = CN()
STAGE3.NUM_MODULES = 1
STAGE3.NUM_BRANCHES = 3
STAGE3.NUM_BLOCKS = [4, 4, 4]
STAGE3.NUM_CHANNELS = [32, 64, 128]
STAGE3.BLOCK = 'BASIC'
STAGE3.FUSE_METHOD = 'SUM'

STAGE4 = CN()
STAGE4.NUM_MODULES = 1
STAGE4.NUM_BRANCHES = 4
STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
STAGE4.BLOCK = 'BASIC'
STAGE4.FUSE_METHOD = 'SUM'

In [8]:
model = get_pose_net(cfg, STAGE2, STAGE3, STAGE4, is_train=True)

In [13]:
def parse_args():
    parser = argparse.ArgumentParser(description='Train keypoints network')
    # general
    parser.add_argument('--cfg',
                        help='experiment configure file name',
                        required=True,
                        type=str)

    parser.add_argument('opts',
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    # philly
    parser.add_argument('--modelDir',
                        help='model directory',
                        type=str,
                        default='')
    parser.add_argument('--logDir',
                        help='log directory',
                        type=str,
                        default='')
    parser.add_argument('--dataDir',
                        help='data directory',
                        type=str,
                        default='')
    parser.add_argument('--prevModelDir',
                        help='prev Model directory',
                        type=str,
                        default='')

    args = parser.parse_args()

    return args

In [19]:
logger, final_output_dir, tb_log_dir = create_logger(
    cfg, "Output", 'train')

=> creating coco/pose_hrnet/Output
=> creating coco/pose_hrnet/Output_2022-11-18-17-21


In [20]:
# cudnn related setting
cudnn.benchmark = cfg.CUDNN.BENCHMARK
torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

In [21]:
writer_dict = {
    'writer': SummaryWriter(log_dir=tb_log_dir),
    'train_global_steps': 0,
    'valid_global_steps': 0,
}

dump_input = torch.rand(
    (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
)
writer_dict['writer'].add_graph(model, (dump_input, ))

logger.info(get_model_summary(model, dump_input))


Total Parameters: 9,301,910
----------------------------------------------------------------------------------------------------------------------------------
Total Multiply Adds (For Convolution and Linear Layers only): 4.316820859909058 GFLOPs
----------------------------------------------------------------------------------------------------------------------------------
Number of Layers
Conv2d : 104 layers   BatchNorm2d : 103 layers   ReLU : 97 layers   Bottleneck : 4 layers   BasicBlock : 36 layers   Upsample : 7 layers   HighResolutionModule : 3 layers   

Total Parameters: 9,301,910
----------------------------------------------------------------------------------------------------------------------------------
Total Multiply Adds (For Convolution and Linear Layers only): 4.316820859909058 GFLOPs
----------------------------------------------------------------------------------------------------------------------------------
Number of Layers
Conv2d : 104 layers   BatchNorm2d : 

In [23]:
#model.cuda()

# define loss function (criterion) and optimizer
criterion = JointsMSELoss(
    use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT
)#.cuda()

# Data loading code
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)

train_loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=cfg.TRAIN.SHUFFLE,
    num_workers=cfg.WORKERS,
    pin_memory=cfg.PIN_MEMORY
)

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [None]:
best_perf = 0.0
best_model = False
last_epoch = -1
optimizer = get_optimizer(cfg, model)
begin_epoch = cfg.TRAIN.BEGIN_EPOCH
checkpoint_file = os.path.join(
    final_output_dir, 'checkpoint.pth'
)

if cfg.AUTO_RESUME and os.path.exists(checkpoint_file):
    logger.info("=> loading checkpoint '{}'".format(checkpoint_file))
    checkpoint = torch.load(checkpoint_file)
    begin_epoch = checkpoint['epoch']
    best_perf = checkpoint['perf']
    last_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])

    optimizer.load_state_dict(checkpoint['optimizer'])
    logger.info("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
    last_epoch=last_epoch
)

In [None]:
for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
    lr_scheduler.step()

    # train for one epoch
    train(cfg, train_loader, model, criterion, optimizer, epoch,
          final_output_dir, tb_log_dir)
    
    logger.info('=> saving checkpoint to {}'.format(final_output_dir))
    save_checkpoint({
        'epoch': epoch + 1,
        'model': cfg.MODEL.NAME,
        'state_dict': model.state_dict(),
        'best_state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, best_model, final_output_dir)

final_model_state_file = os.path.join(
    final_output_dir, 'final_state.pth'
)
logger.info('=> saving final model state to {}'.format(
    final_model_state_file)
)
torch.save(model.module.state_dict(), final_model_state_file)