In [1]:
import os
import torch
import numpy as np
import pandas as pd
import sys
import random
import h5py
from torch.utils.tensorboard import SummaryWriter

sys.path.append('../')

from models.smc.src import *
from models.smc.src.crowd_count import *
from models.smc.src.network import *
from models.smc.src.data_loader import ImageDataLoader
from models.smc.src.timer import *
from models.smc.src.evaluate_model import *
from models.smc.src import utils

In [2]:
# Check to see if device can be trained on GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

print(device)


cuda


In [3]:
# Cuda configurations

torch.cuda.empty_cache()
print(torch.cuda.memory_summary(device=None, abbreviated=False))

current_device = torch.cuda.current_device()
current_device_name = torch.cuda.get_device_name(current_device)
memory_allocated = torch.cuda.memory_allocated()
memory_cached = torch.cuda.memory_cached()

print(
    f'Using gpu {current_device_name} with device number {current_device}.\n'
    f'Memory allocated = {memory_allocated}\n'
    f'Memory cached = {memory_cached}'
)

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------



In [4]:
try:
    from termcolor import cprint
except ImportError:
    cprint = None


def log_print(text, color=None, on_color=None, attrs=None):
    if cprint is not None:
        cprint(text, color=color, on_color=on_color, attrs=attrs)
    else:
        print(text)

In [5]:
# Directory Configurations

method = 'smc'
dataset_name = 'shtechB'
output_dir = f'../output/{method}/saved_models'

# Training data path
train_path = '../data/SHT/part_B_final/train_data/consolidated'
train_gt_path = '../data/SHT/part_B_final/train_data/ground_truth'

# Validation data path
val_path = '../data/SHT/part_B_final/test_data/consolidated'
val_gt_path = '../data/SHT/part_B_final/test_data/ground_truth'

In [6]:
# Create output directory if it doesnt exist

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

In [7]:
# load model

is_cuda = True  # Determine if we should use the CPU to train or GPU

model = CrowdCounter(is_cuda=is_cuda)  # is_cuda determines if all the input tensors should be converted to cuda tensors
network.weights_normal_init(model, dev=0.01)
model.train()


CrowdCounter(
  (model): SMC(
    (r1): Sequential(
      (0): Conv2d(
        (conv): Conv2d(3, 16, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
        (relu): ReLU(inplace=True)
      )
      (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): Conv2d(
        (conv): Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(
        (conv): Conv2d(32, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
      (5): Conv2d(
        (conv): Conv2d(16, 8, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
    )
    (r2_1): Sequential(
      (0): Conv2d(
        (conv): Conv2d(3, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
      (1): MaxPool2d(kernel_s

In [8]:
# Model parameters

for name, param in model.named_parameters():
    print(f'{name}\t{param.device}\t{param.shape}')

# Print model's state_dict
print("\nModel's state_dict: \n")
for k, v in model.state_dict().items():
    print(k, "\t", v.dtype)

model.r1.0.conv.weight	cpu	torch.Size([16, 3, 9, 9])
model.r1.0.conv.bias	cpu	torch.Size([16])
model.r1.2.conv.weight	cpu	torch.Size([32, 16, 7, 7])
model.r1.2.conv.bias	cpu	torch.Size([32])
model.r1.4.conv.weight	cpu	torch.Size([16, 32, 7, 7])
model.r1.4.conv.bias	cpu	torch.Size([16])
model.r1.5.conv.weight	cpu	torch.Size([8, 16, 7, 7])
model.r1.5.conv.bias	cpu	torch.Size([8])
model.r2_1.0.conv.weight	cpu	torch.Size([20, 3, 7, 7])
model.r2_1.0.conv.bias	cpu	torch.Size([20])
model.r2_1.2.conv.weight	cpu	torch.Size([40, 20, 5, 5])
model.r2_1.2.conv.bias	cpu	torch.Size([40])
model.r2_1.4.conv.weight	cpu	torch.Size([20, 40, 5, 5])
model.r2_1.4.conv.bias	cpu	torch.Size([20])
model.r2_1.5.conv.weight	cpu	torch.Size([10, 20, 5, 5])
model.r2_1.5.conv.bias	cpu	torch.Size([10])
model.r2_2.0.conv.weight	cpu	torch.Size([24, 3, 5, 5])
model.r2_2.0.conv.bias	cpu	torch.Size([24])
model.r2_2.2.conv.weight	cpu	torch.Size([48, 24, 3, 3])
model.r2_2.2.conv.bias	cpu	torch.Size([48])
model.r2_2.4.conv.wei

In [9]:
# Change model weights tensors to be cuda tensors if is_cuda is true and cuda is available

if is_cuda and torch.cuda.is_available():
    print("Changing to cuda weights")
    model.cuda()

Changing to cuda weights


In [10]:
#training configuration

momentum = 0.9
disp_interval = 2
log_interval = 250

train_loss = 0
step_cnt = 0
re_cnt = False
t = Timer()
t.tic()

# Set initial values
best_mae, best_mse, best_epoch = 999999, 999999, 0


In [11]:
# Hyperparameters

learning_rate = 0.00001
epochs = 10

# construct an optimizer

params = [p for p in model.parameters() if p.requires_grad]
#optimizer = torch.optim.SGD(params, lr=learning_rate, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.Adam(params, lr=learning_rate)


In [12]:
# Load the images, take note the num_pool argument

data_loader = ImageDataLoader(train_path, shuffle=False, pre_load=False, num_pool = 2 )
data_loader_val = ImageDataLoader(val_path, shuffle=False, pre_load=False, num_pool = 2)



In [13]:
#Tensorboard  config
use_tensorboard = True

writer = SummaryWriter(f'../output/tensorboard/runs')



In [15]:
saved_weights = []

for epoch in range(epochs):    
    step = -1
    train_loss = 0
    for blob in data_loader:                
        step = step + 1        
        im_data = blob['data']
        gt_data = blob['gt_density']
        
        # Forward pass
        density_map = model(im_data, gt_data)
        loss = model.loss
        train_loss += loss.data
        
        # Write to tensorboard
        writer.add_scalar("Loss/train", train_loss, epoch)

        # Reset zero gradient and backpropagate
        step_cnt += 1
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if step % disp_interval == 0:            
            duration = t.toc(average=False)
            fps = step_cnt / duration
            gt_count = np.sum(gt_data)    

            density_map = density_map.data.cpu().numpy()
            et_count = np.sum(density_map)
            utils.save_results(im_data,gt_data,density_map, output_dir)
            log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (epoch,
                step, 1./fps, gt_count,et_count)
            log_print(log_text, color='green', attrs=['bold'])
            re_cnt = True    
    
       
        if re_cnt:                                
            t.tic()
            re_cnt = False

    save_name = os.path.join(output_dir, f'{method}_{dataset_name}_{epoch}.h5')
    saved_weights.append(save_name)
    network.save_net(save_name, model)



[1m[32mepoch:    0, step    0, Time: 3.0957s, gt_cnt:  8.7, et_cnt: 13.0[0m
[1m[32mepoch:    0, step    2, Time: 0.0149s, gt_cnt:  1.6, et_cnt: 12.8[0m
[1m[32mepoch:    0, step    4, Time: 0.0146s, gt_cnt:  3.3, et_cnt: 12.3[0m
[1m[32mepoch:    0, step    6, Time: 0.0144s, gt_cnt:  3.1, et_cnt: 11.5[0m
[1m[32mepoch:    0, step    8, Time: 0.0142s, gt_cnt:  5.9, et_cnt: 10.7[0m
[1m[32mepoch:    0, step   10, Time: 0.0140s, gt_cnt:  1.8, et_cnt:  9.6[0m
[1m[32mepoch:    0, step   12, Time: 0.0137s, gt_cnt:  2.7, et_cnt:  8.9[0m
[1m[32mepoch:    0, step   14, Time: 0.0134s, gt_cnt: 20.4, et_cnt:  8.1[0m
[1m[32mepoch:    0, step   16, Time: 0.0132s, gt_cnt:  6.2, et_cnt:  7.6[0m
[1m[32mepoch:    0, step   18, Time: 0.0130s, gt_cnt: 18.3, et_cnt:  7.4[0m
[1m[32mepoch:    0, step   20, Time: 0.0128s, gt_cnt:  6.7, et_cnt:  7.2[0m
[1m[32mepoch:    0, step   22, Time: 0.0126s, gt_cnt:  2.7, et_cnt:  7.1[0m
[1m[32mepoch:    0, step   24, Time: 0.0124s, gt_c

[1m[32mepoch:    2, step    8, Time: 0.0054s, gt_cnt:  5.9, et_cnt: 10.7[0m
[1m[32mepoch:    2, step   10, Time: 0.0054s, gt_cnt:  1.8, et_cnt:  9.5[0m
[1m[32mepoch:    2, step   12, Time: 0.0053s, gt_cnt:  2.7, et_cnt:  9.0[0m
[1m[32mepoch:    2, step   14, Time: 0.0053s, gt_cnt: 20.4, et_cnt:  8.2[0m
[1m[32mepoch:    2, step   16, Time: 0.0053s, gt_cnt:  6.2, et_cnt:  7.6[0m
[1m[32mepoch:    2, step   18, Time: 0.0053s, gt_cnt: 18.3, et_cnt:  7.6[0m
[1m[32mepoch:    2, step   20, Time: 0.0052s, gt_cnt:  6.7, et_cnt:  7.1[0m
[1m[32mepoch:    2, step   22, Time: 0.0052s, gt_cnt:  2.7, et_cnt:  7.0[0m
[1m[32mepoch:    2, step   24, Time: 0.0052s, gt_cnt:  9.2, et_cnt:  6.9[0m
[1m[32mepoch:    2, step   26, Time: 0.0051s, gt_cnt:  2.7, et_cnt:  7.3[0m
[1m[32mepoch:    2, step   28, Time: 0.0051s, gt_cnt:  2.7, et_cnt:  7.5[0m
[1m[32mepoch:    2, step   30, Time: 0.0051s, gt_cnt: 21.4, et_cnt:  7.4[0m
[1m[32mepoch:    2, step   32, Time: 0.0050s, gt_c

[1m[32mepoch:    4, step   16, Time: 0.0033s, gt_cnt:  6.2, et_cnt:  7.7[0m
[1m[32mepoch:    4, step   18, Time: 0.0033s, gt_cnt: 18.3, et_cnt:  7.7[0m
[1m[32mepoch:    4, step   20, Time: 0.0033s, gt_cnt:  6.7, et_cnt:  7.1[0m
[1m[32mepoch:    4, step   22, Time: 0.0033s, gt_cnt:  2.7, et_cnt:  7.0[0m
[1m[32mepoch:    4, step   24, Time: 0.0033s, gt_cnt:  9.2, et_cnt:  6.9[0m
[1m[32mepoch:    4, step   26, Time: 0.0033s, gt_cnt:  2.7, et_cnt:  7.4[0m
[1m[32mepoch:    4, step   28, Time: 0.0033s, gt_cnt:  2.7, et_cnt:  7.5[0m
[1m[32mepoch:    4, step   30, Time: 0.0032s, gt_cnt: 21.4, et_cnt:  7.4[0m
[1m[32mepoch:    4, step   32, Time: 0.0032s, gt_cnt:  6.7, et_cnt:  7.7[0m
[1m[32mepoch:    4, step   34, Time: 0.0032s, gt_cnt: 39.8, et_cnt:  8.0[0m
[1m[32mepoch:    4, step   36, Time: 0.0032s, gt_cnt:  1.3, et_cnt:  8.9[0m
[1m[32mepoch:    4, step   38, Time: 0.0032s, gt_cnt: 14.8, et_cnt:  9.6[0m
[1m[32mepoch:    4, step   40, Time: 0.0032s, gt_c

[1m[32mepoch:    6, step   24, Time: 0.0024s, gt_cnt:  9.2, et_cnt:  6.5[0m
[1m[32mepoch:    6, step   26, Time: 0.0024s, gt_cnt:  2.7, et_cnt:  7.3[0m
[1m[32mepoch:    6, step   28, Time: 0.0024s, gt_cnt:  2.7, et_cnt:  7.3[0m
[1m[32mepoch:    6, step   30, Time: 0.0024s, gt_cnt: 21.4, et_cnt:  7.3[0m
[1m[32mepoch:    6, step   32, Time: 0.0024s, gt_cnt:  6.7, et_cnt:  7.7[0m
[1m[32mepoch:    6, step   34, Time: 0.0024s, gt_cnt: 39.8, et_cnt:  8.1[0m
[1m[32mepoch:    6, step   36, Time: 0.0024s, gt_cnt:  1.3, et_cnt:  9.1[0m
[1m[32mepoch:    6, step   38, Time: 0.0023s, gt_cnt: 14.8, et_cnt:  9.7[0m
[1m[32mepoch:    6, step   40, Time: 0.0023s, gt_cnt:  9.7, et_cnt: 10.9[0m
[1m[32mepoch:    6, step   42, Time: 0.0023s, gt_cnt: 10.8, et_cnt: 10.9[0m
[1m[32mepoch:    6, step   44, Time: 0.0023s, gt_cnt: 11.6, et_cnt: 11.4[0m
[1m[32mepoch:    6, step   46, Time: 0.0023s, gt_cnt: 21.8, et_cnt: 11.1[0m
[1m[32mepoch:    6, step   48, Time: 0.0023s, gt_c

[1m[32mepoch:    8, step   32, Time: 0.0019s, gt_cnt:  6.7, et_cnt:  7.7[0m
[1m[32mepoch:    8, step   34, Time: 0.0019s, gt_cnt: 39.8, et_cnt:  8.2[0m
[1m[32mepoch:    8, step   36, Time: 0.0019s, gt_cnt:  1.3, et_cnt:  9.3[0m
[1m[32mepoch:    8, step   38, Time: 0.0018s, gt_cnt: 14.8, et_cnt: 10.0[0m
[1m[32mepoch:    8, step   40, Time: 0.0018s, gt_cnt:  9.7, et_cnt: 11.5[0m
[1m[32mepoch:    8, step   42, Time: 0.0018s, gt_cnt: 10.8, et_cnt: 11.1[0m
[1m[32mepoch:    8, step   44, Time: 0.0018s, gt_cnt: 11.6, et_cnt: 11.6[0m
[1m[32mepoch:    8, step   46, Time: 0.0018s, gt_cnt: 21.8, et_cnt: 11.1[0m
[1m[32mepoch:    8, step   48, Time: 0.0018s, gt_cnt: 18.1, et_cnt: 11.6[0m
[1m[32mepoch:    8, step   50, Time: 0.0018s, gt_cnt:  3.7, et_cnt: 11.5[0m
[1m[32mepoch:    8, step   52, Time: 0.0018s, gt_cnt: 21.7, et_cnt: 11.3[0m
[1m[32mepoch:    8, step   54, Time: 0.0018s, gt_cnt:  2.8, et_cnt: 12.6[0m
[1m[32mepoch:    8, step   56, Time: 0.0018s, gt_c

In [None]:
#calculate error on the validation dataset 
for weights in saved_weights:
    mae,mse = evaluate_model(weights, data_loader_val, is_cuda=is_cuda)
    if mae < best_mae:
        best_mae = mae
        best_mse = mse
        best_model = '{}_{}_{}.h5'.format(method,dataset_name,epoch)


    log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch,mae,mse)
    log_print(log_text, color='green', attrs=['bold'])
    log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (best_mae,best_mse, best_model)
    log_print(log_text, color='green', attrs=['bold'])

    if use_tensorboard:
        writer.add_scalar("MAE", mae, epoch)
        writer.add_scalar("MSE", train_loss, epoch)
        writer.add_scalar("train_loss", train_loss/data_loader.get_num_samples(), epoch)
