In [1]:
import os
import torch
import numpy as np
import pandas as pd
import sys
import random
import h5py
from torch.utils.tensorboard import SummaryWriter

sys.path.append('../')

from models.smc.src import *
from models.smc.src.crowd_count import *
from models.smc.src.network import *
from models.smc.src.data_loader import ImageDataLoader
from models.smc.src.timer import *
from models.smc.src.evaluate_model import *
from models.smc.src import utils

In [2]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)


cuda


In [3]:
# CUDA diagnostics
#
# Prints the CUDA memory summary and basic information about the active GPU.
# Every CUDA query is guarded so that this cell also runs on a CPU-only
# machine; in that case the same variable names are bound to neutral
# placeholders and a short notice is printed instead.

if torch.cuda.is_available():
    # Release unused cached memory before reporting memory statistics.
    torch.cuda.empty_cache()
    print(torch.cuda.memory_summary(device=None, abbreviated=False))

    current_device = torch.cuda.current_device()
    current_device_name = torch.cuda.get_device_name(current_device)
    memory_allocated = torch.cuda.memory_allocated()
    # torch.cuda.memory_cached() is deprecated; memory_reserved() is its
    # replacement. The variable name is kept for backward compatibility.
    memory_cached = torch.cuda.memory_reserved()

    print(
        f'Using gpu {current_device_name} with device number {current_device}.\n'
        f'Memory allocated = {memory_allocated}\n'
        f'Memory cached = {memory_cached}'
    )
else:
    # No GPU: keep the names defined so later references do not fail.
    current_device = None
    current_device_name = 'cpu'
    memory_allocated = 0
    memory_cached = 0
    print('CUDA is not available; skipping GPU memory diagnostics.')

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------



In [4]:
# termcolor is optional: when it is missing, cprint is set to None and
# log_print falls back to the built-in print.
try:
    from termcolor import cprint
except ImportError:
    cprint = None


def log_print(text, color=None, on_color=None, attrs=None):
    """Print ``text``, using termcolor's ``cprint`` when it is installed.

    Args:
        text: The message to print.
        color: Forwarded to ``cprint`` as ``color``; ignored by the fallback.
        on_color: Forwarded to ``cprint`` as ``on_color``; ignored by the fallback.
        attrs: Forwarded to ``cprint`` as ``attrs``; ignored by the fallback.
    """
    if cprint is None:
        # Plain fallback: styling arguments are dropped.
        print(text)
        return
    cprint(text, color=color, on_color=on_color, attrs=attrs)

In [5]:
# Directory configuration

# Experiment identity; both values are interpolated into the output path.
method, dataset_name = 'smc', 'JHU'
output_dir = f'../output/{method}/saved_models/{dataset_name}'

# Training split: image directory and ground-truth directory
train_path, train_gt_path = '../data/JHU/train/consolidated', '../data/JHU/train/gt'

# Validation split: image directory and ground-truth directory
val_path, val_gt_path = '../data/JHU/val/consolidated', '../data/JHU/val/gt'

In [6]:
# Create the output directory (and any missing parent directories).
# os.makedirs is used instead of os.mkdir because output_dir is a nested path
# whose parents may not exist yet; exist_ok=True makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

In [7]:
# Build the model

# Train on the GPU only when CUDA is actually usable, so this flag agrees with
# the availability check performed before the weights are moved to the GPU.
# Set it to False explicitly to force CPU training on a GPU machine.
is_cuda = torch.cuda.is_available()

# is_cuda determines if all the input tensors should be converted to cuda tensors
model = CrowdCounter(is_cuda=is_cuda)
# Initialise the weights via the project helper (dev=0.01).
network.weights_normal_init(model, dev=0.01)
# Switch to training mode; as the last expression this also displays the model.
model.train()


CrowdCounter(
  (model): SMC(
    (r1): Sequential(
      (0): Conv2d(
        (conv): Conv2d(3, 16, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
        (relu): ReLU(inplace=True)
      )
      (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): Conv2d(
        (conv): Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(
        (conv): Conv2d(32, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
      (5): Conv2d(
        (conv): Conv2d(16, 8, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
    )
    (r2_1): Sequential(
      (0): Conv2d(
        (conv): Conv2d(3, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
        (relu): ReLU(inplace=True)
      )
      (1): MaxPool2d(kernel_s

In [8]:
# List every named parameter with the device it lives on and its shape.
for param_name, tensor in model.named_parameters():
    print(param_name, tensor.device, tensor.shape, sep='\t')

# List every state_dict entry together with its dtype.
print("\nModel's state_dict: \n")
state = model.state_dict()
for key in state:
    print(f'{key} \t {state[key].dtype}')

model.r1.0.conv.weight	cpu	torch.Size([16, 3, 9, 9])
model.r1.0.conv.bias	cpu	torch.Size([16])
model.r1.2.conv.weight	cpu	torch.Size([32, 16, 7, 7])
model.r1.2.conv.bias	cpu	torch.Size([32])
model.r1.4.conv.weight	cpu	torch.Size([16, 32, 7, 7])
model.r1.4.conv.bias	cpu	torch.Size([16])
model.r1.5.conv.weight	cpu	torch.Size([8, 16, 7, 7])
model.r1.5.conv.bias	cpu	torch.Size([8])
model.r2_1.0.conv.weight	cpu	torch.Size([20, 3, 7, 7])
model.r2_1.0.conv.bias	cpu	torch.Size([20])
model.r2_1.2.conv.weight	cpu	torch.Size([40, 20, 5, 5])
model.r2_1.2.conv.bias	cpu	torch.Size([40])
model.r2_1.4.conv.weight	cpu	torch.Size([20, 40, 5, 5])
model.r2_1.4.conv.bias	cpu	torch.Size([20])
model.r2_1.5.conv.weight	cpu	torch.Size([10, 20, 5, 5])
model.r2_1.5.conv.bias	cpu	torch.Size([10])
model.r2_2.0.conv.weight	cpu	torch.Size([24, 3, 5, 5])
model.r2_2.0.conv.bias	cpu	torch.Size([24])
model.r2_2.2.conv.weight	cpu	torch.Size([48, 24, 3, 3])
model.r2_2.2.conv.bias	cpu	torch.Size([48])
model.r2_2.4.conv.wei

In [9]:
# Move the model weights to the GPU, but only when GPU training was requested
# (is_cuda) and CUDA is actually available on this machine.

if is_cuda:
    if torch.cuda.is_available():
        print("Changing to cuda weights")
        model.cuda()

Changing to cuda weights


In [10]:
# Training configuration

# Log progress every `disp_interval` steps.
disp_interval = 10

# Running loss accumulator and timer-reset flag used by the training loop.
train_loss = 0
re_cnt = False

# Best validation results seen so far; the large sentinels ensure that the
# first evaluated epoch is recorded as the best.
best_mae = best_mse = 999999
best_epoch = 0

# Start the timer used to measure the time between progress logs.
t = Timer()
t.tic()


In [11]:
# Hyperparameters

learning_rate, epochs = 0.0001, 100

# Build the optimizer over the trainable parameters only
# (those with requires_grad set).

params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=learning_rate)


In [12]:
# Build the training and validation loaders with identical options.
# Take note of the num_pool argument.
# NOTE(review): shuffle=False is also used for the training loader, so every
# epoch presumably visits samples in the same order - confirm this is intended.

loader_options = dict(shuffle=False, pre_load=False, num_pool=2)
data_loader = ImageDataLoader(train_path, **loader_options)
data_loader_val = ImageDataLoader(val_path, **loader_options)

# Path of the most recently saved weights; assigned during training.
save_name = None


In [13]:
# Kept so the name stays available to any later cell; no longer needed here.
from torch.autograd import Variable

# TensorBoard configuration
use_tensorboard = True

# One run directory per (learning rate, epoch count) combination.
writer = SummaryWriter(f'../output/tensorboard/runs/{learning_rate}_{epochs}')

# Add the model graph to TensorBoard using a dummy input from the loader.
dummy_input = data_loader.get_dummy_input()

# Build a float32 tensor from the numpy array and place it on the same device
# as the model weights. This replaces the deprecated Variable wrapper and the
# unconditional .cuda() call, which failed on machines without a GPU.
model_device = next(model.parameters()).device
dummy_tensor = torch.from_numpy(dummy_input['data']).float().to(model_device)

writer.add_graph(model.get_model(), dummy_tensor)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Training loop: for every epoch, train on all training blobs, save the
# current weights, evaluate them on the validation loader, keep a copy of the
# weights whenever the validation MAE improves, and log the metrics.
for epoch in range(epochs):
    # Sum of the per-step losses for this epoch.
    train_loss = 0
    for step, blob in enumerate(data_loader):
        im_data = blob['data']
        gt_data = blob['gt_density']

        # Forward pass; the loss is read from the model after the call.
        density_map = model(im_data, gt_data)
        loss = model.loss
        # .item() yields a plain Python float, so the accumulator is not a tensor.
        train_loss += loss.item()

        # Log the running (cumulative) epoch loss, honouring the same
        # use_tensorboard switch as the per-epoch metrics below.
        if use_tensorboard:
            writer.add_scalar(f"Loss/train_{epoch}", train_loss, step)

        # Reset the gradients, backpropagate and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % disp_interval == 0:
            duration = t.toc(average=False)
            gt_count = np.sum(gt_data)

            # Convert the prediction to numpy to compute the estimated count.
            density_map = density_map.detach().cpu().numpy()
            et_count = np.sum(density_map)
            utils.save_results(im_data, gt_data, density_map, output_dir)

            # Log the results per display interval.
            log_text = (
                'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (epoch,
                step, duration, gt_count,et_count)
            )
            log_print(log_text, color='green', attrs=['bold'])

            # Restart the timer so the next report measures only the time
            # since this report.
            t.tic()

    # Overwrite the weights file for the current learning rate.
    current_model = f'{method}_{learning_rate}.h5'
    save_name = os.path.join(output_dir, current_model)
    network.save_net(save_name, model)

    # Compute MAE and MSE for the weights just saved, using the validation
    # loader (data_loader_val), once per epoch.
    mae, mse = evaluate_model(save_name, data_loader_val, is_cuda=is_cuda)

    # Pocket algorithm: keep a copy of the weights whenever the validation MAE
    # improves on the best value recorded so far.
    if mae < best_mae:
        # Record the new best MAE and MSE.
        best_mae = mae
        best_mse = mse
        best_epoch = epoch
        best_model = f'best_model_epoch_{best_epoch}_{method}_{learning_rate}.h5'

        # The file name contains the epoch number, so each improvement is
        # written to its own file.
        save_name = os.path.join(output_dir, best_model)
        network.save_net(save_name, model)

    # Log the validation metrics of this epoch.
    log_text = f'EPOCH: {epoch}, MAE: {mae}, MSE: {mse}'
    log_print(log_text, color='green', attrs=['bold'])

    # Save the per-epoch results to TensorBoard.
    if use_tensorboard:
        writer.add_scalar("MAE", mae, epoch)
        writer.add_scalar("MSE", mse, epoch)
        writer.add_scalar("Train_loss", train_loss / data_loader.get_num_samples(), epoch)




[1m[32mepoch:    0, step    0, Time: 3.6299s, gt_cnt:  4.1, et_cnt:  0.0[0m
[1m[32mepoch:    0, step   10, Time: 11.1815s, gt_cnt: 16.9, et_cnt: 15.7[0m
[1m[32mepoch:    0, step   20, Time: 11.2100s, gt_cnt:  1.1, et_cnt: 22.6[0m
[1m[32mepoch:    0, step   30, Time: 11.1993s, gt_cnt:  1.6, et_cnt: 17.5[0m
[1m[32mepoch:    0, step   40, Time: 11.2527s, gt_cnt:  4.4, et_cnt: 24.4[0m
[1m[32mepoch:    0, step   50, Time: 11.3024s, gt_cnt: 26.4, et_cnt: 14.5[0m
[1m[32mepoch:    0, step   60, Time: 11.3354s, gt_cnt: 27.7, et_cnt:  9.2[0m
[1m[32mepoch:    0, step   70, Time: 11.4021s, gt_cnt: 36.3, et_cnt: 27.7[0m
[1m[32mepoch:    0, step   80, Time: 11.5918s, gt_cnt:  0.1, et_cnt: 28.8[0m
[1m[32mepoch:    0, step   90, Time: 11.5130s, gt_cnt:  9.0, et_cnt: 17.7[0m


  v = Variable(torch.as_tensor(x).type(dtype), requires_grad = False, volatile = True)


[1m[32mEPOCH: 0, MAE: 16.146729588508606, MSE: 30.063693682628347[0m
[1m[32mepoch:    1, step    0, Time: 72.5520s, gt_cnt:  4.1, et_cnt: 11.4[0m
[1m[32mepoch:    1, step   10, Time: 11.7018s, gt_cnt: 16.9, et_cnt: 20.6[0m
[1m[32mepoch:    1, step   20, Time: 12.3150s, gt_cnt:  1.1, et_cnt: 26.7[0m
[1m[32mepoch:    1, step   30, Time: 12.8534s, gt_cnt:  1.6, et_cnt: 17.5[0m
[1m[32mepoch:    1, step   40, Time: 11.7888s, gt_cnt:  4.4, et_cnt: 22.7[0m
[1m[32mepoch:    1, step   50, Time: 13.5220s, gt_cnt: 26.4, et_cnt: 14.3[0m
[1m[32mepoch:    1, step   60, Time: 12.0278s, gt_cnt: 27.7, et_cnt:  9.7[0m
[1m[32mepoch:    1, step   70, Time: 11.9341s, gt_cnt: 36.3, et_cnt: 28.3[0m
[1m[32mepoch:    1, step   80, Time: 15.1119s, gt_cnt:  0.1, et_cnt: 29.0[0m
[1m[32mepoch:    1, step   90, Time: 11.9226s, gt_cnt:  9.0, et_cnt: 17.9[0m
[1m[32mEPOCH: 1, MAE: 16.184890823364256, MSE: 30.03268245334253[0m
[1m[32mepoch:    2, step    0, Time: 75.3303s, gt_cnt: 

In [None]:
# Report the best validation MAE and MSE recorded during training.
log_text = f'BEST MAE: {best_mae}, BEST MSE: {best_mse}'
log_print(
    log_text,
    attrs=['bold'],
    color='green',
)
