In [1]:
import os
import torch
import numpy as np
import pandas as pd
import sys
import random
import h5py
import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter

sys.path.append('../')

from models.csrnet_pytorch.src import *
from models.csrnet_pytorch.src.crowd_count import *
from models.csrnet_pytorch.src.network import *
from models.csrnet_pytorch.src.data_loader import ImageDataLoader
from models.csrnet_pytorch.src.timer import *
from models.csrnet_pytorch.src.evaluate_model import *
from models.csrnet_pytorch.src import utils

In [2]:
# Check to see if device can be trained on GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

print(device)


cuda


In [3]:
# Cuda configurations

torch.cuda.empty_cache()
print(torch.cuda.memory_summary(device=None, abbreviated=False))

current_device = torch.cuda.current_device()
current_device_name = torch.cuda.get_device_name(current_device)
memory_allocated = torch.cuda.memory_allocated()
memory_cached = torch.cuda.memory_cached()

print(
    f'Using gpu {current_device_name} with device number {current_device}.\n'
    f'Memory allocated = {memory_allocated}\n'
    f'Memory cached = {memory_cached}'
)

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------



In [4]:
try:
    from termcolor import cprint
except ImportError:
    cprint = None


def log_print(text, color=None, on_color=None, attrs=None):
    if cprint is not None:
        cprint(text, color=color, on_color=on_color, attrs=attrs)
    else:
        print(text)

In [5]:
# Directory Configurations

method = 'csrnet'
dataset_name = 'JHU'
output_dir = f'../output/{method}/saved_models/{dataset_name}'

# Training data path
train_path = '../data/JHU/train/consolidated'
train_gt_path = '../data/JHU/train/gt'

# Validation data path
val_path = '../data/JHU/val/consolidated'
val_gt_path = '../data/JHU/val/gt'

In [6]:
# Create output directory if it doesnt exist

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

In [7]:
# load model

is_cuda = True  # Determine if we should use the CPU to train or GPU

model = CrowdCounter(is_cuda=is_cuda)  # is_cuda determines if all the input tensors should be converted to cuda tensors
network.weights_normal_init(model, dev=0.01)
model.train()

CrowdCounter(
  (model): CSRNet(
    (column): Sequential(
      (0): Conv2d(
        (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (1): Conv2d(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(
        (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (4): Conv2d(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(
        (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (7): Conv2d(
        (conv): Conv2d(256

In [8]:
# Model parameters

for name, param in model.named_parameters():
    print(f'{name}\t{param.device}\t{param.shape}')

# Print model's state_dict
print("\nModel's state_dict: \n")
for k, v in model.state_dict().items():
    print(k, "\t", v.dtype)

model.column.0.conv.weight	cpu	torch.Size([64, 3, 3, 3])
model.column.0.conv.bias	cpu	torch.Size([64])
model.column.1.conv.weight	cpu	torch.Size([64, 64, 3, 3])
model.column.1.conv.bias	cpu	torch.Size([64])
model.column.3.conv.weight	cpu	torch.Size([128, 64, 3, 3])
model.column.3.conv.bias	cpu	torch.Size([128])
model.column.4.conv.weight	cpu	torch.Size([128, 128, 3, 3])
model.column.4.conv.bias	cpu	torch.Size([128])
model.column.6.conv.weight	cpu	torch.Size([256, 128, 3, 3])
model.column.6.conv.bias	cpu	torch.Size([256])
model.column.7.conv.weight	cpu	torch.Size([256, 256, 3, 3])
model.column.7.conv.bias	cpu	torch.Size([256])
model.column.8.conv.weight	cpu	torch.Size([256, 256, 3, 3])
model.column.8.conv.bias	cpu	torch.Size([256])
model.column.10.conv.weight	cpu	torch.Size([512, 256, 3, 3])
model.column.10.conv.bias	cpu	torch.Size([512])
model.column.11.conv.weight	cpu	torch.Size([512, 512, 3, 3])
model.column.11.conv.bias	cpu	torch.Size([512])
model.column.12.conv.weight	cpu	torch.Siz

In [9]:
# Change model weights tensors to be cuda tensors if is_cuda is true and cuda is available

if is_cuda and torch.cuda.is_available():
    print("Changing to cuda weights")
    model.cuda()

Changing to cuda weights


In [10]:
#training configuration

momentum = 0.9
disp_interval = 1
log_interval = 250

train_loss = 0
step_cnt = 0
re_cnt = False
t = Timer()
t.tic()

# Set initial values
best_mae, best_mse, best_epoch = 999999, 999999, 0

In [11]:
# Hyperparameters

learning_rate = 0.0001
epochs = 100

# construct an optimizer

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=learning_rate)

In [12]:
# Load the images, take note the num_pool argument

data_loader = ImageDataLoader(train_path, shuffle=False, pre_load=False, num_pool = 3)
data_loader_val = ImageDataLoader(val_path, shuffle=False, pre_load=False, num_pool = 3)

print('Training instances: {}'.format(data_loader.get_num_samples()))
print('Validation instances: {}'.format(data_loader_val.get_num_samples()))

Training instances: 107
Validation instances: 50


In [13]:
#Tensorboard  config
use_tensorboard = True

writer = SummaryWriter(f'../output/tensorboard/runs')

layout = {
    
    'MAE': {'Weather': ['Margin', ['Weather/None', 'Weather/Fog', 'Weather/Rain', 'Weather/Snow']],
           'Crowd Density': ['Margin', ['Crowd Density/Low', 'Crowd Density/Med', 'Crowd Density/High']]},
    
    'MSE': {'Weather': ['Margin', ['Weather/None', 'Weather/Fog', 'Weather/Rain', 'Weather/Snow']],
           'Crowd Density': ['Margin', ['Crowd Density/Low', 'Crowd Density/Med', 'Crowd Density/High']]}
}

#writer.add_custom_scalars(layout)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [14]:
for epoch in range(epochs):    
    step = -1
    train_loss = 0
    for id, blob in enumerate(data_loader):                
        step = step + 1        
        im_data = blob['data']
        gt_data = blob['gt_density']
        metadata = blob['metadata']
                        
        # Forward pass
        density_map = model(im_data, gt_data)
        
        loss = model.loss
        train_loss += loss.data
        
        # Write to tensorboard
        writer.add_scalar("Loss/train", train_loss, epoch)

        # Reset zero gradient and backpropagate
        step_cnt += 1
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % disp_interval == 0:            
            duration = t.toc(average=False)
            fps = step_cnt / duration
            gt_count = np.sum(gt_data)    

            density_map = density_map.data.cpu().numpy()
            et_count = np.sum(density_map)
            utils.save_results(im_data,gt_data,density_map, output_dir)
            log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %7.1f, et_cnt: %7.1f' % (epoch,step, 1./fps, gt_count, et_count)
            log_print(log_text, color='green', attrs=['bold'])
            re_cnt = True
       
        if re_cnt:                                
            t.tic()
            re_cnt = False
        break

    # Overwrite the current model weights
    current_model = f'{method}_{learning_rate}.h5'
    save_name = os.path.join(output_dir, current_model)
    network.save_net(save_name, model)
            
    # Evaluate the mae and mse results by doing a forward pass against the validation dataset i.e data_loader_val for
    # each epoch
    MAEcrowddensity, MSEcrowddensity, MAEweather, MSEweather, MAE, MSE = evaluate_model(save_name, data_loader_val, is_cuda=is_cuda)
    
    # Pocket algorithm: Check to see if the current epoch mae is better than the best recorded one,
    # If it is, then overwrite the current best .h5 weights file
    if MAE < best_mae:
        # Save the new best mae and mse
        best_mae = MAE
        best_mse = MSE
        best_epoch = epoch
        best_model = f'best_model_epoch_{best_epoch}_{method}_{learning_rate}.h5'

        # Overwrite or create a new file for the best model for this learning rate
        save_name = os.path.join(output_dir, best_model)
        network.save_net(save_name, model)
        
    # Print out the best epoch that beat the current best mae
    log_text = f'EPOCH: {epoch}, MAE: {MAE}, MSE: {MSE}'
    log_print(log_text, color='blue', attrs=['bold'])

    # Save the results to tensorboard for each epoch
    if use_tensorboard:
        
        # overall segment
        writer.add_scalar("Overall/MAE", MAE, epoch)
        writer.add_scalar("Overall/MSE", train_loss, epoch)
        writer.add_scalar("Overall/train_loss", train_loss / data_loader.get_num_samples(), epoch)
        
        # crowd density segment
        writer.add_scalar('High/MAE', MAEcrowddensity['High'], epoch)
        writer.add_scalar('High/MSE', MSEcrowddensity['High'], epoch)
        
        writer.add_scalar('Med/MAE', MAEcrowddensity['Med'], epoch)
        writer.add_scalar('Med/MSE', MSEcrowddensity['Med'], epoch)
        
        writer.add_scalar('Low/MAE', MAEcrowddensity['Low'], epoch)
        writer.add_scalar('Low/MSE', MSEcrowddensity['Low'], epoch)
        
        # weather segment
        writer.add_scalar('None/MAE', MAEweather['None'], epoch)
        writer.add_scalar('None/MSE', MSEweather['None'], epoch)
        
        writer.add_scalar('Fog/MAE', MAEweather['Fog'], epoch)
        writer.add_scalar('Fog/MSE', MSEweather['Fog'], epoch)
        
        writer.add_scalar('Rain/MAE', MAEweather['Rain'], epoch)
        writer.add_scalar('Rain/MSE', MSEweather['Rain'], epoch)
        
        writer.add_scalar('Snow/MAE', MAEweather['Snow'], epoch)
        writer.add_scalar('Snow/MSE', MSEweather['Snow'], epoch)

[1m[32mepoch:    0, step    0, Time: 1.8387s, gt_cnt: 23869.9, et_cnt:     0.0[0m


  v = Variable(torch.as_tensor(x).type(dtype), requires_grad = False, volatile = True)


[1m[34mEPOCH: 0, MAE: 16082.142192687988, MSE: 806773205.8249829[0m


  density_map = 255 * density_map / np.max(density_map)


[1m[32mepoch:    1, step    0, Time: 13.6044s, gt_cnt: 23869.9, et_cnt:     0.0[0m


KeyboardInterrupt: 