In [23]:
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import models,transforms,datasets
from torch.utils import data
%matplotlib inline
import torchvision.transforms as standard_transforms
from torch.utils.data import DataLoader
import random
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from PIL import Image, ImageOps, ImageFilter
import numbers
import pdb
import pandas as pd
import torch.nn.functional as F
import torchvision.transforms.functional as F2
import torch.utils.model_zoo as model_zoo
from glob import glob
from torchvision import transforms, models
from torch.utils.data.dataloader import default_collate
from torch import optim
from torch.nn import Module
import time
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

Code can be run both on laptop and on GCP.

In [24]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

Using gpu: True 


unzip data

In [25]:
!unzip data.zip

Archive:  data.zip
replace data/bayes/train/IMG_290.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


## Models

We use 2 nn, a CSRNet and a VGG19 extented.

#### VGG19 extended

In [26]:
__all__ = ['vgg19']
model_urls = {
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
}

class VGGExtended(nn.Module):
    def __init__(self, features):
        super(VGGExtended, self).__init__()
        self.features = features
        self.reg_layer = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, 1)
        )

    def forward(self, x):
        x = self.features(x)
        x = F.upsample_bilinear(x, scale_factor=2)
        x = self.reg_layer(x)
        return torch.abs(x)


def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False):
    if dilation:
        d_rate = 2
    else:
        d_rate = 1
    layers = []
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate,dilation = d_rate)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


cfg = {
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512]
}

def vgg19():
    """VGG 19-layer model (configuration "E")
        model pre-trained on ImageNet
    """
    model = VGGExtended(make_layers(cfg['E']))
    model.load_state_dict(model_zoo.load_url(model_urls['vgg19']), strict=False)
    return model

#### CSRNet

In [27]:
class CSRNet(nn.Module):
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        self.seen = 0
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat  = [512, 512, 512,256,128,64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat,in_channels = 512,dilation = True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if not load_weights:
            mod = models.vgg16(pretrained = True)
            self._initialize_weights()
            self.frontend.load_state_dict(mod.features[0:23].state_dict())
            
    def forward(self,x):
        size = x.size()
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        x = F.upsample(x, size = size[2:])
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0) 

## Processing helpers

In [28]:
def random_cropBayes(im_h, im_w, crop_h, crop_w):
    res_h = im_h - crop_h
    res_w = im_w - crop_w
    i = random.randint(0, res_h)
    j = random.randint(0, res_w)
    return i, j, crop_h, crop_w


def cal_innner_area(c_left, c_up, c_right, c_down, bbox):
    inner_left = np.maximum(c_left, bbox[:, 0])
    inner_up = np.maximum(c_up, bbox[:, 1])
    inner_right = np.minimum(c_right, bbox[:, 2])
    inner_down = np.minimum(c_down, bbox[:, 3])
    inner_area = np.maximum(inner_right-inner_left, 0.0) * np.maximum(inner_down-inner_up, 0.0)
    return inner_area

## Datasets

#### Ground Truth dataset

In [29]:
class GTDataset(data.Dataset):
    def __init__(self, data_path, mode, main_transform=None, img_transform=None, gt_transform=None):
        self.img_path = data_path + '/img'
        self.gt_path = data_path + '/den'
        self.data_files = [filename for filename in os.listdir(self.img_path) \
                           if os.path.isfile(os.path.join(self.img_path,filename))]
        self.num_samples = len(self.data_files) 
        self.main_transform=main_transform  
        self.img_transform = img_transform
        self.gt_transform = gt_transform     
    
    def __getitem__(self, index):
        fname = self.data_files[index]
        img, den = self.read_image_and_gt(fname)      
        if self.main_transform is not None:
            img, den = self.main_transform(img,den) 
        if self.img_transform is not None:
            img = self.img_transform(img)         
        if self.gt_transform is not None:
            den = self.gt_transform(den)               
        return img, den

    def __len__(self):
        return self.num_samples

    def read_image_and_gt(self,fname):
        img = Image.open(os.path.join(self.img_path,fname))
        if img.mode == 'L':
            img = img.convert('RGB')

        den = pd.read_csv(os.path.join(self.gt_path,os.path.splitext(fname)[0] + '.csv'), sep=',',header=None).values
        
        den = den.astype(np.float32, copy=False)    
        den = Image.fromarray(den)  
        return img, den    

    def get_num_samples(self):
        return self.num_samples

#### Bayes method Dataset

In [62]:
class BayesDataset(data.Dataset):
    def __init__(self, root_path, crop_size,
                 downsample_ratio, is_gray=False,
                 method='train'):

        self.root_path = root_path
        self.im_list = sorted(glob(os.path.join(self.root_path, '*.jpg')))
        if method not in ['train', 'val', 'test']:
            raise Exception("not implement")
        self.method = method

        self.c_size = crop_size
        self.d_ratio = downsample_ratio
        assert self.c_size % self.d_ratio == 0
        self.dc_size = self.c_size // self.d_ratio

        if is_gray:
            self.trans = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
        else:
            self.trans = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Pour CSRNet à vérifier si besoin de modif
            ])

    def __len__(self):
        return len(self.im_list)

    def __getitem__(self, item):
        img_path = self.im_list[item]
        gd_path = img_path.replace('jpg', 'npy')
        img = Image.open(img_path).convert('RGB')
        if self.method == 'train':
            keypoints = np.load(gd_path)
            return self.train_transform(img, keypoints)
        else:
            keypoints = np.load(gd_path)
            img = self.trans(img)
            name = os.path.basename(img_path).split('.')[0]
            return img, len(keypoints), name

    def train_transform(self, img, keypoints):
        """random crop image patch and find people in it"""
        
        """
        Les keypoints correspondent aux coordonnées des têtes
        MAIS une troisième coordonnée a été calculée lors du preprocessing des données,
        elle correspont à "dis" et semble important pour calculer pas mal de choses
        """
        
        wd, ht = img.size
        st_size = min(wd, ht)
        assert st_size >= self.c_size
        assert len(keypoints) > 0
        i, j, h, w = random_cropBayes(ht, wd, self.c_size, self.c_size)
        img = F2.crop(img, i, j, h, w)
        
        nearest_dis = np.clip(keypoints[:, 2], 4.0, 128.0)
       
        points_left_up = keypoints[:, :2] - nearest_dis[:, None] / 2.0
        points_right_down = keypoints[:, :2] + nearest_dis[:, None] / 2.0
        bbox = np.concatenate((points_left_up, points_right_down), axis=1)
        inner_area = cal_innner_area(j, i, j+w, i+h, bbox)
        origin_area = nearest_dis * nearest_dis
        ratio = np.clip(1.0 * inner_area / origin_area, 0.0, 1.0)
        mask = (ratio >= 0.3)

        target = ratio[mask]
        keypoints = keypoints[mask]
        keypoints = keypoints[:, :2] - [j, i]  # change coodinate
        if len(keypoints) > 0:
            if random.random() > 0.5:
                img = F2.hflip(img)
                keypoints[:, 0] = w - keypoints[:, 0]
        else:
            if random.random() > 0.5:
                img = F2.hflip(img)
        return self.trans(img), torch.from_numpy(keypoints.copy()).float(), \
               torch.from_numpy(target.copy()).float(), st_size

## DataLoader

#### Loading Data GT

In [94]:
#CSRNet
LABEL_FACTOR = 1


def random_crop_GT(img,den,dst_size):
    # dst_size: ht, wd

    _,ts_hd,ts_wd = img.shape

    x1 = random.randint(0, ts_wd - dst_size[1])//LABEL_FACTOR*LABEL_FACTOR
    y1 = random.randint(0, ts_hd - dst_size[0])//LABEL_FACTOR*LABEL_FACTOR
    x2 = x1 + dst_size[1]
    y2 = y1 + dst_size[0]

    label_x1 = x1//LABEL_FACTOR
    label_y1 = y1//LABEL_FACTOR
    label_x2 = x2//LABEL_FACTOR
    label_y2 = y2//LABEL_FACTOR

    return img[:,y1:y2,x1:x2], den[label_y1:label_y2,label_x1:label_x2]



def share_memory(batch):
    out = None
    if False:
        # If we're in a background process, concatenate directly into a
        # shared memory tensor to avoid an extra copy
        numel = sum([x.numel() for x in batch])
        storage = batch[0].storage()._new_shared(numel)
        out = batch[0].new(storage)
    return out

crop_size = 256

def GT_collate(batch):
    # @GJY 
    r"""Puts each data field into a tensor with outer dimension batch size"""

    transposed = list(zip(*batch)) # imgs and dens
    imgs, dens = [transposed[0],transposed[1]]


    error_msg = "batch must contain tensors; found {}"
    if isinstance(imgs[0], torch.Tensor) and isinstance(dens[0], torch.Tensor):
        
        cropped_imgs = []
        cropped_dens = []
        for i_sample in range(len(batch)):
            _img, _den = random_crop_GT(imgs[i_sample],dens[i_sample],[crop_size,crop_size])
            cropped_imgs.append(_img)
            cropped_dens.append(_den)


        cropped_imgs = torch.stack(cropped_imgs, 0, out=share_memory(cropped_imgs))
        cropped_dens = torch.stack(cropped_dens, 0, out=share_memory(cropped_dens))

        return [cropped_imgs,cropped_dens]

    raise TypeError((error_msg.format(type(batch[0]))))


def loading_data_GT(batch_size=5, num_workers=8):
    mean_std = ([0.410824894905, 0.370634973049, 0.359682112932], [0.278580576181, 0.26925137639, 0.27156367898])
    log_para = 100.
    factor = 1
    # DATA_PATH = "/home/simon/Bureau/framework-crowd-counting/ProcessedData/shanghaitech_part_A"
    DATA_PATH = "data/gt"
    
    
    train_main_transform = Compose([
        RandomHorizontallyFlip()
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    gt_transform = standard_transforms.Compose([
        GTScaleDown(factor),
        LabelNormalize(log_para)
    ])

    train_set = GTDataset(DATA_PATH+'/train', 'train',main_transform=train_main_transform, img_transform=img_transform, gt_transform=gt_transform)
    train_loader =None
    if batch_size == 1:
        train_loader = DataLoader(train_set, batch_size=1, shuffle=True, drop_last=True)
    elif batch_size > 1:
        train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers, collate_fn=GT_collate, shuffle=True, drop_last=True)
    
    val_set = GTDataset(DATA_PATH+'/val', 'val', main_transform=None, img_transform=img_transform, gt_transform=gt_transform)
    val_loader = DataLoader(val_set, batch_size=1, num_workers=num_workers, shuffle=True, drop_last=False)
    
    test_set = GTDataset(DATA_PATH+'/test', 'test', main_transform=None, img_transform=img_transform, gt_transform=gt_transform)
    test_loader = DataLoader(test_set, batch_size=1, num_workers=num_workers, shuffle=True, drop_last=False)
    
    return train_loader, val_loader, test_loader




#### Loading Data Bayes

In [95]:
#bayes
downsample_ratio = 8 # Mettre à 8 pour le réseau du répo (à 1 pour CSRNet puisque on ne modifie pas la dim avec le réseau)
data_dir = "data/bayes"
#data_dir = "/home/simon/Bureau/framework-crowd-counting/processed_data_bcc/SHHA"
#data_dir = "/Users/VictoRambaud/dev/crowd_counting2/ProcessedData/SHHA"
crop_size = 256
is_gray = False

def train_collate(batch):
    transposed_batch = list(zip(*batch))
    images = torch.stack(transposed_batch[0], 0)
    points = transposed_batch[1]  # the number of points is not fixed, keep it as a list of tensor
    targets = transposed_batch[2]
    st_sizes = torch.FloatTensor(transposed_batch[3])
    return images, points, targets, st_sizes


def loading_data_Bayes(batch_size = 5, num_workers = 8):
    datasets_bayes = {x: BayesDataset(os.path.join(data_dir, x),
                              crop_size,
                              downsample_ratio,
                              is_gray, x) for x in ['train', 'val', 'test']}

    dataloaders_bayes = {x: DataLoader(datasets_bayes[x],
                                collate_fn=(train_collate if x == 'train' else default_collate),
                                batch_size=(batch_size if x == 'train' else 1),
                                shuffle=(True if x == 'train' else False),
                                num_workers=num_workers,
                                pin_memory=(True if x == 'train' else False))
                                for x in ['train', 'val', 'test']}
    
    dataloaders_bayes_test = "To do"
    
    return dataloaders_bayes["train"], dataloaders_bayes["val"], dataloaders_bayes["test"]
    

In [96]:
loading_data_Bayes()

(<torch.utils.data.dataloader.DataLoader at 0x7fe65d43f250>,
 <torch.utils.data.dataloader.DataLoader at 0x7fe65d2b7c50>,
 <torch.utils.data.dataloader.DataLoader at 0x7fe65d2b7f90>)

## Bayes : computing losses

In [97]:
class Post_Prob(Module):
    def __init__(self, sigma, c_size, stride, background_ratio, use_background, device):
        super(Post_Prob, self).__init__()
        assert c_size % stride == 0

        self.sigma = sigma
        self.bg_ratio = background_ratio
        self.device = device
        # coordinate is same to image space, set to constant since crop size is same
        self.cood = torch.arange(0, c_size, step=stride,
                                 dtype=torch.float32, device=device) + stride / 2
        self.cood.unsqueeze_(0)
        self.softmax = torch.nn.Softmax(dim=0)
        self.use_bg = use_background

    def forward(self, points, st_sizes):
        num_points_per_image = [len(points_per_image) for points_per_image in points]
        all_points = torch.cat(points, dim=0)

        if len(all_points) > 0:
            x = all_points[:, 0].unsqueeze_(1)
            y = all_points[:, 1].unsqueeze_(1)
            x_dis = -2 * torch.matmul(x, self.cood) + x * x + self.cood * self.cood
            y_dis = -2 * torch.matmul(y, self.cood) + y * y + self.cood * self.cood
            y_dis.unsqueeze_(2)
            x_dis.unsqueeze_(1)
            dis = y_dis + x_dis
            dis = dis.view((dis.size(0), -1))

            dis_list = torch.split(dis, num_points_per_image)
            prob_list = []
            for dis, st_size in zip(dis_list, st_sizes):
                if len(dis) > 0:
                    if self.use_bg:
                        min_dis = torch.clamp(torch.min(dis, dim=0, keepdim=True)[0], min=0.0)
                        d = st_size * self.bg_ratio
                        bg_dis = (d - torch.sqrt(min_dis))**2
                        dis = torch.cat([dis, bg_dis], 0)  # concatenate background distance to the last
                    dis = -dis / (2.0 * self.sigma ** 2)
                    prob = self.softmax(dis)
                else:
                    prob = None
                prob_list.append(prob)
        else:
            prob_list = []
            for _ in range(len(points)):
                prob_list.append(None)
        return prob_list
    
    
class Bay_Loss(Module):
    def __init__(self, use_background, device):
        super(Bay_Loss, self).__init__()
        self.device = device
        self.use_bg = use_background

    def forward(self, prob_list, target_list, pre_density):
        loss = 0
        
        """
            - prob list semble être la listes des p(yn|xm) ie la contribution du pixel xm sur la n-ieme tête
            (les lignes de cette matrice sont de taille 4096 = 64*64)
            - pre density est la prédiction de la densité (sortie du réseau) - de taille 64x64 ici
            - target list a pour longueur le nombre de têtes - correspond aux E[cn] "réel" (le calcul reste un mystère)
            - On obtient les E[cn] estimées grâce à un produit terme à terme de prob_list et pre_density
        """
        
        
        for idx, prob in enumerate(prob_list):  # iterative through each sample
            if prob is None:  # image contains no annotation points
                pre_count = torch.sum(pre_density[idx])
                target = torch.zeros((1,), dtype=torch.float32, device=self.device)
            else:
                N = len(prob)
                if self.use_bg:
                    target = torch.zeros((N,), dtype=torch.float32, device=self.device)
                    target[:-1] = target_list[idx]
                else:
                    target = target_list[idx]
                pre_count = torch.sum(pre_density[idx].view((1, -1)) * prob, dim=1)  # flatten into vector
            
            loss += torch.sum(torch.abs(target - pre_count))
        loss = loss / len(prob_list)
        return loss

## Utils

In [98]:
# ===============================img tranforms============================

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, mask, bbx=None):
        if bbx is None:
            for t in self.transforms:
                img, mask = t(img, mask)
            return img, mask
        for t in self.transforms:
            img, mask, bbx = t(img, mask, bbx)
        return img, mask, bbx

class RandomHorizontallyFlip(object):
    def __call__(self, img, mask, bbx=None):
        if random.random() < 0.5:
            if bbx is None:
                return img.transpose(Image.FLIP_LEFT_RIGHT), mask.transpose(Image.FLIP_LEFT_RIGHT)
            w, h = img.size
            xmin = w - bbx[:,3]
            xmax = w - bbx[:,1]
            bbx[:,1] = xmin
            bbx[:,3] = xmax
            return img.transpose(Image.FLIP_LEFT_RIGHT), mask.transpose(Image.FLIP_LEFT_RIGHT), bbx
        if bbx is None:
            return img, mask
        return img, mask, bbx



# ===============================label tranforms============================

class DeNormalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor


class LabelNormalize(object):
    def __init__(self, para):
        self.para = para

    def __call__(self, tensor):
        # tensor = 1./(tensor+self.para).log()
        tensor = torch.from_numpy(np.array(tensor))
        tensor = tensor*self.para
        return tensor

    
class GTScaleDown(object):
    def __init__(self, factor=8):
        self.factor = factor

    def __call__(self, img):
        w, h = img.size
        if self.factor==1:
            return img
        tmp = np.array(img.resize((w//self.factor, h//self.factor), Image.BICUBIC))*self.factor*self.factor
        img = Image.fromarray(tmp)
        return img

## Trainers

Dossier à créer sur Google Cloud !

In [99]:
save_dir = "best_model_weights"

#### Trainer GT

In [100]:
PRINT_FREQ = 1
LOG_PARA = 100. # C'est quoi ce LOG_PARA ??
seed = 1


class Trainer_GT():
    def __init__(self, dataloader, net, loss, optimizer, validation_frequency=1, max_epoch=100):
        self.train_loader, self.val_loader, self.test_loader = dataloader()
        self.net = net
        self.loss = loss 
        self.optimizer = optimizer
        self.best_mae = 1e20
        self.best_mse = 1e20
        self.epoch = 0
        self.validation_frequency = validation_frequency
        self.max_epoch = max_epoch



    def train(self):
        for epoch in range(0, self.max_epoch):
            self.epoch = epoch
            # si on veut un lr sheduler il faut le mettre là
                
            # training    
            self.train_epoch()

            # validation
            if epoch%self.validation_frequency==0:
                self.validate()
                
        print(f'Train finished | best_mse: {self.best_mse} | best_mae: {self.best_mae}')


    def train_epoch(self): # training for all datasets
        self.net.train()
        
        for step, data in enumerate(self.train_loader, 0):
            img, gt_map = data
            img = Variable(img).to(device)
            print(img.size())
            gt_map = Variable(gt_map).to(device)

            self.optimizer.zero_grad()
            # pred_map = self.net(img, gt_map)
            
            pred_density_map = self.net(img)
            loss = self.loss(pred_density_map, gt_map)
            loss.backward()
            self.optimizer.step()
            
            gt_count = [int(gt_map[i].sum().data / LOG_PARA) for i in range(gt_map.size()[0])]
            pre_count = [int(pred_density_map[i].sum().data/LOG_PARA) for i in range(pred_density_map.size()[0])]
            
            print(f'epoch: {self.epoch} | step: {step} | count: {gt_count} | prediction: {pre_count} | loss: {loss}') 


    def validate(self):
        epoch_start = time.time()
        self.net.eval()
        epoch_res = []

        for vi, data in enumerate(self.val_loader, 0):
            img, gt_map = data

            with torch.no_grad():
                img = Variable(img).to(device)
                assert img.size(0) == 1
                gt_map = Variable(gt_map).to(device)
                pred_density_map = self.net(img)
                
                pred_cnt = int(gt_map[0].sum().data / LOG_PARA)
                gt_count = int(pred_density_map[0].sum().data/LOG_PARA)
                res = gt_count - pred_cnt
                    
                epoch_res.append(res)


        epoch_res = np.array(epoch_res)
        mse = np.sqrt(np.mean(np.square(epoch_res)))
        mae = np.mean(np.abs(epoch_res))
        
        print('Epoch {} Val, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec'
                     .format(self.epoch, mse, mae, time.time()-epoch_start))

        if (2.0 * mse + mae) < (2.0 * self.best_mse + self.best_mae):
            self.best_mse = mse
            self.best_mae = mae
            print("save best mse {:.2f} mae {:.2f} model epoch {}".format(self.best_mse,
                                                                            self.best_mae,
                                                                                 self.epoch))
            torch.save(self.net.state_dict(), os.path.join(save_dir, 'best_model_gt.pth'))


In [101]:
# Launch GT Train !
lr = 1e-5 

gt_net = CSRNet().to(device)
loss = nn.MSELoss().to(device)
optimizer = optim.Adam(gt_net.parameters(), lr=lr, weight_decay=1e-4)
# optimizer = optim.SGD(self.net.parameters(), cfg.LR, momentum=0.95,weight_decay=5e-4)  

gt_trainer = Trainer_GT(loading_data_GT, gt_net, loss, optimizer, max_epoch=2)
gt_trainer.train()

torch.Size([5, 3, 256, 256])


  return F.mse_loss(input, target, reduction=self.reduction)


epoch: 0 | step: 0 | count: [1, 449, 203, 33, 48] | prediction: [0, 0, 0, 0, 0] | loss: 0.3974553942680359
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 1 | count: [3, 5, 130, 185, 49] | prediction: [0, 0, 0, 0, 0] | loss: 0.08401802182197571
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 2 | count: [10, 0, 6, 120, 10] | prediction: [0, 0, 0, 0, 0] | loss: 0.033981602638959885
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 3 | count: [34, 56, 144, 64, 90] | prediction: [0, 0, 0, 0, 0] | loss: 0.15288524329662323
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 4 | count: [24, 97, 27, 30, 151] | prediction: [0, 0, 0, 0, 0] | loss: 0.07681484520435333
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 5 | count: [137, 44, 64, 137, 2] | prediction: [0, 0, 0, 0, 0] | loss: 0.0793282762169838
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 6 | count: [40, 11, 23, 5, 71] | prediction: [1, 0, 1, 0, 0] | loss: 0.029872078448534012
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 7 | count: [148, 199, 125, 46

Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/opt/anaconda3/lib/python3.7/

KeyboardInterrupt: 

#### Trainer Bayes

In [17]:

# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter()

# image = Image.open('../ProcessedData/SHHA/train/IMG_1.jpg')
# trans1 = transforms.ToTensor()
# img = trans1(image).to(device)
# img = img.unsqueeze(0)

# write to tensorboard

# writer.add_graph(model, img)
# writer.close()

In [92]:
class Trainer_Bayes():
    def __init__(self, dataloader, net, loss, optimizer,  validation_frequency=1, max_epoch=100):
        self.train_loader, self.val_loader, _ = dataloader()
        self.net = net
        self.loss = loss 
        self.optimizer = optimizer
        self.best_mae = 1e20
        self.best_mse = 1e20
        self.epoch = 0
        self.validation_frequency = validation_frequency
        self.max_epoch = max_epoch
        

    def train(self):
        for epoch in range(0, self.max_epoch):
            self.epoch = epoch

            # training    
            self.train_epoch()

            # validation
            if epoch % self.validation_frequency == 0:
                self.validate()
                
        print(f'Train finished | best_mse: {self.best_mse} | best_mae: {self.best_mae}')
                

    def train_epoch(self):
        self.net.train()  # Set model to training mode
        # running_loss = 0.0
        # Iterate over data.
        for step, (inputs, points, targets, st_sizes) in enumerate(self.train_loader):
            inputs = inputs.to(device)
            print(inputs.size())
            st_sizes = st_sizes.to(device)
            gd_count = np.array([len(p) for p in points], dtype=np.float32)
            points = [p.to(device) for p in points]
            targets = [t.to(device) for t in targets]
            
            with torch.set_grad_enabled(True):
                outputs = self.net(inputs)
                prob_list = post_prob(points, st_sizes)
                loss = self.loss(prob_list, targets, outputs)

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                N = inputs.size(0) # batch size
                pre_count = torch.sum(outputs.view(N, -1), dim=1).detach().cpu().numpy()
                res = pre_count - gd_count

                print(f'epoch: {self.epoch} | step: {step} | gd_count: {gd_count} | prediction: {pre_count} | loss: {loss}')

                # running_loss += loss.item()
                # if step % 2 == 1:
                #   writer.add_scalar('training loss vgg',
                #               running_loss / 2,
                #               self.epoch * len(self.train_loader) + step)
                #   running_loss = 0.0

    def validate(self):
        epoch_start = time.time()
        self.net.eval()  # Set model to evaluate mode
        epoch_res = []

        # Iterate over data.
        for inputs, count, name in self.val_loader:
            inputs = inputs.to(device)
            # inputs are images with different sizes
            assert inputs.size(0) == 1 # 'the batch size should equal to 1 in validation mode'
            with torch.set_grad_enabled(False):
                outputs = self.net(inputs)
                res = count[0].item() - torch.sum(outputs).item()
                epoch_res.append(res)


        epoch_res = np.array(epoch_res)
        mse = np.sqrt(np.mean(np.square(epoch_res)))
        mae = np.mean(np.abs(epoch_res))

        # ...log the running loss
        writer.add_scalar('val MAE vgg',
                            mae,
                            self.epoch * len(self.val_loader))
        writer.add_scalar('val MSE vgg',
                        mse,
                        self.epoch * len(self.val_loader))

        print('Epoch {} Val, MSE: {:.2f} MAE: {:.2f}, Cost {:.1f} sec'
                     .format(self.epoch, mse, mae, time.time()-epoch_start))

        if (2.0 * mse + mae) < (2.0 * self.best_mse + self.best_mae):
            self.best_mse = mse
            self.best_mae = mae
            print("save best mse {:.2f} mae {:.2f} model epoch {}".format(self.best_mse,
                                                                            self.best_mae,
                                                                                 self.epoch))
            torch.save(self.net.state_dict(), os.path.join(save_dir, 'best_model_bayes.pth'))

In [93]:
lr = 0.00001
weight_decay = 0.9
sigma = 0.1
use_background = False
background_ratio = 1

bayes_net = vgg19().to(device)
optimizer = optim.Adam(bayes_net.parameters(), lr=lr, weight_decay=weight_decay)

post_prob = Post_Prob(sigma,
                           crop_size,
                           downsample_ratio,
                           background_ratio,
                           use_background,
                           device)
loss = Bay_Loss(use_background, device)

bayes_trainer = Trainer_Bayes(loading_data_Bayes, bayes_net, loss, optimizer, max_epoch=3)
bayes_trainer.train()

torch.Size([5, 3, 256, 256])
epoch: 0 | step: 0 | gd_count: [ 54.  75.  73. 336. 168.] | prediction: [40.376045 27.934978 27.685867 53.104065 34.23043 ] | loss: 105.5298843383789
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 1 | gd_count: [ 25.   3.  36.  32. 449.] | prediction: [37.186348 37.789803 53.837242 55.03681  52.229515] | loss: 107.7060546875
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 2 | gd_count: [ 23. 257.  51.  16.  57.] | prediction: [38.97393  56.050884 47.943253 32.612446 50.821888] | loss: 56.00762939453125
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 3 | gd_count: [ 47. 214.  23.  91.  54.] | prediction: [40.408295 63.39425  36.465775 68.987434 76.11583 ] | loss: 48.712764739990234
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 4 | gd_count: [ 33.   0. 204. 202.  35.] | prediction: [58.533924 21.874063 45.64112  59.16406  40.633938] | loss: 76.14788818359375
torch.Size([5, 3, 256, 256])
epoch: 0 | step: 5 | gd_count: [469.   5. 240.  55.  35.] | prediction: [87.64

KeyboardInterrupt: 

## Results on test set

#### Test GT

In [104]:
_, _, test_dataloader = loading_data_GT()

gt_net.load_state_dict(torch.load(os.path.join(save_dir, 'best_model_gt.pth'), device))
gt_net.eval()
errors = []


for vi, data in enumerate(test_dataloader, 0):
    img, gt_map = data

    with torch.no_grad():
        img = Variable(img).to(device)
        assert img.size(0) == 1
        gt_map = Variable(gt_map).to(device)
        pred_density_map = gt_net(img)
        pred_cnt = int(gt_map[0].sum().data / LOG_PARA)
        gt_count = int(pred_density_map[0].sum().data/LOG_PARA)
        error = gt_count - pred_cnt
        print(vi, error, gt_count, pred_cnt)

        errors.append(error)


errors = np.array(errors)
mse = np.sqrt(np.mean(np.square(errors)))
mae = np.mean(np.abs(errors))

log_str = 'Final Test: mae {}, mse {}'.format(mae, mse)
print(log_str)

0 -12 399 411
1 223 291 68
2 138 367 229
3 -295 466 761
4 -174 293 467
5 -57 297 354
6 -15 278 293
7 -657 495 1152
8 -7 477 484
9 241 389 148
10 168 366 198
11 39 331 292
12 56 556 500
13 -752 404 1156
14 -247 354 601
15 224 338 114
16 -298 418 716
17 80 420 340
18 94 394 300
19 -11 369 380
20 -74 509 583
21 72 592 520
22 -288 428 716
23 -229 338 567
24 127 261 134
25 57 364 307
26 -130 244 374
27 -577 408 985
28 -33 329 362
29 345 462 117
30 292 542 250
31 235 375 140
32 -28 524 552
33 -859 372 1231
34 195 260 65
35 -207 359 566
36 186 435 249
37 -107 481 588
38 208 437 229
39 265 482 217
40 -45 310 355
41 -728 637 1365
42 -80 317 397
43 202 358 156
44 -312 544 856
45 -513 509 1022
46 -292 236 528
47 -34 569 603
48 -111 402 513
49 -1190 390 1580
50 125 388 263
51 -544 376 920


Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/opt/anaconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 

#### Test Bayes

In [None]:
_, _, test_dataloader = loading_data_Bayes()

bayes_net.load_state_dict(torch.load(os.path.join(save_dir, 'best_model_bayes.pth'), device))
errors = []

for inputs, count, name in test_dataloader:
    inputs = inputs.to(device)
    assert inputs.size(0) == 1
    with torch.set_grad_enabled(False):
        outputs = bayes_net(inputs)
        error = count[0].item() - torch.sum(outputs).item()
        print(name, error, count[0].item(), torch.sum(outputs).item())
        errors.append(error)

errors = np.array(errors)
mse = np.sqrt(np.mean(np.square(errors)))
mae = np.mean(np.abs(errors))
log_str = 'Final Test: mae {}, mse {}'.format(mae, mse)
print(log_str)


In [None]:
## Fonction Victor - à garder pour le github final

def test_bayes(net, test_data, has_loader=False):
    """
    net : the trained network
    has_loader : if false, we are just giving a single input and want to get its results, 
        else we are giving a dataloader
    test_data : just input (np.array) and count if has_loader == False, data_loader if not
    """
    
    net.eval()  # Set model to evaluate mode
    
    if not has_loader:
        img, count = test_data[0], len(test_data[1])
        # img must be a np array
        img = img.to(device)
        img = np.asarray(img)
        #the chanels must be in first position in order to work
        if img.shape[0] != 3:
            img = np.moveaxis(img, (0,1,2), (1,2,0))
        img = torch.Tensor(img).unsqueeze(0)
        with torch.set_grad_enabled(False):
            outputs = net(img)
            res = np.abs(count - torch.sum(outputs).item())
        return outputs, res
    
    else:
        full_res = []

        # Iterate over data.
        for inputs, count, name in test_data:
            print(name)
            inputs = inputs.to(device)
            # inputs are images with different sizes
            assert inputs.size(0) == 1, 'the batch size should equal to 1 in test mode'
            with torch.set_grad_enabled(False):
                outputs = net(inputs)
                res = count[0].item() - torch.sum(outputs).item()
                full_res.append(res)


        res = np.array(full_resres)
        mse = np.sqrt(np.mean(np.square(res)))
        mae = np.mean(np.abs(res))

        print('MSE: {:.2f} MAE: {:.2f}'
                     .format(mse, mae))
        return