In [None]:
import importlib
import os
import random
import sys
import time
from random import randrange

import cv2
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from matplotlib import pyplot as plt
from torch import nn, optim
from torchvision import datasets, transforms

sys.path.append("../")

import config

from model_training import training_data_loader
from model_training.training_data_loader import AugmentedBatchesTrainingData


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Explicit CPU handle, used later to move model outputs back for numpy / matplotlib.
cpu_device = torch.device('cpu')
device  # bare expression: shows the selected device as the cell output

In [None]:
# Labeled recording directories, passed to
# training_data_loader.load_data_for_labeled_batches() below.
# NOTE(review): the names look like <date>__<room>__<people>/<session>/<index>__<time>
# -- confirm against the data folder layout.

# First training group.
TRAINING_DIRS_1 = [
    '31_03_21__318__3or4_people/1/006__11_44_59',
    '31_03_21__318__3or4_people/1/007__11_48_59',
    '31_03_21__318__3or4_people/1/008__11_52_59',
    '31_03_21__318__3or4_people/1/009__11_57_00',
     ]

# Second training group. Recordings 005-009 of this session are deliberately left
# out here: they are the validation candidates listed in VALIDATION_DIRS_1.
TRAINING_DIRS_2 = [
    '31_03_21__318__3or4_people/2/000__14_15_19',
    '31_03_21__318__3or4_people/2/001__14_19_19',
    '31_03_21__318__3or4_people/2/002__14_23_19',
    '31_03_21__318__3or4_people/2/003__14_27_20',
    '31_03_21__318__3or4_people/2/004__14_31_20',

    '31_03_21__318__3or4_people/2/010__14_55_20',
    '31_03_21__318__3or4_people/2/011__14_59_20',
    '31_03_21__318__3or4_people/2/012__15_03_21',
    '31_03_21__318__3or4_people/2/013__15_07_21',
    '31_03_21__318__3or4_people/2/014__15_11_21',
    '31_03_21__318__3or4_people/2/015__15_15_21',
    '31_03_21__318__3or4_people/2/016__15_19_21',
    ]

# Validation recordings; none of them appears in the training lists above.
# Commented-out entries are currently excluded from validation.
VALIDATION_DIRS_1 = [
    '31_03_21__318__3or4_people/2/005__14_35_20',
#     '31_03_21__318__3or4_people/2/006__14_39_20',
    '31_03_21__318__3or4_people/2/007__14_43_20',
#     '31_03_21__318__3or4_people/2/008__14_47_20',
    '31_03_21__318__3or4_people/2/009__14_51_20',

#     '05_05_2021__0to5_people/007__13_22_20'
]

# Load the labeled recordings for each directory group.
_training_data_1 = training_data_loader.load_data_for_labeled_batches(labeled_batch_dirs=TRAINING_DIRS_1)
_training_data_2 = training_data_loader.load_data_for_labeled_batches(labeled_batch_dirs=TRAINING_DIRS_2)
_validation_data_1 = training_data_loader.load_data_for_labeled_batches(labeled_batch_dirs=VALIDATION_DIRS_1)

# Training container: both training groups are added with the loader's default
# augmentation settings.
augmented_data_training = training_data_loader.AugmentedBatchesTrainingData()
augmented_data_training.add_training_batch(_training_data_1)
augmented_data_training.add_training_batch(_training_data_2)

# Validation container.
# NOTE(review): flip_and_rotate=False presumably keeps only the original,
# un-augmented frames -- confirm in training_data_loader.
augmented_data_validation = training_data_loader.AugmentedBatchesTrainingData()
augmented_data_validation.add_training_batch(_validation_data_1, flip_and_rotate=False)



# for centre_points in _training_data_2.centre_points:
#     if len(centre_points) > 4:
#         print("Too many people on one frame in annotations!")

In [None]:
# Print summary statistics of the training container (format defined by print_stats).
augmented_data_training.print_stats()

In [None]:
# Print summary statistics of the validation container (format defined by print_stats).
augmented_data_validation.print_stats()

In [None]:
def draw_airbrush_circle(img, centre, radius):
    """Add a cone-shaped ("airbrush") blob to ``img`` in place.

    Every pixel whose Euclidean distance to ``centre`` is at most ``radius`` is
    increased by ``1 - distance / radius``: 1 at the centre, falling linearly
    to 0 at the rim. Values are added, so overlapping blobs accumulate.

    Args:
        img: 2-D floating-point numpy array, modified in place.
        centre: pair ``(i, j)`` where ``i`` indexes axis 0 of ``img`` and ``j``
            indexes axis 1. It may lie partly or fully outside the image; the
            blob is clipped to the image bounds.
        radius: blob radius in pixels. A radius of zero or less draws nothing
            (previously a zero radius raised ZeroDivisionError).
    """
    if radius <= 0:
        return

    x_start = max(0, round(centre[0] - radius))
    x_stop = min(img.shape[0], round(centre[0] + radius + 1))
    y_start = max(0, round(centre[1] - radius))
    y_stop = min(img.shape[1], round(centre[1] + radius + 1))
    # An empty window must be rejected explicitly: a negative stop would be
    # interpreted by numpy slicing as "counted from the end".
    if x_start >= x_stop or y_start >= y_stop:
        return

    # Column vector of axis-0 indices and row vector of axis-1 indices broadcast
    # into a 2-D distance map that covers only the clipped bounding box.
    xs = np.arange(x_start, x_stop)[:, np.newaxis]
    ys = np.arange(y_start, y_stop)[np.newaxis, :]
    distance_to_centre = np.hypot(centre[0] - xs, centre[1] - ys)

    # Corners of the bounding box lie outside the circle and get no contribution.
    contribution = np.where(distance_to_centre > radius,
                            0.0,
                            1.0 - distance_to_centre / radius)
    img[x_start:x_stop, y_start:y_stop] += contribution
            

def draw_cross(img, centre, cross_width, cross_height):
    """Stamp a plus-shaped mark of ones onto ``img`` in place.

    The mark is the union of two rectangles centred on ``centre``: one spans
    ``cross_width`` pixels either side along axis 0 and ``cross_height`` pixels
    either side along axis 1, the other has the two extents swapped. Both are
    clipped to the image bounds.

    Args:
        img: 2-D numpy array, modified in place.
        centre: pair ``(i, j)`` indexing axis 0 and axis 1 of ``img``.
        cross_width: half-extent of the short side of each bar, in pixels.
        cross_height: half-extent of the long side of each bar, in pixels.
    """
    centre_i = round(centre[0])
    centre_j = round(centre[1])

    # (axis-0 start, axis-0 stop, axis-1 start, axis-1 stop) of each bar.
    # The first bar rounds the centre before adding the extent, the second
    # rounds the sum; this mirrors the established behaviour exactly.
    bars = (
        (centre_i - cross_width, centre_i + cross_width + 1,
         centre_j - cross_height, centre_j + cross_height + 1),
        (round(centre[0] - cross_height), round(centre[0] + cross_height + 1),
         round(centre[1] - cross_width), round(centre[1] + cross_width + 1)),
    )

    for i_from, i_to, j_from, j_to in bars:
        i_from, i_to = max(0, i_from), min(img.shape[0], i_to)
        j_from, j_to = max(0, j_from), min(img.shape[1], j_to)
        # Skip empty windows so that a negative stop is never used as a slice bound.
        if i_from < i_to and j_from < j_to:
            img[i_from:i_to, j_from:j_to] = 1
    
    
def get_img_reconstructed_from_labels(centre_points):
    """Render annotated person centres as a target image for the network.

    Starts from an all-zero array whose shape is taken from the first two
    entries of ``config.IR_CAMERA_RESOLUTION`` and draws one airbrush blob of
    radius 6 per annotated centre.

    Args:
        centre_points: iterable of 2-element coordinate sequences, stored in the
            opposite axis order to the one used for indexing the image.

    Returns:
        2-D numpy float array containing the accumulated blobs.
    """
    n_rows, n_cols = config.IR_CAMERA_RESOLUTION[0], config.IR_CAMERA_RESOLUTION[1]
    canvas = np.zeros(shape=(n_rows, n_cols))

    for labelled_point in centre_points:
        # Swap the two coordinates so they match the image's axis order, then
        # snap them to whole pixels before drawing.
        pixel_centre = [round(coordinate) for coordinate in labelled_point[::-1]]
        draw_airbrush_circle(img=canvas, centre=pixel_centre, radius=6)

    return canvas

In [None]:
# cp = _validation_data_1.centre_points[11]
# img = get_img_reconstructed_from_labels(cp)
# plt.imshow(img)
# print(np.sum(img))

# Total pixel sum that one person contributes to the reconstructed label image.
# Dividing an image's pixel sum by this value gives the estimated number of people.
# The value depends on the drawing parameters (circle radius, etc.) and must be
# re-measured when they change.
# NOTE(review): 35 looks like an empirical value for radius=6 -- confirm with the
# commented-out check above.
sum_of_values_for_one_person = 35



In [None]:
class DatasetOutputMode:
    """String constants selecting what IrPersonsUnetTrainDataset returns as the label."""

    # Label is the number of annotated people in the frame.
    PEOPLE_COUNT = 'PEOPLE_COUNT'
    # Label is the image reconstructed from the annotated centre points.
    RECONSTRUCTED_IMG = 'RECONSTRUCTED_IMG'
    # Label is the tuple (number of people, reconstructed image).
    BOTH_PEOPLE_COUNT_AND_RECONSTRUCTED_IMG = 'BOTH_PEOPLE_COUNT_AND_RECONSTRUCTED_IMG'
    

class IrPersonsUnetTrainDataset(torch.utils.data.Dataset):
    """Map-style dataset of two-channel IR frames with person labels.

    Each sample's input has shape ``(2, *frame.shape)``:

    * channel 0 -- the normalized IR frame,
    * channel 1 -- a temporal-variation map: the per-pixel standard deviation
      over neighbouring frames of the same batch, scaled and clipped to [0, 1].

    The label depends on ``mode`` (see ``DatasetOutputMode``). Samples are built
    lazily on first access and kept in an in-memory cache afterwards.
    """

    # Frames taken before / after the current one for the variation channel.
    _VARIANCE_FRAMES_RANGE = 10
    # Gain applied to the standard deviation before clipping to [0, 1].
    _VARIANCE_GAIN = 20

    def __init__(self, augmented_data: AugmentedBatchesTrainingData, mode=DatasetOutputMode.RECONSTRUCTED_IMG):
        """Index every frame of every batch and precompute its variation map.

        Args:
            augmented_data: container exposing ``batches``; each batch provides
                ``normalized_ir_data`` and ``centre_points`` indexed per frame.
            mode: one of the ``DatasetOutputMode`` constants.
        """
        # Keep the instance itself (the class object used to be stored here by mistake).
        self.augmented_data = augmented_data
        self._index_to_batch_and_subindex_map = {}
        self._index_to_variance_map = {}
        self._mode = mode
        self._cache = {}

        next_index = 0
        for batch in augmented_data.batches:
            frame_count = len(batch.normalized_ir_data)
            if frame_count == 0:
                # Nothing to index; also avoids reading normalized_ir_data[0].
                continue

            # All frames of the batch as one float64 array of shape (frames, *frame.shape).
            frames_in_batch = np.asarray(
                [batch.normalized_ir_data[j] for j in range(frame_count)],
                dtype=np.float64,
            )

            for j in range(frame_count):
                # Window is [j - range, j + range): the end bound is exclusive, and
                # the window never crosses a batch boundary.
                window = frames_in_batch[max(0, j - self._VARIANCE_FRAMES_RANGE):
                                         j + self._VARIANCE_FRAMES_RANGE]
                variation = np.clip(np.std(window, axis=0) * self._VARIANCE_GAIN, 0, 1)

                self._index_to_batch_and_subindex_map[next_index] = (batch, j)
                self._index_to_variance_map[next_index] = variation
                next_index += 1

    def __len__(self):
        """Return the total number of frames across all batches."""
        return len(self._index_to_batch_and_subindex_map)

    def __getitem__(self, idx):
        """Return ``(input, label)`` for the frame with global index ``idx``.

        The input is a numpy array of shape ``(2, *frame.shape)``. The label is
        the reconstructed image, the people count, or ``(count, image)``
        depending on the dataset mode.

        Raises:
            TypeError: if ``idx`` is a tensor (only plain integer indices are supported).
            KeyError: if ``idx`` is not a valid sample index.
        """
        if torch.is_tensor(idx):
            raise TypeError("Not supported")

        if idx not in self._cache:
            batch, subindex = self._index_to_batch_and_subindex_map[idx]
            frame = batch.normalized_ir_data[subindex]
            centre_points = batch.centre_points[subindex]

            img_reconstructed = get_img_reconstructed_from_labels(centre_points)
            frame_3d = np.stack((frame, self._index_to_variance_map[idx]))
            number_of_people = len(centre_points)

            if self._mode == DatasetOutputMode.RECONSTRUCTED_IMG:
                result = frame_3d, img_reconstructed
            elif self._mode == DatasetOutputMode.PEOPLE_COUNT:
                result = frame_3d, number_of_people
            else:
                result = frame_3d, (number_of_people, img_reconstructed)

            self._cache[idx] = result

        return self._cache[idx]

    def get_number_of_persons_for_frame(self, idx):
        """Return the number of annotated centre points for frame ``idx``."""
        batch, subindex = self._index_to_batch_and_subindex_map[idx]
        return len(batch.centre_points[subindex])
        
    
    
    
# Both datasets use the default mode, so each label is the reconstructed image.
training_dataset = IrPersonsUnetTrainDataset(augmented_data_training)
validation_dataset = IrPersonsUnetTrainDataset(augmented_data_validation)


# A random split of one pooled dataset would be meaningless here: most frames are
# almost identical to their neighbours, so validation uses separate recordings instead.
# training_dataset, validation_dataset = torch.utils.data.random_split(all_data_dataset, [training_data_len, validation_data_len])


# Mini-batches of 16 samples; both loaders reshuffle on every pass.
trainloader = torch.utils.data.DataLoader(training_dataset, batch_size=16, shuffle=True)
valloader = torch.utils.data.DataLoader(validation_dataset, batch_size=16, shuffle=True)


# Number of mini-batches per pass over each loader.
print(len(trainloader))
print(len(valloader))

In [None]:
# loader_iterator = iter(valloader)

# for i in range(450):
#     xb, yb = next(loader_iterator)

#     if i < 400:
#         continue
    
#     xb.shape, yb.shape

#     print('#'*30)
# #     plt.imshow(yb[0].numpy().squeeze())
# #     plt.show()
# #     plt.imshow(xb[0][0].numpy().squeeze())
# #     plt.show()
#     plt.imshow(xb[0][1].numpy().squeeze())
#     plt.show()

#     print(f'number of persosn based on sum: {np.sum(yb[0].numpy()) / sum_of_values_for_one_person:.3f}')

In [None]:
# m = xb[0][1].numpy()
# # m = np.clip(m, 0, 1)
# np.max(m)

In [None]:
# One seed for every random number generator this notebook draws from, so that a
# Restart & Run All reproduces the same weights and the same batch sub-sampling.
seed = 336

random.seed(seed)        # `randrange`, used for batch sub-sampling in train()
np.random.seed(seed)     # numpy's global generator
torch.manual_seed(seed)  # PyTorch weight initialisation and DataLoader shuffling

# Explicit CUDA seeding; PyTorch silently ignores these calls when CUDA is unavailable.
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [None]:
from torch import nn
class UNET(nn.Module):
    """Small two-level U-Net that maps a multi-channel frame to a single 2-D map.

    Encoder: two contracting blocks (32 and 64 channels), each halving the
    spatial size. Decoder: two expanding blocks, each doubling it; the last one
    consumes the first encoder output through a skip connection.

    Input height and width that are multiples of 4 are sufficient for the skip
    connection shapes to match and for the output to have the input's size.
    """

    def __init__(self, in_channels, out_channels):
        """Build the network.

        Args:
            in_channels: number of channels of the input tensor.
            out_channels: number of channels produced by the last block. Only
                channel 0 is returned by ``forward``.
        """
        super().__init__()

        self.conv1 = self.contract_block(in_channels, 32, 5, 2)
        self.conv2 = self.contract_block(32, 64, 3, 1)

        self.upconv2 = self.expand_block(64, 32, 3, 1)
        # 32 * 2 input channels: upconv2 output concatenated with the conv1 skip.
        self.upconv1 = self.expand_block(32 * 2, out_channels, 3, 1)

    def forward(self, x):
        """Run the network.

        Implemented as ``forward`` (not ``__call__``) so that ``nn.Module.__call__``
        keeps running registered hooks; ``model(x)`` works as before.

        Args:
            x: tensor of shape ``(batch, in_channels, height, width)``.

        Returns:
            Tensor of shape ``(batch, height_out, width_out)``: channel 0 of the
            last block's output.
        """
        # Downsampling path.
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)

        # Upsampling path with one skip connection.
        upconv2 = self.upconv2(conv2)
        upconv1 = self.upconv1(torch.cat([upconv2, conv1], 1))

        # Drop the channel axis; only the first output channel is used.
        return upconv1[:, 0, :, :]

    def contract_block(self, in_channels, out_channels, kernel_size, padding):
        """Return two Conv-BatchNorm-ReLU stages followed by a stride-2 max pool."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

    def expand_block(self, in_channels, out_channels, kernel_size, padding):
        """Return two Conv-BatchNorm-ReLU stages followed by a stride-2 transposed conv."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            # output_padding=1 makes the output exactly twice the input size.
            nn.ConvTranspose2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
        )
    
    
# Two input channels (IR frame + temporal-variation map), one output channel.
# .double() converts the parameters to float64.
# NOTE(review): presumably chosen to match the dtype of the dataset's numpy arrays -- confirm.
unet = UNET(2, 1).double()
unet = unet.to(device)

In [None]:
# model=unet
# model_parameters = filter(lambda p: p.requires_grad, model.parameters())
# params = sum([np.prod(p.size()) for p in model_parameters])
# params

In [None]:
# model=unet
# sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
def validate_model(loader, model, sum_of_values_per_person=None):
    """Evaluate people-count predictions against counts derived from label images.

    For each sample the predicted map and the label image are summed and divided
    by the per-person pixel sum. The rounded values are compared for accuracy;
    the unrounded prediction is compared with the rounded label for the error.

    Args:
        loader: iterable yielding ``(frames, label_images)`` tensor batches.
        model: ``nn.Module`` returning one 2-D map per input frame.
        sum_of_values_per_person: pixel sum that corresponds to one person.
            Defaults to the notebook-level ``sum_of_values_for_one_person``.

    Returns:
        ``(model_accuracy, average_prediction_error)``. Both are NaN when the
        loader yields no samples.
    """
    if sum_of_values_per_person is None:
        sum_of_values_per_person = sum_of_values_for_one_person

    # Feed inputs to whatever device the model lives on (CPU for parameter-free models).
    first_parameter = next(model.parameters(), None)
    model_device = first_parameter.device if first_parameter is not None else torch.device('cpu')

    correct_count = 0
    tested_frames = 0
    number_of_frames_with_n_persons = {}
    number_of_frames_with_n_persons_predicted_correctly = {}
    persons_error_sum = 0

    # Evaluate in eval mode so BatchNorm uses (and does not update) its running
    # statistics; the previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        for frame, labels in loader:
            # One forward pass per batch (it used to be repeated for every sample).
            with torch.no_grad():
                outputs = model(frame.to(model_device)).cpu().numpy()
            label_images = labels.numpy()

            for i in range(len(labels)):
                pred_people = float(np.sum(outputs[i])) / sum_of_values_per_person
                pred_label = round(pred_people)

                # Approximation: the true count is recovered from the label image
                # rather than taken from the annotations.
                true_label = round(float(np.sum(label_images[i])) / sum_of_values_per_person)

                persons_error_sum += abs(pred_people - true_label)

                number_of_frames_with_n_persons[pred_label] = \
                    number_of_frames_with_n_persons.get(pred_label, 0) + 1

                if true_label == pred_label:
                    correct_count += 1
                    number_of_frames_with_n_persons_predicted_correctly[pred_label] = \
                        number_of_frames_with_n_persons_predicted_correctly.get(pred_label, 0) + 1

                tested_frames += 1
    finally:
        model.train(was_training)

    if tested_frames == 0:
        # NaN (not 0.0) so that an empty loader can never look like a perfect score.
        print("Number of tested frames: 0")
        return float('nan'), float('nan')

    average_prediction_error = persons_error_sum / tested_frames
    model_accuracy = correct_count / tested_frames

    print(f"Number of tested frames: {tested_frames}")
    print(f"Model Accuracy = {model_accuracy}")
    print('Predicted:\n' + '\n'.join([f'   {count} frames with {no} persons' for no, count in number_of_frames_with_n_persons.items()]))
    print('Predicted correctly:\n' + '\n'.join([f'   {count} frames with {no} persons' for no, count in number_of_frames_with_n_persons_predicted_correctly.items()]))
    print(f'average_prediction_error: {average_prediction_error}')

    return model_accuracy, average_prediction_error

In [None]:
def train(model, train_dl, valid_dl, loss_fn, optimizer, epochs=1,
          full_validation_every=10, target_validation_error=0.12):
    """Train ``model`` with periodic validation and early stopping.

    Every epoch runs a training phase and a validation phase. In both phases
    only a random subset of batches is used: the first batch always, every later
    batch with probability 1/100. In the validation phase the first prediction
    is plotted and, every ``full_validation_every`` epochs, the whole validation
    loader is evaluated with ``validate_model``.

    Args:
        model: network to train; moved-to-device inputs are fed to it directly.
        train_dl: iterable of ``(x, y)`` training batches.
        valid_dl: iterable of ``(x, y)`` validation batches.
        loss_fn: callable ``loss_fn(outputs, y)`` returning a scalar tensor.
        optimizer: optimizer over ``model``'s parameters.
        epochs: maximum number of epochs.
        full_validation_every: run the full validation every this many epochs;
            0 or None disables it.
        target_validation_error: training stops early (after epoch 10) once the
            latest full-validation error drops below this value.

    Returns:
        ``(train_loss, valid_loss, valid_error)``: per-epoch mean losses and the
        list of full-validation errors. An epoch loss is NaN when its loader
        yielded no batches.
    """
    start = time.time()
    train_loss, valid_loss, valid_error = [], [], []

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch, epochs - 1))
        print('-' * 10)

        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            model.train(is_training)  # train mode for 'train', eval mode for 'valid'
            dataloader = train_dl if is_training else valid_dl

            running_loss = 0.0
            step = 0
            for x, y in dataloader:
                # Do not use every batch in each epoch: neighbouring frames are
                # nearly identical. The first batch is always kept so that
                # step >= 1 whenever the loader is non-empty.
                if randrange(100) != 1 and step != 0:
                    continue

                step += 1

                x = x.to(device)
                y = y.to(device)

                if is_training:
                    optimizer.zero_grad()
                    outputs = model(x)
                    loss = loss_fn(outputs, y)
                    loss.backward()
                    optimizer.step()
                else:
                    with torch.no_grad():
                        outputs = model(x)
                        loss = loss_fn(outputs, y)
                    if step == 1:
                        # Visual sanity check of the first validation prediction.
                        plt.imshow(outputs[0].cpu())
                        plt.show()

                        if full_validation_every and (epoch + 1) % full_validation_every == 0:
                            # Full validation of the model.
                            _, prediction_error = validate_model(loader=valid_dl, model=model)
                            valid_error.append(prediction_error)

                running_loss += loss.item()

            # An empty loader leaves step at 0; report NaN instead of dividing by zero.
            epoch_loss = running_loss / step if step else float('nan')
            lr = optimizer.param_groups[0]['lr']
            print(f'{phase} Loss: {epoch_loss:.4f}. lr={lr}')
            if is_training:
                train_loss.append(epoch_loss)
            else:
                valid_loss.append(epoch_loss)

        # TODO: the threshold should depend on the validation data in use.
        if epoch > 10 and valid_error and valid_error[-1] < target_validation_error:
            print('Training finished, results good enough...')
            break

    time_elapsed = time.time() - start
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    return train_loss, valid_loss, valid_error

  
# Sometimes the weights need to be reinitialized; after a few epochs (around 10) the result should already be good.
# unet = UNET(2, 1).double()
# unet = unet.to(device)


# Mean absolute error between the predicted map and the reconstructed label image.
loss_fn = nn.L1Loss()
# loss_fn = nn.MSELoss()  # alternative; sometimes learns erratically

opt = torch.optim.Adam(unet.parameters(), lr=0.002)

# Up to 200 epochs; train() may stop earlier once validation is good enough.
train_loss, valid_loss, valid_error = train(
    model=unet,
    train_dl=trainloader,
    valid_dl=valloader,
    loss_fn=loss_fn,
    optimizer=opt,
    epochs=200,
)

# if train_loss[-1] > 0.04:
#     raise Exception("Training error, reinitialize weights. Network sometimes doesn't learn as it should")

In [None]:
# Per-epoch training and validation loss on a logarithmic y-axis.
ax = plt.gca()
ax.plot(train_loss)
ax.plot(valid_loss)
ax.legend(['train', 'valid'])
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.grid()
ax.set_yscale('log')

In [None]:
# Average people-count error from each full validation run performed during training
# (one point per run, not per epoch).
plt.plot(valid_error)

In [None]:
# for _ in range(22):
#     xb, yb = next(iter(valloader))
#     #xb, yb = next(iter(train_d1))
    
#     plt.imshow(xb[0].numpy().squeeze())
#     plt.show()
#     plt.imshow(yb[0].numpy().squeeze())
#     plt.show()

#     with torch.no_grad():
#         outputs = unet(xb.to(device)).to(cpu_device)
    
#     predicted_img = outputs[0].numpy()
#     plt.imshow(predicted_img)
#     plt.show()
#     print(f'number of persosn based on sum: {np.sum(predicted_img) / sum_of_values_for_one_person:.3f}')
#     print("======================================")

In [None]:
# Final evaluation of the trained model on the validation loader; the returned
# (accuracy, average error) tuple is shown as the cell output.
validate_model(loader=valloader, model=unet)

In [None]:
# Larger evaluation set, used to compare predictions with the annotated people
# counts. It adds recordings from a second session and includes all of
# recordings 005-009 of the first session; none of these is in the training lists.
VALIDATION_DIRS_2 = [
    '05_05_2021__0to5_people/012__13_42_20',
    '05_05_2021__0to5_people/013__13_46_21',
    '05_05_2021__0to5_people/014__13_50_21',
    '05_05_2021__0to5_people/015__13_54_21',

    '05_05_2021__0to5_people/007__13_22_20',

    '31_03_21__318__3or4_people/2/005__14_35_20',
    '31_03_21__318__3or4_people/2/006__14_39_20',
    '31_03_21__318__3or4_people/2/007__14_43_20',
    '31_03_21__318__3or4_people/2/008__14_47_20',
    '31_03_21__318__3or4_people/2/009__14_51_20',
]





data_with_people_count = training_data_loader.load_data_for_labeled_batches(labeled_batch_dirs=VALIDATION_DIRS_2)

# NOTE(review): flip_and_rotate=False presumably keeps only the original,
# un-augmented frames -- confirm in training_data_loader.
augmented_data_with_people_count = training_data_loader.AugmentedBatchesTrainingData()
augmented_data_with_people_count.add_training_batch(data_with_people_count, flip_and_rotate=False)


# Labels are (people count, reconstructed image) tuples. batch_size=1 and
# shuffle=False keep the frames in dataset order, one at a time, so the results
# can be plotted as a sequence.
dataset_with_real_people_count = IrPersonsUnetTrainDataset(augmented_data_with_people_count, mode=DatasetOutputMode.BOTH_PEOPLE_COUNT_AND_RECONSTRUCTED_IMG)
loader_with_real_people_count = torch.utils.data.DataLoader(dataset_with_real_people_count, batch_size=1, shuffle=False)

In [None]:
# for frame, labels in loader_with_real_people_count:
#     print(labels[1].shape)
#     print(labels[0].numpy()[0])
#     print(labels[1][0].numpy())
#     break
# len(frame)

In [None]:
def validate_model_with_real_number_of_persons(loader, model, data_plotting_interval=1000,
                                               sum_of_values_per_person=None):
    """Evaluate people-count predictions against the annotated people counts.

    Args:
        loader: iterable yielding ``(frames, (people_counts, label_images))``
            batches, i.e. a dataset in
            ``DatasetOutputMode.BOTH_PEOPLE_COUNT_AND_RECONSTRUCTED_IMG`` mode.
        model: ``nn.Module`` returning one 2-D map per input frame.
        data_plotting_interval: plot the input frame and the prediction for
            every N-th tested frame; 0 or None disables plotting.
        sum_of_values_per_person: pixel sum that corresponds to one person.
            Defaults to the notebook-level ``sum_of_values_for_one_person``.

    Returns:
        ``(model_accuracy, average_prediction_error, real_counts,
        predicted_counts, reconstructed_counts)``. The three lists hold one
        value per tested frame; ``reconstructed_counts`` is the count implied by
        the label image. Accuracy and error are NaN when the loader is empty.
    """
    if sum_of_values_per_person is None:
        sum_of_values_per_person = sum_of_values_for_one_person

    # Feed inputs to whatever device the model lives on (CPU for parameter-free models).
    first_parameter = next(model.parameters(), None)
    model_device = first_parameter.device if first_parameter is not None else torch.device('cpu')

    correct_count = 0
    tested_frames = 0
    number_of_frames_with_n_persons = {}
    number_of_frames_with_n_persons_predicted_correctly = {}
    persons_error_sum = 0

    vec_real_number_of_persons = []
    vec_reconstructed_number_of_persons = []  # calculated from the reconstructed training image
    vec_predicted_number_of_persons = []

    # Evaluate in eval mode so BatchNorm uses (and does not update) its running
    # statistics; the previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        for frame, people_count_and_reconstructed_image in loader:
            # One forward pass per batch (it used to be repeated for every sample).
            with torch.no_grad():
                outputs = model(frame.to(model_device)).cpu().numpy()
            people_counts = people_count_and_reconstructed_image[0].numpy()
            reconstructed_images = people_count_and_reconstructed_image[1].numpy()

            for i in range(len(frame)):
                predicted_img = outputs[i]
                pred_people = float(np.sum(predicted_img)) / sum_of_values_per_person
                pred_label = round(pred_people)

                # Index with i, not 0, so that batch sizes above 1 pair each
                # prediction with its own label.
                true_people_count = people_counts[i]
                reconstructed_image = reconstructed_images[i]  # training target of the U-Net
                vec_reconstructed_number_of_persons.append(
                    np.sum(reconstructed_image) / sum_of_values_per_person)

                persons_error_sum += abs(pred_people - true_people_count)

                number_of_frames_with_n_persons[pred_label] = \
                    number_of_frames_with_n_persons.get(pred_label, 0) + 1

                if true_people_count == pred_label:
                    correct_count += 1
                    number_of_frames_with_n_persons_predicted_correctly[pred_label] = \
                        number_of_frames_with_n_persons_predicted_correctly.get(pred_label, 0) + 1

                tested_frames += 1

                vec_real_number_of_persons.append(true_people_count)
                vec_predicted_number_of_persons.append(pred_people)

                if data_plotting_interval and tested_frames % data_plotting_interval == 0:
                    plt.imshow(frame[i, 0, :, :])
                    plt.show()
                    print(f'true_people_count={true_people_count}, pred_people={pred_people}')
                    plt.imshow(predicted_img)
                    plt.show()
                    print('#'*30)
    finally:
        model.train(was_training)

    if tested_frames == 0:
        # NaN (not 0.0) so that an empty loader can never look like a perfect score.
        print("Number of tested frames: 0")
        return float('nan'), float('nan'), vec_real_number_of_persons, \
            vec_predicted_number_of_persons, vec_reconstructed_number_of_persons

    average_prediction_error = persons_error_sum / tested_frames
    model_accuracy = correct_count / tested_frames

    print(f"Number of tested frames: {tested_frames}")
    print(f"Model Accuracy = {model_accuracy}")
    print('Predicted:\n' + '\n'.join([f'   {count} frames with {no} persons' for no, count in number_of_frames_with_n_persons.items()]))
    print('Predicted correctly:\n' + '\n'.join([f'   {count} frames with {no} persons' for no, count in number_of_frames_with_n_persons_predicted_correctly.items()]))
    print(f'average_prediction_error: {average_prediction_error}')

    return model_accuracy, average_prediction_error, vec_real_number_of_persons, \
            vec_predicted_number_of_persons, vec_reconstructed_number_of_persons


# Evaluate on the larger set; only the per-frame vectors are kept for plotting
# (the accuracy and average error are printed by the function).
_, _, real_vec, predicted_vec, vec_reconstructed_number_of_persons = \
        validate_model_with_real_number_of_persons(loader=loader_with_real_people_count, model=unet)


In [None]:
# %matplotlib
# Annotated versus predicted (unrounded) people count for every frame, in loader order.
fig=plt.figure(figsize=(8,6), dpi= 100, facecolor='w', edgecolor='k')

plt.grid()
plt.stairs(real_vec, linewidth=2.5)
plt.stairs(predicted_vec, linewidth=1)

plt.title('number of people')
plt.legend(['real', 'predicted'])
# NOTE(review): the axis unit implies one frame every 0.5 s -- confirm against the recording rate.
plt.xlabel('time [0.5*s]')
plt.ylabel('people count')
# plt.stairs(vec_reconstructed_number_of_persons)

# plt.show()
# plt.plot(np.array(vec_reconstructed_number_of_persons) - np.array(predicted_vec))

