In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision.models import resnet34, EfficientNet_B1_Weights
from torchvision.datasets import CIFAR10
from torchmetrics.regression import KendallRankCorrCoef,PearsonCorrCoef
import matplotlib.pyplot as plt

import os
import time
import numpy as np
import pandas as pd

from PIL import Image
from tensorboardX import SummaryWriter
from sklearn import preprocessing
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn as nn
from torchvision.transforms import AutoAugmentPolicy, AutoAugment, RandomPerspective, RandomAffine, ElasticTransform, Grayscale
from copy import copy

import urllib
import cv2

# Show which accelerator is visible to PyTorch.
# The name lookup is guarded because torch.cuda.get_device_name(0) raises on a
# machine without CUDA; previously the result of is_available() was computed
# and thrown away, so the check protected nothing.
cuda_device_name = (
    torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CUDA not available"
)
cuda_device_name

In [None]:
### Model based on Z. Zou, X. Zhang, H. Liu, Z. Li, A. Hussain, and J. Li, "A novel multimodal fusion network based on a joint coding model for lane line segmentation", http://arxiv.org/abs/2103.11114

In [None]:
# Root folders of the training and validation splits (relative to the notebook).
data_root = "./dataStructure/training"
validation_root = "./dataStructure/validation"

# Name of the sub-folder (inside a split root) that holds the label text files.
txt_file_train = "label_2"

# Object classes kept when the label files are parsed.
thing_classes = ["Pedestrian"]

# NOTE(review): not referenced anywhere else in the visible cells — TODO confirm it is still needed.
train_img_count = 5

In [None]:
def get_pedestrian_label(data_root, txt_folder, classes=None):
    """Collect the 2D bounding boxes of the wanted classes from a folder of label files.

    Every regular file in ``<data_root>/<txt_folder>`` is read line by line. Each
    non-empty line is split on whitespace; field 0 is the class name and fields
    4..7 are ``xmin, ymin, xmax, ymax``. Any further fields are ignored, so lines
    with extra trailing columns (for example a score) are accepted.

    Args:
        data_root: Root folder of the data split.
        txt_folder: Name of the sub-folder that contains the label files.
        classes: Iterable of class names to keep. Defaults to the notebook-level
            ``thing_classes`` list when omitted.

    Returns:
        A list of ``(file_id, boxes)`` tuples sorted by file name. ``file_id`` is
        the file name without its extension and ``boxes`` is a (possibly empty)
        list of ``(xmin, ymin, xmax, ymax)`` integer tuples, truncated towards zero.

    Raises:
        ValueError: If a line of a wanted class has fewer than 8 fields or a
            coordinate that is not a number.
    """
    wanted = set(thing_classes if classes is None else classes)
    labels_folder = os.path.join(data_root, txt_folder)

    annotations = []
    # os.listdir() returns entries in arbitrary order; sort for reproducible output.
    for file_name in sorted(os.listdir(labels_folder)):
        file_path = os.path.join(labels_folder, file_name)
        if not os.path.isfile(file_path):
            # Skip directories such as .ipynb_checkpoints instead of crashing in open().
            continue

        file_id = os.path.splitext(file_name)[0]
        bb_list = []
        with open(file_path, 'r') as opened_file:
            for line_number, line in enumerate(opened_file, start=1):
                fields = line.split()
                if not fields or fields[0] not in wanted:
                    continue
                if len(fields) < 8:
                    raise ValueError(
                        f"{file_path}:{line_number}: expected at least 8 fields, got {len(fields)}"
                    )
                # Coordinates are stored as decimals; keep the integer pixel part.
                xmin, ymin, xmax, ymax = (int(float(value)) for value in fields[4:8])
                bb_list.append((xmin, ymin, xmax, ymax))
        annotations.append((file_id, bb_list))

    return annotations

In [None]:
import random

# Parsed labels of the training split: a list of (file_id, [(xmin, ymin, xmax, ymax), ...]).
train_data_dict = get_pedestrian_label(data_root, txt_file_train)

# Visual sanity check: draw the boxes of ONE random frame on both modalities.
# The colour and the dense image must come from the same frame; sampling twice
# (as before) resized the dense image with the size of an unrelated frame.
sample_id, sample_boxes = random.choice(train_data_dict)

color_path = os.path.join(data_root, "image_2", sample_id + ".png")
img = cv2.imread(color_path)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read colour image: {color_path}")
h1, w1 = img.shape[:2]

dense_path = os.path.join(data_root, "densec", sample_id + ".png")
imgdense = cv2.imread(dense_path)
if imgdense is None:
    raise FileNotFoundError(f"Could not read dense image: {dense_path}")
# The boxes are drawn in the colour image's pixel grid, so bring the dense image to that size.
imgdense = cv2.resize(imgdense, (w1, h1))

for x1, y1, x2, y2 in sample_boxes:
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.rectangle(imgdense, (x1, y1), (x2, y2), (0, 255, 0), 2)

for panel_title, panel in ((f"image_2/{sample_id}", img), (f"densec/{sample_id}", imgdense)):
    plt.figure(figsize=(12, 12))
    # OpenCV loads images as BGR while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(panel, cv2.COLOR_BGR2RGB))
    plt.title(panel_title)
    plt.show()


In [None]:
def writeBboxColor(imagename, train_data_dict):
    """Open the colour image of a frame that is listed in the parsed labels.

    Despite the name, no boxes are drawn: the image is only opened.

    Args:
        imagename: File id of the frame (file name without extension).
        train_data_dict: Sequence of entries whose first element is a file id.

    Returns:
        The result of ``Image.open`` on ``<data_root>/image_2/<imagename>.png``
        when ``imagename`` is among the listed ids, otherwise ``None``.
    """
    listed = any(entry[0] == imagename for entry in train_data_dict)
    if not listed:
        return None
    return Image.open(os.path.join(data_root, "image_2", imagename + ".png"))

In [None]:
def writeBboxDense(imagename, train_data_dict):
    """Open the dense image of a frame that is listed in the parsed labels.

    Despite the name, no boxes are drawn: the image is only opened.

    Args:
        imagename: File id of the frame (file name without extension).
        train_data_dict: Sequence of entries whose first element is a file id.

    Returns:
        The result of ``Image.open`` on ``<data_root>/densec/<imagename>.png``
        when ``imagename`` is among the listed ids, otherwise ``None``.
    """
    listed = any(entry[0] == imagename for entry in train_data_dict)
    if not listed:
        return None
    return Image.open(os.path.join(data_root, "densec", imagename + ".png"))

In [None]:
class kitti_loader:
    """Map-style dataset yielding (colour image, dense image, first bounding box).

    Expected layout below ``directory``: ``label_2/`` (label text files),
    ``image_2/<id>.png``, ``densec/<id>.png`` and ``calib/`` (only its number of
    entries is used, as the dataset length). Sample ``idx`` maps to the
    zero-padded six-digit file id, e.g. ``7 -> "000007"``.

    Everything is read from ``directory``. Earlier revisions read labels and
    images from the notebook-level ``data_root`` regardless of ``directory``,
    so a dataset built on the validation folder silently served training data.

    Args:
        directory: Root folder of the split.
        transform: Callable applied separately to the colour and the dense
            image, or a falsy value to return the opened images unchanged.
    """

    def __init__(self, directory, transform):
        txt_file_train = "label_2"
        self.root_dir = directory
        self.transform = transform
        # List of (file_id, boxes); kept under its original attribute name.
        self.train_data_dict = get_pedestrian_label(directory, txt_file_train)
        # file_id -> boxes, so __getitem__ does not scan the whole list per sample.
        self._boxes_by_id = dict(self.train_data_dict)

    def __len__(self):
        return len(os.listdir(os.path.join(self.root_dir, "calib")))

    def __getitem__(self, idx):
        """Return ``(image, dense, labelList)`` for sample ``idx``.

        ``labelList`` is an integer tensor of shape ``(1, 4)`` holding the first
        box of the frame as ``(xmin, ymin, xmax, ymax)``, or zeros when the frame
        has no box of the wanted classes.

        Raises:
            KeyError: If no label file exists for the requested index.
        """
        # Convert tensor indices BEFORE formatting (the old check tested the
        # already formatted string and therefore never fired).
        if torch.is_tensor(idx):
            idx = idx.tolist()
        formatedidx = f"{int(idx):06}"

        if formatedidx not in self._boxes_by_id:
            raise KeyError(f"No label file for sample {formatedidx!r} in {self.root_dir!r}")

        image = Image.open(os.path.join(self.root_dir, "image_2", formatedidx + ".png"))
        dense = Image.open(os.path.join(self.root_dir, "densec", formatedidx + ".png"))
        if self.transform:
            image = self.transform(image)
            dense = self.transform(dense)

        # NOTE(review): boxes stay in the pixel coordinates of the label file;
        # they are not adjusted for whatever `transform` does to the images.
        boxes = self._boxes_by_id[formatedidx]
        first_box = boxes[0] if boxes else (0, 0, 0, 0)
        labelList = torch.tensor([first_box])
        return image, dense, labelList

In [None]:
class MyModel(nn.Module):
    """Two-stream fusion network for a colour image and a dense ("points") image.

    Both inputs go through the same kind of encoder (stem conv -> two
    ResNet-34-based stages -> conv blocks -> transposed conv). The streams are
    merged in blocks M and N, upsampled, combined with the stem features in
    block O and reduced by block P to one value per sample, which feeds two
    linear heads.

    The colour words in the block comments (pink / red / blue) are kept from the
    original code; they presumably refer to the architecture figure of the
    referenced paper — TODO confirm.

    Fixes compared with the earlier revision:
      * the dense stream uses its own stem (``G_block_conv`` / ``G_block_bn``)
        instead of re-using block A's layers, which left block G's conv and
        batch norm unused;
      * block N gets its own ResNet-34 backbone instead of sharing layer
        objects (and therefore weights) with block M;
      * the scratch ``resnet34`` attribute is no longer registered, so its unused
        stem/fc parameters are gone. Checkpoints written by the earlier revision
        contain extra ``resnet34.*`` keys; load them with ``strict=False``;
      * the upsampling size in block O is taken from the feature map it is
        concatenated with instead of the hard-coded 193x193 (identical for
        224x224 inputs).

    Inputs:
        x: colour image batch, ``(N, 3, H, W)``.
        y: dense image batch, ``(N, 3, H, W)``, same spatial size as ``x``.

    Returns:
        ``(outR, bbox_output)``: sigmoid score of shape ``(N, 1)`` and box
        regression of shape ``(N, 4)``.
    """

    @staticmethod
    def _resnet34_stage(in_channels, out_channels):
        """Build one ResNet-34-based stage with its own, randomly initialised weights.

        The torchvision ResNet-34 is stripped of its average pool and fc layer,
        its first conv is replaced to accept ``in_channels``, and a strided 7x7
        conv maps the 512 backbone channels to ``out_channels``.
        """
        backbone = nn.Sequential(*list(models.resnet34(weights=None).children())[:-2])
        backbone[0] = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3)
        return nn.Sequential(
            backbone,
            nn.Conv2d(512, out_channels, kernel_size=7, stride=2, padding=3),
        )

    def __init__(self):
        super().__init__()

        # ------------------------------ IMAGE RGB ------------------------------
        # Block A pink
        self.A_block_conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.A_block_bn = nn.BatchNorm2d(64)
        self.A_block_relu = nn.ReLU()

        # Block B red
        self.B_resnet34 = self._resnet34_stage(in_channels=64, out_channels=64)

        # Block C red
        self.C_resnet34 = self._resnet34_stage(in_channels=64, out_channels=128)

        # Block D pink
        self.D_block_conv = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.D_block_bn = nn.BatchNorm2d(256)
        self.D_block_relu = nn.ReLU()

        # Block E pink
        self.E1_block_conv = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.E1_block_bn = nn.BatchNorm2d(512)
        self.E1_block_relu = nn.ReLU()

        self.E2_block_conv = nn.Conv2d(in_channels=512, out_channels=384, kernel_size=3, padding=1)
        self.E2_block_bn = nn.BatchNorm2d(384)
        self.E2_block_relu = nn.ReLU()

        # Block F blue
        self.F_block_convt = nn.ConvTranspose2d(in_channels=384, out_channels=64, kernel_size=3, padding=1)
        self.F_block_bn = nn.BatchNorm2d(64)
        self.F_block_relu = nn.ReLU()

        # -------------------------------- POINTS --------------------------------
        # Block G pink
        self.G_block_conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.G_block_bn = nn.BatchNorm2d(64)
        self.G_block_relu = nn.ReLU()

        # Block H red
        self.H_resnet34 = self._resnet34_stage(in_channels=64, out_channels=64)

        # Block I red
        self.I_resnet34 = self._resnet34_stage(in_channels=64, out_channels=128)

        # Block J pink
        self.J_block_conv = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.J_block_bn = nn.BatchNorm2d(256)
        self.J_block_relu = nn.ReLU()

        # Block K pink
        self.K1_block_conv = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.K1_block_bn = nn.BatchNorm2d(512)
        self.K1_block_relu = nn.ReLU()

        self.K2_block_conv = nn.Conv2d(in_channels=512, out_channels=384, kernel_size=3, padding=1)
        self.K2_block_bn = nn.BatchNorm2d(384)
        self.K2_block_relu = nn.ReLU()

        # Block L blue
        self.L_block_convt = nn.ConvTranspose2d(in_channels=384, out_channels=64, kernel_size=3, padding=1)
        self.L_block_bn = nn.BatchNorm2d(64)
        self.L_block_relu = nn.ReLU()

        # -------------------------------- FUSION --------------------------------
        # Block M red/pink: 256 = channels of outC (128) + outI (128);
        # 320 = M1 output (192) + outF (64) + outL (64).
        self.M1_resnet34 = self._resnet34_stage(in_channels=256, out_channels=192)
        self.M2_block_conv = nn.Conv2d(in_channels=320, out_channels=320, kernel_size=3, padding=1)
        self.M2_block_bn = nn.BatchNorm2d(320)
        self.M2_block_relu = nn.ReLU()

        # Block N red/blue: 448 = outM (320) + pooled outH (64) + pooled outB (64).
        self.N_in_block = nn.AdaptiveAvgPool2d((1, 1))
        self.N1_resnet34 = self._resnet34_stage(in_channels=448, out_channels=192)
        self.N2_block_convt = nn.ConvTranspose2d(in_channels=192, out_channels=192, kernel_size=3, padding=1)
        self.N2_block_bn = nn.BatchNorm2d(192)
        self.N2_block_relu = nn.ReLU()

        # Block O pink/blue
        # NOTE(review): kernel_size=34 is kept as in the original; with padding=1
        # it shrinks each spatial side by 31 (224 -> 193). It may be a typo for 3,
        # but changing it would alter the architecture — TODO confirm.
        self.O0_block_conv = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=34, padding=1)
        self.O1_block_conv = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.O1_block_bn = nn.BatchNorm2d(256)
        self.O1_block_relu = nn.ReLU()

        self.O2_block_convt = nn.ConvTranspose2d(in_channels=320, out_channels=192, kernel_size=3, padding=1)
        self.O2_block_bn = nn.BatchNorm2d(192)
        self.O2_block_relu = nn.ReLU()

        # Block P blue
        self.P_block_convt = nn.ConvTranspose2d(in_channels=192, out_channels=1, kernel_size=3, padding=1)
        self.P_block_bn = nn.BatchNorm2d(1)
        self.P_block_relu = nn.ReLU()
        self.P_avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        # Heads: block P is pooled to one channel at 1x1, i.e. one feature per sample.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1, 1),
            nn.Sigmoid()
        )
        self.bbox_regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1, 4)
        )

    def forward(self, x, y):
        # ---- colour stream ----
        outA = self.A_block_relu(self.A_block_bn(self.A_block_conv(x)))
        outB = self.B_resnet34(outA)
        outC = self.C_resnet34(outB)
        outD = self.D_block_relu(self.D_block_bn(self.D_block_conv(outC)))
        # NOTE(review): outD is concatenated with itself to reach 512 channels,
        # as in the original code — confirm against the reference architecture.
        outE1 = self.E1_block_relu(self.E1_block_bn(self.E1_block_conv(torch.cat([outD, outD], dim=1))))
        outE = self.E2_block_relu(self.E2_block_bn(self.E2_block_conv(outE1)))
        outF = self.F_block_relu(self.F_block_bn(self.F_block_convt(outE)))

        # ---- dense stream (own stem; previously block A's conv/bn were used here) ----
        outG = self.G_block_relu(self.G_block_bn(self.G_block_conv(y)))
        outH = self.H_resnet34(outG)
        outI = self.I_resnet34(outH)
        outJ = self.J_block_relu(self.J_block_bn(self.J_block_conv(outI)))
        outK1 = self.K1_block_relu(self.K1_block_bn(self.K1_block_conv(torch.cat([outJ, outJ], dim=1))))
        outK = self.K2_block_relu(self.K2_block_bn(self.K2_block_conv(outK1)))
        outL = self.L_block_relu(self.L_block_bn(self.L_block_convt(outK)))

        # ---- block M: fuse the deepest features of both streams ----
        fused_deep = self.M1_resnet34(torch.cat([outC, outI], dim=1))
        outM = self.M2_block_relu(self.M2_block_bn(self.M2_block_conv(torch.cat([fused_deep, outF, outL], dim=1))))

        # ---- block N: add globally pooled mid-level features ----
        outH_pooled = self.N_in_block(outH)
        outB_pooled = self.N_in_block(outB)
        outN1 = self.N1_resnet34(torch.cat([outM, outH_pooled, outB_pooled], dim=1))
        outN = self.N2_block_relu(self.N2_block_bn(self.N2_block_convt(outN1)))

        # ---- block O: combine with the stem features of both streams ----
        outAA = self.O0_block_conv(outA)
        outGG = self.O0_block_conv(outG)

        # Match the spatial size of the map it is concatenated with
        # (193x193 for 224x224 inputs, which is what was hard-coded before).
        outN_upsampled = F.interpolate(outN, size=outGG.shape[-2:], mode='bilinear', align_corners=False)
        outO1 = self.O1_block_relu(self.O1_block_bn(self.O1_block_conv(torch.cat([outN_upsampled, outGG], dim=1))))
        outO = self.O2_block_relu(self.O2_block_bn(self.O2_block_convt(torch.cat([outAA, outO1], dim=1))))

        # ---- block P and heads ----
        outP = self.P_avg_pool(self.P_block_relu(self.P_block_bn(self.P_block_convt(outO))))
        outR = self.classifier(outP)
        bbox_output = self.bbox_regressor(outP)
        return outR, bbox_output

# Build the fusion network; it is moved to the training device in the training cell below.
model = MyModel()

# Debug helper: uncomment to print the module tree.
# print(model)

# NOTE(review): `summary` is not imported in the visible cells (presumably torchinfo/torchsummary) — TODO confirm before enabling.
#summary(model, input_size=((32, 3, 128, 256), (32, 3, 128, 256)))

In [None]:
# TensorBoard event files are written to <logdir>/<experiment>.
logdir, experiment = "logs", "late"
log_dir = os.path.join(logdir, experiment)

# NOTE(review): per the tensorboardX documentation `comment` only affects the
# auto-generated directory name, so it has no effect once `log_dir` is given.
tb_writer = SummaryWriter(log_dir=log_dir, comment="EarlyFusion")

# Scalar tags, one "<metric>/<split>" pair per metric.
train_accuracy_tag, validation_accuracy_tag = "accuracy/train", "accuracy/validation"
train_loss_tag, validation_loss_tag = "loss/train", "loss/validation"
training_ROC_tag, validation_ROC_tag = "ROC/train", "ROC/validation"
training_pearson_tag, validation_pearson_tag = "pearson/training", "pearson/validation"
training_kendal_tag, validation_kendal_tag = "kendal/training", "kendal/validation"

In [None]:
def save_model(model, device, epoch, model_dir='models', ):
    """Write the model's ``state_dict`` to ``<model_dir>/earlyfusion<epoch>.pth``.

    The checkpoint always contains CPU tensors, so it can be loaded on a machine
    without a GPU. The tensors are copied to the CPU for saving; the model itself
    is left on its current device. The earlier revision moved the whole model to
    the CPU and back, guarded by ``device == 'cuda'``, which is false for a
    ``torch.device`` object, so checkpoints were written with CUDA tensors.

    Args:
        model: Module whose parameters and buffers are saved.
        device: Accepted for backward compatibility (string or ``torch.device``);
            it no longer influences the result.
        epoch: Value interpolated into the file name.
        model_dir: Target directory; created (including parents) when missing.
    """
    model_file_name = f'earlyfusion{epoch}.pth'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, model_file_name)

    cpu_state = {name: tensor.detach().cpu() for name, tensor in model.state_dict().items()}
    torch.save(cpu_state, model_path)

    return

In [None]:
# ----------------------------- data pipeline -----------------------------
# Standard ImageNet channel statistics, applied to both modalities.
Mean = [0.485, 0.456, 0.406]
Std = [0.229, 0.224, 0.225]
common_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(Mean, Std)
    ])

trainset = kitti_loader(directory=data_root, transform=common_transforms)
valset = kitti_loader(directory=validation_root, transform=common_transforms)

batch_size = 2
# drop_last keeps every training batch at full size (BatchNorm in train mode
# needs more than one value per channel).
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)
# Validation needs neither shuffling nor dropped samples.
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, drop_last=False)

# --------------------------- model / optimiser ---------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

optimizer = optim.AdamW(
    model.parameters(),
    lr = 0.0001)

num_epochs = 100

# One metric object per split. They are reset before each use so that state
# does not pile up across epochs; values are computed over the whole pass
# (a correlation over a single batch of two samples carries no information).
kendall = KendallRankCorrCoef(num_outputs=4).to(device)
pearson = PearsonCorrCoef(num_outputs=4).to(device)
kendall_val = KendallRankCorrCoef(num_outputs=4).to(device)
pearson_val = PearsonCorrCoef(num_outputs=4).to(device)

t_begin = time.time()
criterion = nn.MSELoss()

# ------------------------------ training loop ------------------------------
# NOTE(review): only the box-regression head is trained; `outputs_class` is
# returned by the model but never enters the loss.
for epoch in range(num_epochs):
    # Validation switches to eval mode; switch back at the start of EVERY epoch,
    # otherwise all epochs after the first validation train with frozen
    # BatchNorm statistics.
    model.train()
    running_loss = 0.0
    kendall.reset()
    pearson.reset()

    for i, data in enumerate(trainloader):
        input1, input2, label = data

        input1 = input1.to(device)
        input2 = input2.to(device)
        # (B, 1, 4) -> (B, 4); squeeze(1) keeps the batch axis even for B == 1.
        labels = label.to(device).squeeze(1).float()

        optimizer.zero_grad()
        outputs_class, outputs_bbox = model(input1, input2)

        loss = criterion(outputs_bbox, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Detach so the metric state does not keep the autograd graph alive.
        batch_predictions = outputs_bbox.detach()
        kendall.update(batch_predictions, labels)
        pearson.update(batch_predictions, labels)

    # Mean loss per batch, so that train and validation curves are comparable.
    train_loss = running_loss / max(len(trainloader), 1)

    kendall_mean_scalar = torch.mean(kendall.compute()).item()
    print("kendall_mean_scalar",kendall_mean_scalar)
    pearson_mean_scalar = torch.mean(pearson.compute()).item()
    print("pearson_mean_scalar",pearson_mean_scalar)

    tb_writer.add_scalar(tag=train_loss_tag,
             scalar_value=train_loss,
             global_step=epoch)
    tb_writer.add_scalar(tag=training_kendal_tag,
             scalar_value=kendall_mean_scalar,
             global_step=epoch)
    tb_writer.add_scalar(tag=training_pearson_tag,
             scalar_value=pearson_mean_scalar,
             global_step=epoch)

    elapsed_time = time.time() - t_begin
    speed_epoch = elapsed_time / (epoch + 1)
    eta = speed_epoch * num_epochs - elapsed_time

    print(
        "Elapsed {:.2f}s, {:.2f} s/epoch, ets {:.2f}s".format(
            elapsed_time, speed_epoch, eta
        )
    )

    print(f"Epoch {epoch+1},Loss: {train_loss} ")

    # ------------------------- periodic validation -------------------------
    if epoch % 5 == 0:
        model.eval()
        val_loss = 0.0
        kendall_val.reset()
        pearson_val.reset()

        with torch.no_grad():
            for batch_idx, data in enumerate(valloader):
                input1, input2, label = data
                input1 = input1.to(device)
                input2 = input2.to(device)
                labels = label.to(device).squeeze(1).float()

                outputs_class, outputs_bbox = model(input1, input2)

                val_loss += criterion(outputs_bbox, labels).item()
                # Use the validation metric objects (the training ones were
                # updated here before, and the Pearson value logged the sum
                # instead of the mean).
                kendall_val.update(outputs_bbox, labels)
                pearson_val.update(outputs_bbox, labels)

        if len(valloader) > 0:
            val_loss = val_loss / len(valloader)
            kendall_mean_val_scalar = torch.mean(kendall_val.compute()).item()
            pearson_mean_val_scalar = torch.mean(pearson_val.compute()).item()

            tb_writer.add_scalar(tag=validation_loss_tag,
                 scalar_value=val_loss,
                 global_step=epoch)
            tb_writer.add_scalar(tag=validation_kendal_tag,
                     scalar_value=kendall_mean_val_scalar,
                     global_step=epoch)
            tb_writer.add_scalar(tag=validation_pearson_tag,
                     scalar_value=pearson_mean_val_scalar,
                     global_step=epoch)

        # Pass the device type as a string ('cuda' / 'cpu'): save_model compares
        # its `device` argument against the string 'cuda'.
        save_model(model, device=device.type, epoch=epoch)

# Make sure everything logged so far is written to disk.
tb_writer.flush()

print("Finished Training")