In [23]:
import os
import json
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import yaml
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18, resnet50
import torchvision.models.detection as detection

In [24]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

def load_class_info(yaml_file):
    with open(yaml_file, 'r') as file:
            class_info = yaml.safe_load(file)
    return class_info['classes']

In [25]:
class NormalDataLoader(Dataset):
    def __init__(self, dataset_folder, class_info_file, transform=None, demo=False):
        self.dataset_folder = dataset_folder
        self.transform = transform
        # self.label_transform = label_transform
        self.demo = demo
        self.imgs_files = self.load_data(dataset_folder)
        self.class_names = load_class_info(class_info_file)
        self.num_classes = len(self.class_names)
        # self.normalize_keypoints = normalize_keypoints
        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(self.class_names)}
        print(self.class_to_idx)

    def load_data(self, dataset_folder):
        images_path = os.path.join(self.dataset_folder,"images/")
        annotations_path = os.path.join(self.dataset_folder,"annotations/")
        j_data = []
        for file in os.listdir(images_path):
            if file.endswith(".jpg"):
                json_path = os.path.join(annotations_path, file.split('.')[0] + '.json')
                with open(json_path) as f:
                    json_load = json.load(f)
                    for item in json_load['shapes']:
                        points = [value for row in item['points'] for value in row]
                        j_data.append({'image':  os.path.join(images_path,file),
                                     'label': item['label'],
                                     'points':points })
        json_data = pd.DataFrame(j_data)
        return json_data

    def get_keypoint(self, bboxes):
        centers = []
        for bbox in bboxes:
            center_x = (bbox[0] + bbox[2]) / 2
            center_y = (bbox[1] + bbox[3]) / 2
            centers.append((center_x, center_y))
        return centers
    
    def __len__(self):
        return len(self.imgs_files)

    def __getitem__(self, idx):
        target = {}
        img_path, label, bboxes_original = self.imgs_files.iloc[idx]
        bboxes_original = [bboxes_original]
        label = torch.tensor(self.class_to_idx[label], dtype=torch.int32)
        keypoint_original = torch.tensor(self.get_keypoint(bboxes_original), dtype=torch.float32)
        # img_original = cv2.imread(img_path)
        # img_original = cv2.cvtColor(img_original, cv2.COLOR_BGR2RGB)
        # img_original = read_image(img_path).float() / 255.0
        img_original = Image.open(img_path).convert('RGB')

        if self.transform:
            img_original = self.transform(img_original)

        return img_original, keypoint_original, label

In [26]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [27]:
class ResNet18Model(nn.Module):
    def __init__(self, num_classes, num_keypoints=1):
        super(ResNet18Model, self).__init__()
        self.backbone = resnet18(pretrained=True)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes + num_keypoints * 2) # 2 for each keypoint (x, y)
        self.num_keypoints = num_keypoints
        
    def forward(self, x):
        outputs = self.backbone(x)
        keypoints = outputs[:, -self.num_keypoints * 2:]  # Last 2*num_keypoints values are keypoints
        class_logits = outputs[:, :-self.num_keypoints * 2]  # The rest are class logits
        return class_logits, keypoints

class ResNet50Model(nn.Module):
    def __init__(self, num_classes, num_keypoints=1):
        super(ResNet50Model, self).__init__()
        self.backbone = resnet50(pretrained=True)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes + num_keypoints * 2) # 2 for each keypoint (x, y)
        self.num_keypoints = num_keypoints
        
    def forward(self, x):
        outputs = self.backbone(x)
        keypoints = outputs[:, -self.num_keypoints * 2:]  # Last 2*num_keypoints values are keypoints
        class_logits = outputs[:, :-self.num_keypoints * 2]  # The rest are class logits
        return class_logits, keypoints

In [28]:
def LaplaceNLLLoss(input_data, target, scale, eps=1e-06, reduction='mean'):
    device = input_data.device
    target = target.to(device)
    scale = scale.to(device)
    
    # loss = torch.log(2*scale) + torch.abs(input_data - target)/scale

    # Inputs and targets much have same shape
    input_data = input_data.view(input_data.size(0), -1)
    target = target.view(target.size(0), -1)
    if input_data.size() != target.size():
        raise ValueError("input and target must have same size")

    # Second dim of scale must match that of input or be equal to 1
    scale = scale.view(input_data.size(0), -1)
    if scale.size(1) != input_data.size(1) and scale.size(1) != 1:
        raise ValueError("scale is of incorrect size")

    # Check validity of reduction mode
    if reduction != 'none' and reduction != 'mean' and reduction != 'sum':
        raise ValueError(reduction + " is not valid")
        
    # Entries of var must be non-negative
    if torch.any(scale < 0):
        raise ValueError("scale has negative entry/entries")

    # Clamp for stability
    scale = scale.clone()
    with torch.no_grad():
        scale.clamp_(min=eps)

    # Calculate loss (without constant)
    loss = (torch.log(2*scale) + torch.abs(input_data - target) / scale).view(input_data.size(0), -1).sum(dim=1)


    # Apply reduction
    if reduction == 'mean':
        return loss.mean()
    elif reduction == 'sum':
        return loss.sum()
    else:
        return loss

In [74]:
# def train_model(model, dataloader, criterion_cls, criterion_kpt, optimizer, num_epochs=10):
#     train_losses = []
#     key_train_losses = []
    
#     for epoch in range(num_epochs):
#         model.train()
#         total_train_loss = 0.0
#         total_kpt_loss = 0.0

#         for inputs, keypoints, labels in dataloader:
#             optimizer.zero_grad()
#             class_logits, predicted_keypoints = model(inputs)
#             loss_classification = criterion_cls(class_logits, labels)
#             loss_keypoints = criterion_kpt(predicted_keypoints, keypoints)
#             loss_cls = loss_classification 
#             loss_kpt = loss_keypoints
#             loss_cls.backward(retain_graph=True)
#             loss_kpt.backward()
#             optimizer.step()
#             total_train_loss += loss_cls.item()
#             total_kpt_loss += loss_kpt.item()

#         avg_train_loss = total_train_loss / len(dataloader.dataset)
#         avg_kpt_loss = total_kpt_loss / len(dataloader.dataset)
        
#         print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss}, Train Keypoint Loss: {avg_kpt_loss}')
        
#         train_losses.append(avg_train_loss)
#         key_train_losses.append(avg_kpt_loss)

#     return model, train_losses, key_train_losses

def train_model(device, model, criterion_cls, criterion_kpt, optimizer, train_loader, valid_loader, num_epochs=25):
    train_losses = []
    valid_losses = []
    key_train_losses = []
    key_valid_losses = []
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0
        total_kpt_loss = 0.0

        for images, keypoints, labels in train_loader:
            labels = torch.stack([label.to(device) for label in labels])
            images = torch.stack(images).to(device)
            ground_truth_keypoints = torch.stack([keypoint.to(device) for keypoint in keypoints])

            # labels_tensor = torch.stack(labels)
            print(labels)
            class_indices = labels
            # print('labels shape: ', labels.count)
            optimizer.zero_grad()
            # print(abc)

            class_outputs, keypoint_outputs = model(images)
            print(class_outputs)
            scale = torch.rand(keypoint_outputs.shape) + 1e-6
            loss_cls = criterion_cls(class_outputs, class_indices)
            # loss_kpt=None
            loss_kpt = LaplaceNLLLoss(keypoint_outputs, ground_truth_keypoints, scale)
                # loss_kpt = criterion_kpt(keypoint_outputs, ground_truth_keypoints)
            class_loss = loss_cls 
            kpt_loss = loss_kpt
            # total_loss = loss_cls + loss_kpt # Combine losses for backpropagation
            class_loss.backward(retain_graph=True)
            kpt_loss.backward()
            
            optimizer.step()
            total_train_loss += loss_cls.item()
            total_kpt_loss += loss_kpt.item()

        avg_train_loss = total_train_loss / len(train_loader.dataset)
        avg_kpt_loss = total_kpt_loss / len(train_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss}, Train Keypoint Loss: {avg_kpt_loss}')

        model.eval()
        total_val_loss = 0.0
        total_val_kpt_loss = 0.0

        with torch.no_grad():
            for images, keypoints, labels in valid_loader:
                labels = torch.stack([label.to(device) for label in labels])
                images = torch.stack(images).to(device)
                ground_truth_keypoints = torch.stack([keypoint.to(device) for keypoint in keypoints])

                labels_tensor = torch.stack(labels)
                class_indices = torch.argmax(labels_tensor, dim=1)

                class_outputs, keypoint_outputs = model(images)
                
                scale = torch.rand(keypoint_outputs.shape) + 1e-6
                loss_cls = criterion_cls(class_outputs, class_indices)
                loss_kpt = LaplaceNLLLoss(keypoint_outputs, ground_truth_keypoints, scale)
                    # loss_kpt = criterion_kpt(keypoint_outputs, ground_truth_keypoints)
                
                total_val_loss += loss_cls.item() 
                total_val_kpt_loss += loss_kpt.item()

        avg_val_loss = total_val_loss / len(valid_loader.dataset)
        avg_val_kpt_loss = total_val_kpt_loss / len(valid_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss}, Val Keypoint Loss: {avg_val_kpt_loss}')
        
        train_losses.append(avg_train_loss)
        valid_losses.append(avg_val_loss)
        key_train_losses.append(avg_kpt_loss)
        key_valid_losses.append(avg_val_kpt_loss)

    return model, train_losses, key_train_losses, valid_losses, key_valid_losses  # Optionally return the model

In [75]:
def train_ssd(model, dataloader, criterion_cls, criterion_kpt, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, keypoints, labels in dataloader:
            targets = [{'boxes': keypoints, 'labels': labels}]
            loss_dict = model(inputs, targets)
            classification_loss = criterion_cls(loss_dict['cls_logits'], labels)
            keypoints_loss = criterion_kpt(loss_dict['boxes'], keypoints)
            loss_cls = classification_loss
            loss_kpt = keypoints_loss
            optimizer.zero_grad()
            loss_cls.backward(retain_graph=True)
            loss_kpt.backward()
            optimizer.step()
            running_loss += (loss_cls.item() + loss_kpt.item())

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(dataloader)}")


In [76]:
def plots_all(loss, epochs, label_loss, path):
    plt.figure(figsize=(10, 6))
    plt.plot(loss, label=label_loss)
    plt.title(label_loss+' Over'+str(epochs)+' Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(path)
    plt.show()

In [77]:
class_config_path = './../config/formated_class.yaml'
DATASET_FOLDER_TRAIN = './../../../RnD_datasets/robocup_dataset'
train_path = os.path.join(DATASET_FOLDER_TRAIN,"train/")
val_path = os.path.join(DATASET_FOLDER_TRAIN,"val/")

num_classes = len(load_class_info(class_config_path))

train_dataset = NormalDataLoader(train_path, class_config_path, transform=transform, demo=True)
val_dataset = NormalDataLoader(val_path, class_config_path, transform=transform, demo=True)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9, weight_decay=0.0005) #SSD

{'AllenKey': 0, 'Axis2': 1, 'Bearing2': 2, 'Drill': 3, 'F20_20_B': 4, 'F20_20_G': 5, 'Housing': 6, 'M20': 7, 'M20_100': 8, 'M30': 9, 'Motor2': 10, 'S40_40_B': 11, 'S40_40_G': 12, 'Screwdriver': 13, 'Spacer': 14, 'Wrench': 15, 'container_blue': 16, 'container_red': 17}
{'AllenKey': 0, 'Axis2': 1, 'Bearing2': 2, 'Drill': 3, 'F20_20_B': 4, 'F20_20_G': 5, 'Housing': 6, 'M20': 7, 'M20_100': 8, 'M30': 9, 'Motor2': 10, 'S40_40_B': 11, 'S40_40_G': 12, 'Screwdriver': 13, 'Spacer': 14, 'Wrench': 15, 'container_blue': 16, 'container_red': 17}


In [78]:
model = ResNet18Model(num_classes=num_classes)
model.to(device)

criterion_classification = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion_keypoints = nn.MSELoss()

epochs = 100

trained_model, train_loss, key_train_loss, valid_losses, key_valid_losses = train_model(device, model, criterion_classification, criterion_keypoints, optimizer, train_loader, valid_loader, num_epochs=epochs)

torch.save(model.state_dict(), './normal/models/resnet18_normal_100.pth')

plots_all(train_loss, epochs, 'Training Loss', './normal/plots/resnet18_normal_training_loss_100_epochs.png')
plots_all(key_train_loss, epochs, 'Keypoint Training Loss', './normal/plots/resnet18_normal_keypoint_training_loss_100_epochs.png')

tensor([ 2, 12,  6,  4, 13,  6, 11,  0, 11, 12,  5,  5,  0,  2, 14, 12,  5, 12,
         3, 12,  4,  2,  4, 12,  9, 11, 13,  5, 10,  4, 14,  0],
       device='cuda:0', dtype=torch.int32)
tensor([[-1.3732e-02, -2.1905e-01, -2.3761e-02, -3.7269e-01, -5.4984e-01,
         -9.0506e-01, -2.3215e-01,  2.0684e-02,  3.3189e-01, -1.7693e-01,
         -1.9543e-01, -2.9541e-01, -4.2110e-01,  6.5649e-01, -7.3906e-01,
         -5.0231e-02,  1.4081e-01,  8.2963e-01],
        [ 6.4222e-01, -1.5526e-01, -1.0694e+00,  1.4538e-01, -5.2045e-01,
         -5.0094e-01, -1.2113e-01, -4.4936e-02,  3.4638e-01,  2.9641e-01,
          2.3925e-01, -8.4414e-01,  8.9996e-02,  4.5097e-01, -6.5466e-01,
          2.6347e-01, -4.8234e-01,  8.3424e-01],
        [ 1.0573e+00, -6.9146e-01,  2.7343e-02, -4.6398e-03,  1.6400e-01,
          2.6010e-02,  2.5012e-01, -1.0207e+00, -5.1289e-04,  4.4597e-01,
          6.8850e-01, -9.4987e-01, -2.8960e-01,  1.2757e+00, -9.2715e-01,
          4.1576e-01, -1.2474e-01,  1.5011e+00],

RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Int'

In [None]:
model = ResNet50Model(num_classes=num_classes)
model.to(device)

criterion_classification = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion_keypoints = nn.MSELoss()

epochs = 100

trained_model, train_loss, key_train_loss = train_model(model, dataloader, criterion_classification, criterion_keypoints, optimizer, num_epochs=epochs)

torch.save(model.state_dict(), './normal/models/resnet50_normal_100.pth')

plots_all(train_loss, epochs, 'Training Loss', './normal/plots/resnet50_normal_training_loss_100_epochs.png')
plots_all(key_train_loss, epochs, 'Keypoint Training Loss', './normal/plots/resnet50_normal_keypoint_training_loss_100_epochs.png')