In [1]:
import xmltodict
import os

xml_path = './MAGIC-Lab/images_4asec_2487_addMarkers.xml'

with open(xml_path) as fd:
    doc = xmltodict.parse(fd.read())
camera = doc['document']['chunk']['cameras']['camera']


In [2]:
import numpy as np
import torch
from kornia.geometry import conversions
from kornia.geometry.conversions import QuaternionCoeffOrder as Order

images_path = './MAGIC-Lab/images_4asec'
for img_info in camera:
    path_name = img_info['@label']
    pose_path = f'{images_path}/{path_name}.txt'
    #To avoid key error, using built in function ".get(key, default)".
    if(img_info.get('@enabled', False) and img_info['@enabled'] == 'false'):
        continue
    pose_matrix = img_info['transform']
    pose_matrix = pose_matrix.split()

    with open(f'{pose_path}', 'w') as f:
        for i, el in enumerate(pose_matrix, start = 1):
            if i%4 != 0: 
                f.write(el + "\t")
            else:
                f.write(el + "\n")
                
    T = np.loadtxt(pose_path, dtype='float32') # homogeneous transformation matrix, camera to world
    R = torch.tensor(T[:3, :3]) # rotation matrix
    r = torch.tensor(T[:3, 3]) # translation vector
    q = conversions.rotation_matrix_to_quaternion(R, order=Order.WXYZ) # quaternion
    if q[0] < 0:
        q = -q # constrain q0 > 0
    pose_7params = torch.hstack((r, q))
    pose_7params = pose_7params.numpy()
    
    with open(f'{images_path}/{path_name}_7Params.txt', 'w') as f:
        for el in pose_7params:
            f.write(str(el) + " ")

In [19]:
import numpy as np
import shutil

images_path = './MAGIC-Lab/images_4asec'
dir_list = os.listdir(images_path)
training_nums = 2000

id_list = []
for file in dir_list:
    if file.split('.')[1] == 'txt':
        id_list.append(file.split('.')[0])

training_list = np.random.choice(id_list, training_nums, replace = False)
testing_list = list(set(id_list).difference(set(training_list)))

datasets_path = './MAGIC-Lab/datasets'
if not os.path.isdir(datasets_path):
    os.mkdir(datasets_path)
if not os.path.isdir(f'{datasets_path}/training_set'):
    os.mkdir(f'{datasets_path}/training_set')
if not os.path.isdir(f'{datasets_path}/testing_set'):
    os.mkdir(f'{datasets_path}/testing_set')

training_path = f'{datasets_path}/training_set/'
testing_path = f'{datasets_path}/testing_set/'
for file in dir_list:
    if file.split('.')[0] in training_list:
        shutil.copy(os.path.join(images_path, file), os.path.join(training_path, file))
    if file.split('.')[0] in testing_list:
        shutil.copy(os.path.join(images_path, file), os.path.join(testing_path, file))


# Dataset

In [1]:
import numpy as np
import torch
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from kornia.geometry import conversions
from kornia.geometry.conversions import QuaternionCoeffOrder as Order

In [2]:
class LabDataset(Dataset):
    """Lab Dataset.

    Attributes:
        imgs (torch.Tensor): Images.
        poses (torch.Tensor): Camera poses.
    """

    def __init__(self, path, transform, train=True):
        """
        Args:
            path (str): Path of Lab dataset.
            train (bool): If True, return training data; else, return testing data.
            transform (torchvision.transforms): Image transform.
        """
        dir_path = path

        if train:
            split_path = os.path.join(dir_path, 'training_set')
        else:
            split_path = os.path.join(dir_path, 'testing_set')

        total_frames = int(len(os.listdir(split_path))/2)
        all_list = os.listdir(split_path)
        img_list = []
        for file in all_list:
            if(file.split('.')[-1] != "txt"):
                img_list.append(file + "")
                
        first_img_dir = img_list[0]
        img_temp = Image.open(os.path.join(split_path, first_img_dir)) 
        print(img_temp)
        img_temp = transform(img_temp)
        print(img_temp)
        height, width = img_temp.shape[-2:]
        self.imgs = torch.empty(total_frames, 3, height, width)
        self.poses = torch.empty(total_frames, 7)

        index = 0
        for file in img_list:
            img_path = os.path.join(split_path, file)
            img = Image.open(img_path)
            img = transform(img)
            self.imgs[index] = img
            pose_txt_path = os.path.join(split_path, f"{file.split('.')[0]}.txt")
            T = np.loadtxt(pose_txt_path, dtype='float32') # homogeneous transformation matrix, camera to world
            R = torch.tensor(T[:3, :3]) # rotation matrix
            r = torch.tensor(T[:3, 3]) # translation vector
            q = conversions.rotation_matrix_to_quaternion(R, order=Order.WXYZ) # quaternion
            if q[0] < 0:
                q = -q # constrain q0 > 0
            pose = torch.hstack((r, q))
            self.poses[index] = pose
            index += 1

    def __len__(self):
        return self.imgs.shape[0]

    def __getitem__(self, idx):
        """
        Args:
            idx (int): Input index.

        Returns:
            {'image' (torch.Tensor): Image,
             'pose' (torch.Tensor): Pose of the image.}

        Shape:
            {'image': (3, height, width),
             'pose': (7).}
        """
        return {'image': self.imgs[idx],
                'pose': self.poses[idx]}


In [3]:
path = 'lab_data/datasets'
resize = 1080
batch_size = 16
num_workers = 0
pin_memory = True
lr = 0.001
mean = [0.4702,0.4557,0.4363] #calculated by get_datas_mean_std function
std = [0.2696,0.2698,0.2564]
epochs = 300
checkpoint_epochs = 10
transform = transforms.Compose([
    transforms.Resize(size=(resize,resize)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [4]:
from torch.utils.data import DataLoader
trainset = LabDataset(path, transform=transform, train=True)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers, pin_memory=pin_memory,
                          drop_last=True)

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1920x1080 at 0x228AB2D2B00>
tensor([[[ 0.9178,  0.9178,  0.9178,  ...,  1.3106,  1.3106,  1.3106],
         [ 0.9178,  0.9178,  0.9178,  ...,  1.3106,  1.3106,  1.3106],
         [ 0.9178,  0.9178,  0.9178,  ...,  1.3106,  1.3106,  1.3106],
         ...,
         [-1.1913, -0.8131, -0.2167,  ...,  0.0887,  0.0887,  0.1178],
         [-1.2059, -0.8713, -0.3040,  ...,  0.1178,  0.1178,  0.1324],
         [-1.2059, -0.9150, -0.3622,  ...,  0.1324,  0.1324,  0.1469]],

        [[ 0.9709,  0.9709,  0.9709,  ...,  1.3197,  1.3197,  1.3197],
         [ 0.9709,  0.9709,  0.9709,  ...,  1.3197,  1.3197,  1.3197],
         [ 0.9709,  0.9709,  0.9709,  ...,  1.3197,  1.3197,  1.3197],
         ...,
         [-1.1076, -0.7297, -0.1338,  ...,  0.1569,  0.1569,  0.1860],
         [-1.1222, -0.7879, -0.2210,  ...,  0.1860,  0.1860,  0.2005],
         [-1.1222, -0.8315, -0.2791,  ...,  0.2005,  0.2005,  0.2151]],

        [[ 1.0973,  1.0973,  1.09