In [1]:
'''
Tunable parameters used by the pre-processing, network and training cells.

VEHICLES: the object classes you are interested in, e.g. ["Car", "Van", "Truck"].

BIN: number of orientation bins. Plausible values lie between 10 and 90, depending on how finely
the heading should be divided. A common choice is 30, i.e. 12 degrees per bin.

OVERLAP: usually a small value between 0 and 1; 0.1 is a plausible choice.

MAX_JIT: depends on how large the random bounding-box jitter should be. For an image size of 224,
a plausible value is 30.

NORM_H, NORM_W: set according to the input size your model expects. 224 is a common value, being
the default input size of many convolutional networks.

label_dir, image_dir: the actual locations of the label files and image files.

BATCH_SIZE: set according to your hardware and model size. 32 is a common value, but pick a
smaller one if the model is very large or hardware resources are limited.
'''

# Orientation binning
BIN = 30
OVERLAP = 0.1

# Augmentation / input geometry
MAX_JIT = 30
NORM_H = 224
NORM_W = 224

# Object classes kept from the label files
VEHICLES = [
    'Car', 'Van', 'Truck', 'Pedestrian',
    'Sitter', 'Cyclist', 'Tram', 'Misc',
]

BATCH_SIZE = 8

# Dataset locations (both end with a path separator because callers concatenate file names)
label_dir = 'E:/Codespace/image-to-3d-bbox/data/image_to_train/data_object_label_2/training/label_2/'
image_dir = 'E:/Codespace/image-to-3d-bbox/data/image_to_train/data_object_image_2/training/image_2/'

In [4]:
import copy
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf

from parameters import (BATCH_SIZE, BIN, MAX_JIT, NORM_H, NORM_W,
                        OVERLAP, VEHICLES, image_dir, label_dir)


def compute_anchors(angle, bins=None, overlap=None):
    """Return the orientation bins that ``angle`` is assigned to.

    The full circle is split into ``bins`` equal wedges. The angle is assigned to the wedge
    boundary on its left and/or right whenever it lies within half a wedge, widened by
    ``overlap``, of that boundary, so every angle gets one or two anchors.

    Args:
        angle: Angle in radians. Any real value is accepted; it is wrapped into ``[0, 2*pi)``.
        bins: Number of wedges. Defaults to the module-level ``BIN``.
        overlap: Fractional widening of each wedge. Defaults to the module-level ``OVERLAP``.

    Returns:
        A list of ``[bin_index, offset]`` pairs, where ``0 <= bin_index < bins`` and ``offset``
        is the signed angular distance (radians) from that bin's boundary to ``angle``.
    """
    if bins is None:
        bins = BIN
    if overlap is None:
        overlap = OVERLAP

    full_turn = 2 * np.pi
    # Wrap first: an input of exactly 2*pi (e.g. the mirror image of 0) or a negative angle would
    # otherwise produce an out-of-range or truncated-toward-zero bin index.
    angle = angle % full_turn

    spaceangle = full_turn / bins
    l_index = int(angle / spaceangle)
    r_index = l_index + 1
    reach = spaceangle / 2 * (1 + overlap / 2)

    anchors = []

    if (angle - l_index * spaceangle) < reach:
        # ``% bins`` guards against floating-point rounding pushing l_index up to ``bins``
        anchors.append([l_index % bins, angle - l_index * spaceangle])

    if (r_index * spaceangle - angle) < reach:
        anchors.append([r_index % bins, angle - r_index * spaceangle])

    return anchors


def parse_annotation(label_dir, image_dir, vehicles=None):
    """Read every label file in ``label_dir`` and collect the usable objects.

    An object is kept when its class is listed in ``vehicles`` and both its truncation
    (field 1) and occlusion (field 2) values are below 0.1.

    Args:
        label_dir: Directory containing the ``.txt`` label files (trailing separator optional).
        image_dir: Unused; kept so existing callers keep working.
        vehicles: Iterable of class names to keep. Defaults to the module-level ``VEHICLES``.

    Returns:
        ``(all_objs, dims_avg)`` where ``all_objs`` is a list of dicts with the keys ``name``,
        ``image``, ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``dims`` (fields 8-10 as a float
        array) and ``new_alpha`` (field 3 shifted by pi/2 and wrapped into ``[0, 2*pi)``), and
        ``dims_avg`` maps each class name to the mean ``dims`` of its kept objects (zeros when
        the class never occurs).
    """
    if vehicles is None:
        vehicles = VEHICLES

    all_objs = []
    # Float accumulators so the running mean never depends on integer promotion
    dims_avg = {key: np.zeros(3) for key in vehicles}
    dims_cnt = {key: 0 for key in vehicles}

    # Sorted so the result does not depend on the platform's directory ordering
    for label_file in sorted(os.listdir(label_dir)):
        stem, ext = os.path.splitext(label_file)
        if ext.lower() != '.txt':
            # Ignore stray non-label files instead of trying to parse them
            continue
        image_file = stem + '.png'

        # Context manager closes the handle; the original left one open per label file
        with open(os.path.join(label_dir, label_file)) as handle:
            for raw in handle:
                fields = raw.split()
                if len(fields) < 11:
                    # Blank or malformed line
                    continue

                name = fields[0]
                truncated = np.abs(float(fields[1]))
                occluded = np.abs(float(fields[2]))
                if name not in dims_avg or truncated >= 0.1 or occluded >= 0.1:
                    continue

                # Shift by pi/2 and wrap into [0, 2*pi)
                new_alpha = (float(fields[3]) + np.pi / 2.) % (2. * np.pi)

                obj = {'name': name,
                       'image': image_file,
                       'xmin': int(float(fields[4])),
                       'ymin': int(float(fields[5])),
                       'xmax': int(float(fields[6])),
                       'ymax': int(float(fields[7])),
                       'dims': np.array([float(number) for number in fields[8:11]]),
                       'new_alpha': new_alpha
                       }

                # Incremental mean of the per-class dimensions
                dims_cnt[name] += 1
                dims_avg[name] = dims_avg[name] + (obj['dims'] - dims_avg[name]) / dims_cnt[name]

                all_objs.append(obj)

    return all_objs, dims_avg


all_objs, dims_avg = parse_annotation(label_dir, image_dir)

for obj in all_objs:
    # Express the size as a residual with respect to the class mean
    obj['dims'] = obj['dims'] - dims_avg[obj['name']]

    # Encode the unflipped heading: one (cos, sin) pair per active bin plus a
    # confidence vector that sums to 1 over the active bins
    orientation = np.zeros((BIN, 2))
    confidence = np.zeros(BIN)

    anchors = compute_anchors(obj['new_alpha'])

    for bin_idx, offset in anchors:
        orientation[bin_idx] = np.array([np.cos(offset), np.sin(offset)])
        confidence[bin_idx] = 1

    confidence = confidence / np.sum(confidence)

    obj['orient'], obj['conf'] = orientation, confidence


def _encode_orientation(alpha):
    """Return per-bin ``(cos, sin)`` offset targets and normalised bin confidences for ``alpha``."""
    orientation = np.zeros((BIN, 2))
    confidence = np.zeros(BIN)

    for bin_idx, offset in compute_anchors(alpha):
        # ``% BIN`` keeps the index valid should a wrap-around anchor be reported as bin == BIN
        orientation[bin_idx % BIN] = np.array([np.cos(offset), np.sin(offset)])
        confidence[bin_idx % BIN] = 1

    return orientation, confidence / np.sum(confidence)


def prepare_input_and_output(train_inst):
    """Load, augment and normalise one object patch and build its training targets.

    The patch is cropped from the image using the record's bounding box, then colour-jittered,
    rotated by a random angle in [-20, 20] degrees, rescaled by a random factor in [0.8, 1.2],
    randomly cropped to 80-100% of each side, mirrored with probability 0.5, resized to the
    network input size and mean-subtracted.

    ``train_inst`` is never modified. The previous version wrote the augmented box, size and
    angle back into the record, so coordinates became floats (breaking the slice on the next
    visit) and the labels drifted a little more every epoch. The size target is the stored
    ``dims`` unchanged, and the heading changes only when the patch is mirrored: 2D zoom,
    in-plane rotation and cropping do not alter the object's 3D size or viewing angle.

    Args:
        train_inst: Record with the keys ``image``, ``xmin``, ``ymin``, ``xmax``, ``ymax``,
            ``dims`` and ``new_alpha``.

    Returns:
        ``(img, dims, orientation, confidence)``: the ``NORM_H`` x ``NORM_W`` x 3 patch, the
        size target, a ``(BIN, 2)`` array of ``(cos, sin)`` targets and a ``(BIN,)`` confidence
        vector.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the bounding box selects no pixels.
    """
    # Integer pixel bounds (slicing rejects floats)
    xmin, ymin = int(train_inst['xmin']), int(train_inst['ymin'])
    xmax, ymax = int(train_inst['xmax']), int(train_inst['ymax'])

    image_file = image_dir + train_inst['image']
    img = cv2.imread(image_file)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising
        raise FileNotFoundError('cannot read image: ' + image_file)

    # astype() already returns an independent copy of the slice
    img = img[ymin:ymax + 1, xmin:xmax + 1].astype(np.float32)
    if img.size == 0:
        raise ValueError('empty bounding box for ' + image_file)

    # Re-colour: small additive noise plus a random per-channel gain. The gain is halved rather
    # than divided by 255 * 2 so the patch stays in the 0-255 range that the mean subtraction
    # below (and the inference cell) assumes.
    img += np.random.randint(-2, 3, img.shape).astype('float32')
    gain = np.random.uniform(size=3)
    img = (img * (1 + gain) / 2).astype(np.float32)

    # Rotate about the patch centre by a random angle between -20 and 20 degrees
    angle = np.random.uniform(-20, 20)
    M = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2), angle, 1)
    img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

    # Random isotropic scaling
    scale_factor = np.random.uniform(0.8, 1.2)
    img = cv2.resize(img, None, fx=scale_factor, fy=scale_factor)

    # Random crop; sides are clamped to at least one pixel so tiny patches survive
    h, w = img.shape[:2]
    new_h = max(1, int(h * np.random.uniform(0.8, 1)))
    new_w = max(1, int(w * np.random.uniform(0.8, 1)))
    # randint's upper bound is exclusive, hence the +1 (also valid when nothing is cropped)
    start_x = np.random.randint(0, w - new_w + 1)
    start_y = np.random.randint(0, h - new_h + 1)
    img = img[start_y:start_y + new_h, start_x:start_x + new_w]

    # Horizontal flip mirrors the viewing angle; wrap so an angle of 0 does not become 2*pi
    alpha = train_inst['new_alpha']
    flip = np.random.binomial(1, 0.5)
    if flip > 0.5:
        img = cv2.flip(img, 1)
        alpha = (2. * np.pi - alpha) % (2. * np.pi)

    # Resize to the network input size (cv2 expects (width, height)) and subtract channel means
    img = cv2.resize(img, (NORM_W, NORM_H))
    img = img - np.array([[[103.939, 116.779, 123.68]]])

    orientation, confidence = _encode_orientation(alpha)

    return img, train_inst['dims'], orientation, confidence


def data_gen(all_objs, batch_size):
    """Endlessly yield augmented training batches drawn from ``all_objs``.

    The objects are reshuffled at the start of every pass. Each pass yields consecutive slices of
    ``batch_size`` objects; the last slice of a pass may be shorter.

    Args:
        all_objs: Sequence of object records accepted by ``prepare_input_and_output``.
        batch_size: Maximum number of objects per batch (must be at least 1).

    Yields:
        ``(x_batch, [d_batch, o_batch, c_batch])``: image patches, size targets, orientation
        targets and bin confidences, each stacked along a leading batch axis as float64 arrays.

    Raises:
        ValueError: If ``all_objs`` is empty or ``batch_size`` is smaller than 1. The previous
            version yielded empty batches forever for an empty dataset.
    """
    num_obj = len(all_objs)
    if num_obj == 0:
        raise ValueError('data_gen needs at least one object')
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    keys = list(range(num_obj))

    while True:
        np.random.shuffle(keys)

        for l_bound in range(0, num_obj, batch_size):
            images, dimensions, orientations, confidences = [], [], [], []

            for key in keys[l_bound:l_bound + batch_size]:
                # augment input image and fix object's orientation and confidence
                image, dimension, orientation, confidence = prepare_input_and_output(
                    all_objs[key])

                images.append(image)
                dimensions.append(dimension)
                orientations.append(orientation)
                confidences.append(confidence)

            # Stacking takes the patch shape from the data instead of a hard-coded 224 x 224,
            # so the generator follows whatever input size the pre-processing produces
            x_batch = np.asarray(images, dtype=np.float64)
            d_batch = np.asarray(dimensions, dtype=np.float64)
            o_batch = np.asarray(orientations, dtype=np.float64)
            c_batch = np.asarray(confidences, dtype=np.float64)

            yield x_batch, [d_batch, o_batch, c_batch]


def l2_normalize(x):
    """Rescale ``x`` to unit L2 norm along axis 2 via ``tf.nn.l2_normalize``."""
    unit_x = tf.nn.l2_normalize(x, axis=2)
    return unit_x


In [5]:
# Construct the network
# Use ResNet or similar more modern architecture
from keras.applications.resnet import ResNet50
from keras.layers import Dense, Input
from keras.layers.activation import LeakyReLU
from keras.layers.convolutional import (Conv2D, Convolution2D, MaxPooling2D,
                                        ZeroPadding2D)
from keras.layers.core import Dense, Dropout, Flatten, Lambda, Reshape
from keras.models import Model
from keras.regularizers import l2

from parameters import BIN
from preprocessdata import l2_normalize

# Backbone: ImageNet-pretrained ResNet50 without its fully connected top
base_model = ResNet50(include_top=False, weights='imagenet',
                      input_shape=(224, 224, 3))

# Freeze the backbone except for its last four layers, which stay trainable
num_backbone_layers = len(base_model.layers)
for layer_idx, layer in enumerate(base_model.layers):
    layer.trainable = layer_idx >= num_backbone_layers - 4

# Shared feature vector feeding the three heads
x = base_model.output
x = Flatten()(x)

# Regularization strength
reg_strength = 0.01


def _hidden_block(features, units):
    """Dense -> LeakyReLU -> Dropout stage that opens each head."""
    hidden = Dense(units, kernel_regularizer=l2(reg_strength))(features)
    hidden = LeakyReLU(alpha=0.1)(hidden)
    return Dropout(0.5)(hidden)


# Layers are created in the same order as before so auto-generated layer names are unchanged

# Size head: three values
dimension = _hidden_block(x, 512)
dimension = Dense(3)(dimension)
dimension = LeakyReLU(alpha=0.1, name='dimension')(dimension)

# Orientation head: one L2-normalised pair per bin
orientation = _hidden_block(x, 256)
orientation = Dense(BIN*2)(orientation)
orientation = LeakyReLU(alpha=0.1)(orientation)
orientation = Reshape((BIN, -1))(orientation)
orientation = Lambda(l2_normalize, name='orientation')(orientation)

# Confidence head: softmax over the bins
confidence = _hidden_block(x, 256)
confidence = Dense(BIN, activation='softmax', name='confidence')(confidence)

model = Model(base_model.input, outputs=[dimension, orientation, confidence])


In [None]:
import os

import numpy as np
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD

from network import model
from preprocessdata import all_objs, data_gen

# Order CUDA devices by PCI bus id and expose only device "1" to this process.
# NOTE(review): these are assigned after tensorflow and the model have already been imported
# above; presumably they must be set before TensorFlow first initialises the GPU to have any
# effect — confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def orientation_loss(y_true, y_pred):
    """Negative mean dot product between target and predicted per-bin vectors.

    For each sample the dot products of the target and predicted ``(component 0, component 1)``
    pairs are summed over the bins and divided by the number of bins whose target has a squared
    norm above 0.5. The result is negated and averaged over the batch.
    """
    # Count the bins that carry a target vector
    target_sq_norm = tf.reduce_sum(tf.square(y_true), axis=2)
    has_target = tf.greater(target_sq_norm, tf.constant(0.5))
    num_targets = tf.reduce_sum(tf.cast(has_target, tf.float32), 1)

    # Per-bin negative dot product, summed per sample and normalised by the bin count
    first_term = y_true[:, :, 0]*y_pred[:, :, 0]
    second_term = y_true[:, :, 1]*y_pred[:, :, 1]
    per_sample = tf.reduce_sum(-(first_term + second_term), axis=1) / num_targets

    return tf.reduce_mean(per_sample)


# Callbacks: stop when val_loss stalls, keep the best weights, log for TensorBoard
early_stop = EarlyStopping(monitor='val_loss', mode='min',
                           min_delta=0.001, patience=10, verbose=1)

checkpoint = ModelCheckpoint('weights.hdf5', monitor='val_loss', mode='min',
                             save_best_only=True, save_freq='epoch', verbose=1)

tensorboard = TensorBoard(log_dir='../logs/', histogram_freq=0,
                          write_graph=True, write_images=False)

# 90 / 10 split between training and validation objects
batch_size = 8
all_exams = len(all_objs)
trv_split = int(0.9*all_exams)

# Shuffle in place before slicing so both subsets are random samples
np.random.shuffle(all_objs)

train_objs = all_objs[:trv_split]
valid_objs = all_objs[trv_split:all_exams]
train_gen = data_gen(train_objs, batch_size)
valid_gen = data_gen(valid_objs, batch_size)

# Batches needed to see each subset once
train_num = int(np.ceil(trv_split/batch_size))
valid_num = int(np.ceil((all_exams - trv_split)/batch_size))

# SGD instance kept as an alternative; the model is compiled with 'adam'
minimizer = SGD(learning_rate=0.0001)

head_losses = {'dimension': 'mean_squared_error',
               'orientation': orientation_loss,
               'confidence': 'mean_squared_error'}
head_weights = {'dimension': 1., 'orientation': 1., 'confidence': 1.}

model.compile(optimizer='adam',  # minimizer,
              loss=head_losses,
              loss_weights=head_weights)

model.fit(train_gen,
          steps_per_epoch=train_num,
          epochs=5,  # 500
          verbose=1,
          validation_data=valid_gen,
          validation_steps=valid_num,
          callbacks=[early_stop, checkpoint, tensorboard],
          max_queue_size=3,
          workers=1,
          use_multiprocessing=False)


In [None]:
import os

import numpy as np
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD

from network import model
from preprocessdata import all_objs, data_gen

# Order CUDA devices by PCI bus id and expose only device "1" to this process.
# NOTE(review): these are assigned after tensorflow and the model have already been imported
# above; presumably they must be set before TensorFlow first initialises the GPU to have any
# effect — confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


def orientation_loss(y_true, y_pred):
    """Negative mean dot product between target and predicted per-bin vectors.

    For each sample the dot products of the target and predicted ``(component 0, component 1)``
    pairs are summed over the bins and divided by the number of bins whose target has a squared
    norm above 0.5. The result is negated and averaged over the batch.
    """
    # Count the bins that carry a target vector
    target_sq_norm = tf.reduce_sum(tf.square(y_true), axis=2)
    has_target = tf.greater(target_sq_norm, tf.constant(0.5))
    num_targets = tf.reduce_sum(tf.cast(has_target, tf.float32), 1)

    # Per-bin negative dot product, summed per sample and normalised by the bin count
    first_term = y_true[:, :, 0]*y_pred[:, :, 0]
    second_term = y_true[:, :, 1]*y_pred[:, :, 1]
    per_sample = tf.reduce_sum(-(first_term + second_term), axis=1) / num_targets

    return tf.reduce_mean(per_sample)


# NOTE(review): this cell repeats the preceding training cell statement for statement; running
# it trains the same model object for another 5 epochs.

# Callbacks: stop when val_loss stalls, keep the best weights, log for TensorBoard
early_stop = EarlyStopping(monitor='val_loss', mode='min',
                           min_delta=0.001, patience=10, verbose=1)

checkpoint = ModelCheckpoint('weights.hdf5', monitor='val_loss', mode='min',
                             save_best_only=True, save_freq='epoch', verbose=1)

tensorboard = TensorBoard(log_dir='../logs/', histogram_freq=0,
                          write_graph=True, write_images=False)

# 90 / 10 split between training and validation objects
batch_size = 8
all_exams = len(all_objs)
trv_split = int(0.9*all_exams)

# Shuffle in place before slicing so both subsets are random samples
np.random.shuffle(all_objs)

train_objs = all_objs[:trv_split]
valid_objs = all_objs[trv_split:all_exams]
train_gen = data_gen(train_objs, batch_size)
valid_gen = data_gen(valid_objs, batch_size)

# Batches needed to see each subset once
train_num = int(np.ceil(trv_split/batch_size))
valid_num = int(np.ceil((all_exams - trv_split)/batch_size))

# SGD instance kept as an alternative; the model is compiled with 'adam'
minimizer = SGD(learning_rate=0.0001)

head_losses = {'dimension': 'mean_squared_error',
               'orientation': orientation_loss,
               'confidence': 'mean_squared_error'}
head_weights = {'dimension': 1., 'orientation': 1., 'confidence': 1.}

model.compile(optimizer='adam',  # minimizer,
              loss=head_losses,
              loss_weights=head_weights)

model.fit(train_gen,
          steps_per_epoch=train_num,
          epochs=5,  # 500
          verbose=1,
          validation_data=valid_gen,
          validation_steps=valid_num,
          callbacks=[early_stop, checkpoint, tensorboard],
          max_queue_size=3,
          workers=1,
          use_multiprocessing=False)


In [None]:
import os

import cv2
import numpy as np

from network import model
from parameters import (BATCH_SIZE, BIN, MAX_JIT, NORM_H, NORM_W,
                        OVERLAP, VEHICLES, image_dir, label_dir)
from preprocessdata import dims_avg

# Regress orientation and size for every 2D box of every frame and write the result, one line
# per box, to a file of the same name in box3d_loc.
model.load_weights('weights.hdf5')
image_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/image_00/data/'
box2d_loc = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/box_2d/'
box3d_loc = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/box_3d/'

all_image = sorted(os.listdir(image_dir))
# np.random.shuffle(all_image)

for f in all_image:
    image_file = image_dir + f
    box2d_file = box2d_loc + f.replace('png', 'txt')
    box3d_file = box3d_loc + f.replace('png', 'txt')

    # The output file is always (re)created, even when there is nothing to predict
    with open(box3d_file, 'w') as box3d:
        img = cv2.imread(image_file)
        if img is None or not os.path.isfile(box2d_file):
            # Unreadable image or no 2D detections for this frame. The previous version
            # opened box2d_file with mode 'w' here, which wiped the detections before
            # they could be read.
            continue

        img = img.astype(np.float32, copy=False)
        # Boxes are drawn on a separate copy so later patches are cropped from clean pixels
        preview = img.copy()

        with open(box2d_file) as box2d:
            for line in box2d:
                line = line.strip().split(' ')
                if len(line) < 8:
                    # Blank or malformed line
                    continue

                obj = {'xmin': int(float(line[4])),
                       'ymin': int(float(line[5])),
                       'xmax': int(float(line[6])),
                       'ymax': int(float(line[7])),
                       }

                patch = img[obj['ymin']:obj['ymax'], obj['xmin']:obj['xmax']]
                if patch.size == 0:
                    # Degenerate box; cv2.resize cannot handle an empty patch
                    continue
                # cv2.resize expects (width, height)
                patch = cv2.resize(patch, (NORM_W, NORM_H))
                patch = patch - np.array([[[103.939, 116.779, 123.68]]])
                patch = np.expand_dims(patch, 0)

                prediction = model.predict(patch)

                # Transform regressed angle: take the most confident bin and its offset vector
                max_anc = np.argmax(prediction[2][0])
                anchors = prediction[1][0][max_anc]

                # arctan2 recovers the signed offset from (cos, sin) and, unlike arccos,
                # cannot return NaN when rounding pushes the cosine slightly past +/-1
                angle_offset = np.arctan2(anchors[1], anchors[0])

                wedge = 2.*np.pi/BIN
                angle_offset = angle_offset + max_anc*wedge
                angle_offset = angle_offset % (2.*np.pi)

                # Undo the pi/2 shift applied to the training angles and wrap above pi
                angle_offset = angle_offset - np.pi/2
                if angle_offset > np.pi:
                    angle_offset = angle_offset - (2.*np.pi)

                line[3] = str(angle_offset)

                # Transform regressed dimension
                # NOTE(review): the 'Car' mean is used for every box regardless of line[0] —
                # confirm that is intended for non-car detections.
                dims = dims_avg['Car'] + prediction[0][0]

                line = line + list(dims)

                # Write regressed 3D dim and orient to file ('\n', not the literal '/n')
                line = ' '.join([str(item) for item in line]) + '\n'
                box3d.write(line)

                cv2.rectangle(preview, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), (255, 0, 0), 3)

    # plt.figure(figsize=(10,10))
    # plt.imshow(preview/255.)
    # plt.show()


In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np

import parseTrackletXML as xmlParser

# Inputs of the single-frame preview at the end of this cell
label_path = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/label_00/'
image_path = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/image_00/data/'
# calib_path = '/home/husky/data/kitti_object/data_object_calib/training/calib/'

# Per-frame inputs and outputs of the loops below
image_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/image_00/data/'
calib_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/calib_00/'
label_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/label_00/'
box2d_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/box_2d/'
box3d_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/box_3d/'
predi_dir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/2011_09_26_drive_0014_sync/predict_00/'

# Frame identifiers: prediction file names cut at the first dot, in sorted order
dataset = []
for name in sorted(os.listdir(predi_dir)):
    dataset.append(name.split('.')[0])

# Output video; the writer is created lazily once the first frame has been read
video_out = 'E:/workspace/2dto3d/data/kitti_3D.avi'
video_writer = None

all_image = sorted(os.listdir(image_dir))
# np.random.shuffle(all_image)

# Draw the predicted 3D boxes onto every frame and append the frame to the output video.
# NOTE(review): the calibration and prediction files read here are written by code further
# down in this cell — presumably that code has to run first; confirm the intended order.
for f in all_image:
    image_file = image_dir + f
    calib_file = calib_dir + f.replace('png', 'txt')
    predi_file = predi_dir + f.replace('png', 'txt')

    image = cv2.imread(image_file)
    if image is None:
        # Not a readable image; nothing to draw or write
        continue

    # read calibration data
    # (the files are opened read-only: the previous open(..., 'w') calls truncated the
    # calibration and prediction files before they were read)
    cam_to_img = None
    if os.path.isfile(calib_file):
        with open(calib_file) as calib:
            for line in calib:
                if 'P2:' in line:
                    cam_to_img = line.strip().split(' ')
                    cam_to_img = np.asarray([float(number)
                                            for number in cam_to_img[1:]])
                    cam_to_img = np.reshape(cam_to_img, (3, 4))

    if video_writer is None:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        # Size the video from the first frame instead of assuming 1242 x 375
        video_writer = cv2.VideoWriter(video_out, fourcc, 25.0,
                                       (image.shape[1], image.shape[0]))

    # Draw 3D Bounding Box
    if cam_to_img is not None and os.path.isfile(predi_file):
        with open(predi_file) as predictions:
            for line in predictions:
                line = line.strip().split(' ')
                if len(line) < 14:
                    # Blank line or record without a 3D centre
                    continue

                dims = np.asarray([float(number) for number in line[8:11]])
                center = np.asarray([float(number) for number in line[11:14]])
                rot_y = float(line[3]) + np.arctan(center[0] /
                                                   center[2])  # float(line[14])

                # Eight corners: i/j pick the footprint corner, k the bottom (0) or top (1)
                box_3d = []

                for i in [1, -1]:
                    for j in [1, -1]:
                        for k in [0, 1]:
                            point = np.copy(center)
                            point[0] = center[0] + i * dims[1]/2 * \
                                np.cos(-rot_y+np.pi/2) + (j*i) * \
                                dims[2]/2 * np.cos(-rot_y)
                            point[2] = center[2] + i * dims[1]/2 * \
                                np.sin(-rot_y+np.pi/2) + (j*i) * \
                                dims[2]/2 * np.sin(-rot_y)
                            point[1] = center[1] - k * dims[0]

                            # Project into the image with the 3x4 camera matrix
                            point = np.append(point, 1)
                            point = np.dot(cam_to_img, point)
                            point = point[:2]/point[2]
                            # int32 (not int16) so far-off-screen corners do not wrap around;
                            # plain ints keep cv2.line independent of numpy scalar handling
                            point = point.astype(np.int32)
                            box_3d.append((int(point[0]), int(point[1])))

                # Vertical edges
                for i in range(4):
                    cv2.line(image, box_3d[2*i], box_3d[2*i+1], (0, 255, 0), 2)

                # Top and bottom faces
                for i in range(8):
                    cv2.line(image, box_3d[i], box_3d[(i+2) % 8], (0, 255, 0), 2)

    video_writer.write(np.uint8(image))

# Flush and close the container; an unreleased writer can leave the file unfinished
if video_writer is not None:
    video_writer.release()

# For every frame, attach a 3D centre from the label file to each regressed box whose 2D box
# centre lies close to that centre's image projection, and write the result to predi_dir.
all_image = sorted(os.listdir(image_dir))
# np.random.shuffle(all_image)

for f in all_image:
    image_file = image_dir + f
    box2d_file = box2d_dir + f.replace('png', 'txt')
    box3d_file = box3d_dir + f.replace('png', 'txt')
    label_file = label_dir + f.replace('png', 'txt')
    calib_file = calib_dir + f.replace('png', 'txt')
    predi_file = predi_dir + f.replace('png', 'txt')

    # The label file is only read here: the previous open(label_file, 'w') emptied it, leaving
    # no candidate centres to match against.
    with open(predi_file, 'w') as prediction:
        # Construct list of all candidate centers
        centers_2d = []
        centers_3d = []

        cam_to_img = None
        if os.path.isfile(calib_file):
            with open(calib_file) as calib:
                for line in calib:
                    if 'P2:' in line:
                        cam_to_img = line.strip().split(' ')
                        cam_to_img = np.asarray([float(number)
                                                for number in cam_to_img[1:]])
                        cam_to_img = np.reshape(cam_to_img, (3, 4))

        if cam_to_img is not None and os.path.isfile(label_file):
            with open(label_file) as labels:
                for line in labels:
                    line = line.strip().split(' ')
                    if len(line) < 14:
                        # Blank line or record without a 3D centre
                        continue

                    center_3d = np.asarray([float(number) for number in line[11:14]])
                    center = np.dot(cam_to_img, np.append(center_3d, 1))
                    if center[2] <= 0:
                        # Not in front of the camera: the projection is undefined or mirrored
                        continue
                    center = center[:2]/center[2]
                    # int32 (not int16) so far-off-screen projections do not wrap around
                    center = center.astype(np.int32)

                    centers_2d.append(center)
                    centers_3d.append(center_3d)

        if not os.path.isfile(box3d_file):
            # No regressed boxes for this frame; leave the prediction file empty
            continue

        # Find the nearest centres among the candidates
        with open(box3d_file) as boxes:
            for line in boxes:
                line = line.strip().split(' ')
                if len(line) < 8:
                    continue

                obj = {'xmin': int(float(line[4])),
                       'ymin': int(float(line[5])),
                       'xmax': int(float(line[6])),
                       'ymax': int(float(line[7])), }

                center = np.asarray(
                    [(obj['xmin']+obj['xmax'])/2., (obj['ymin'] + obj['ymax'])/2.])

                nearest_index = -1
                last_distance = 1000000000.

                for i, candidate in enumerate(centers_2d):
                    # Squared pixel distance; only candidates closer than 1000 qualify
                    distance = np.sum(np.square(center - candidate))

                    if distance < 1000 and distance < last_distance:
                        nearest_index = i
                        last_distance = distance

                if nearest_index > -1:
                    line += list(centers_3d[nearest_index])
                    # Each candidate may be claimed by one box only
                    del centers_2d[nearest_index]
                    del centers_3d[nearest_index]

                    # Write regressed 3D dim and orient to file
                    line = ' '.join([str(item) for item in line]) + '\n'
                    prediction.write(line)
# Convert the drive's tracklets into per-frame label files and write one calibration file per
# frame containing the P2 projection matrix.
kittiDir = 'E:/workspace/2dto3d/data/2011_09_26_drive_0014_sync/2011_09_26/'
drive = '2011_09_26_drive_0014_sync/'

label_dir = kittiDir + drive + 'label_00/'
image_dir = kittiDir + drive + 'image_00/data/'
calib_dir = kittiDir + drive + 'calib_00/'

# FIGURE OUT THE LABELS
# Start from empty output folders. Done with os calls instead of shelling out to `rm`, which
# does not exist on the Windows paths used here and passed unquoted paths to the shell.
for stale_dir in (label_dir, calib_dir):
    os.makedirs(stale_dir, exist_ok=True)
    for stale_name in os.listdir(stale_dir):
        stale_path = stale_dir + stale_name
        if os.path.isfile(stale_path):
            os.remove(stale_path)

# Read transformation matrices
with open(kittiDir + drive + 'calib_velo_to_cam.txt') as velo_calib:
    for line in velo_calib:
        if 'R:' in line:
            R = line.strip().split(' ')
            R = np.asarray([float(number) for number in R[1:]])
            R = np.reshape(R, (3, 3))

        if 'T:' in line:
            T = line.strip().split(' ')
            T = np.asarray([float(number) for number in T[1:]])
            T = np.reshape(T, (3, 1))

with open(kittiDir + drive + 'calib_cam_to_cam.txt') as cam_calib:
    for line in cam_calib:
        if 'R_rect_00:' in line:
            R0_rect = line.strip().split(' ')
            R0_rect = np.asarray([float(number) for number in R0_rect[1:]])
            R0_rect = np.reshape(R0_rect, (3, 3))

# Pad the 3x3 rectification to a 4x4 homogeneous matrix
R0_rect = np.append(R0_rect, np.zeros((3, 1)), axis=1)
R0_rect = np.append(R0_rect, np.zeros((1, 4)), axis=0)
R0_rect[-1, -1] = 1

# Assemble [R | T] and pad it to 4x4 as well
Tr_velo_to_cam = np.concatenate([R, T], axis=1)
Tr_velo_to_cam = np.append(Tr_velo_to_cam, np.zeros((1, 4)), axis=0)
Tr_velo_to_cam[-1, -1] = 1

transform = np.dot(R0_rect, Tr_velo_to_cam)

# print Tr_velo_to_cam
# print R0_rect
# print transform

# Read the tracklets
for trackletObj in xmlParser.parseXML(kittiDir + drive + 'tracklet_labels.xml'):
    for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
        label_file = label_dir + str(absoluteFrameNumber).zfill(10) + '.txt'

        # Apply the combined transform to the homogeneous tracklet position
        translation = np.append(translation, 1)
        translation = np.dot(transform, translation)
        translation = translation[:3]/translation[3]

        # Append mode: several tracklets can contribute to the same frame
        with open(label_file, 'a') as file_writer:
            line = [trackletObj.objectType] + [0, 0, rotation[2]] + [0, 0, 0,
                                                                     0] + list(trackletObj.size) + list(translation) + [rotation[2]]
            line = ' '.join([str(item) for item in line]) + '\n'
            file_writer.write(line)

# FIGURE OUT THE CALIBRATION
line_P2 = None
with open(kittiDir + drive + 'calib_cam_to_cam.txt') as cam_calib:
    for line in cam_calib:
        if 'P_rect_02' in line:
            line_P2 = line.replace('P_rect_02', 'P2')
            print(line_P2)

if line_P2 is None:
    # Fail here with a clear message rather than with a NameError in the loop below
    raise ValueError('P_rect_02 not found in calib_cam_to_cam.txt')

for image in os.listdir(image_dir):
    label_file = label_dir + image.split('.')[0] + '.txt'
    calib_file = calib_dir + image.split('.')[0] + '.txt'

    # Create calib files
    with open(calib_file, 'w') as file_writer:
        file_writer.write(line_P2)

    # Fix missing labels: make sure every frame has a (possibly empty) label file
    with open(label_file, 'a') as file_writer:
        file_writer.write('')

# Re-read the per-frame calibration files. Only the P2 matrix of the last file that contains
# one remains in cam_to_img, which the preview below uses.
all_image = sorted(os.listdir(image_dir))
# np.random.shuffle(all_image)

for f in all_image:
    image_file = image_dir + f
    calib_file = calib_dir + f.replace('png', 'txt')
    predi_file = predi_dir + f.replace('png', 'txt')

    # read calibration data (context manager closes each handle; the original leaked them)
    with open(calib_file) as calib:
        for line in calib:
            if 'P2:' in line:
                cam_to_img = line.strip().split(' ')
                cam_to_img = np.asarray([float(number)
                                        for number in cam_to_img[1:]])
                cam_to_img = np.reshape(cam_to_img, (3, 4))

            # if 'R0_rect:' in line:
            #    R0_rect = line.strip().split(' ')
            #    R0_rect = np.asarray([float(number) for number in R0_rect[1:]])
            #    R0_rect = np.reshape(R0_rect, (3,3))

            # if 'Tr_velo_to_cam:' in line:
            #    Tr_velo_to_cam = line.strip().split(' ')
            #    Tr_velo_to_cam = np.asarray([float(number) for number in Tr_velo_to_cam[1:]])
            #    Tr_velo_to_cam = np.reshape(Tr_velo_to_cam, (3,4))

    # R0_rect = np.append(R0_rect, np.zeros((3,1)), axis=1)
    # R0_rect = np.append(R0_rect, np.zeros((1,4)), axis=0)
    # R0_rect[-1,-1] = 1

    # Tr_velo_to_cam = np.append(Tr_velo_to_cam, np.zeros((1,4)), axis=0)
    # Tr_velo_to_cam[-1,-1] = 1

# draw 2D boxes and 3D boxes for one frame and display it
index = 11
preview_file = image_path + dataset[index] + '.png'
image = cv2.imread(preview_file)
if image is None:
    # cv2.imread signals failure by returning None rather than raising
    raise FileNotFoundError('cannot read image: ' + preview_file)
# cv2 loads BGR while matplotlib expects RGB. Converting before drawing shows the picture in
# its true colours and keeps the (255, 0, 0) boxes red on screen.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

with open(label_path + dataset[index] + '.txt') as labels:
    for line in labels:
        line = line.strip().split(' ')

        if 'Car' not in line[0] or len(line) < 14:
            # Other classes, blank lines and records without a 3D centre are not drawn
            continue

        # Draw 2D Bounding Box
        x_min, y_min, x_max, y_max = [
            int(float(number)) for number in line[4:8]]
        # cv2.rectangle(image, (x_min,y_min), (x_max,y_max), (255,255,0), 3)

        # Draw 3D Bounding Box
        dims = np.asarray([float(number) for number in line[8:11]])
        center = np.asarray([float(number) for number in line[11:14]])

        # Records whose angle field is (almost) zero are skipped
        if np.abs(float(line[3])) < 0.01:
            continue
        print(line[3], center)

        rot_y = float(line[3]) + np.arctan(center[0] /
                                           center[2])  # float(line[14])

        # Eight corners: i/j pick the footprint corner, k the bottom (0) or top (1)
        box_3d = []

        for i in [1, -1]:
            for j in [1, -1]:
                for k in [0, 1]:
                    point = np.copy(center)
                    point[0] = center[0] + i * dims[1]/2 * \
                        np.cos(-rot_y+np.pi/2) + (j*i) * \
                        dims[2]/2 * np.cos(-rot_y)
                    point[2] = center[2] + i * dims[1]/2 * \
                        np.sin(-rot_y+np.pi/2) + (j*i) * \
                        dims[2]/2 * np.sin(-rot_y)
                    point[1] = center[1] - k * dims[0]

                    # Project into the image with the 3x4 camera matrix
                    point = np.append(point, 1)
                    point = np.dot(cam_to_img, point)
                    point = point[:2]/point[2]
                    # int32 (not int16) so far-off-screen corners do not wrap around
                    point = point.astype(np.int32)
                    box_3d.append((int(point[0]), int(point[1])))

        # Vertical edges
        for i in range(4):
            cv2.line(image, box_3d[2*i], box_3d[2*i+1], (255, 0, 0), 3)

        # Top and bottom faces
        for i in range(8):
            cv2.line(image, box_3d[i], box_3d[(i+2) % 8], (255, 0, 0), 3)

fig = plt.figure(figsize=(20, 20))
plt.imshow(image)
plt.show()
