In [1]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Dense, MaxPool2D, BatchNormalization, UpSampling2D, ReLU
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import cv2
import os
from math import sin, cos


In [2]:

camera_matrix = np.array([[2304.5479, 0,  1686.2379],
                          [0, 2305.8757, 1354.9849],
                          [0, 0, 1]], dtype=np.float32)
camera_matrix_inv = np.linalg.inv(camera_matrix)
IMG_SHAPE = (2710, 3384, 3)

train=pd.read_csv('train.csv')
test=pd.read_csv('sample_submission.csv')

def imread(path, fast_mode=False):
    img = cv2.imread(path)
    if not fast_mode and img is not None and len(img.shape) == 3:
        img = np.array(img[:, :, ::-1])
    return img

In [3]:
cars = [
        'baojun-310-2017','biaozhi-3008','biaozhi-liangxiang','bieke-yinglang-XT',
        'biyadi-2x-F0','changanbenben','dongfeng-DS5','feiyate',
        'fengtian-liangxiang','fengtian-MPV','jilixiongmao-2015','lingmu-aotuo-2009',
        'lingmu-swift','lingmu-SX4-2012','sikeda-jingrui','fengtian-weichi-2006',
        '037-CAR02','aodi-a6','baoma-330','baoma-530',
        'baoshijie-paoche','bentian-fengfan','biaozhi-408','biaozhi-508',
        'bieke-kaiyue','fute','haima-3','kaidilake-CTS',
        'leikesasi','mazida-6-2015','MG-GT-2015','oubao',
        'qiya','rongwei-750','supai-2016','xiandai-suonata',
        'yiqi-benteng-b50','bieke','biyadi-F3','biyadi-qin',
        'dazhong','dazhongmaiteng','dihao-EV','dongfeng-xuetielong-C6',
        'dongnan-V3-lingyue-2011','dongfeng-yulong-naruijie','019-SUV','036-CAR01',
        'aodi-Q7-SUV','baojun-510','baoma-X5','baoshijie-kayan',
        'beiqi-huansu-H3','benchi-GLK-300','benchi-ML500','fengtian-puladuo-06',
        'fengtian-SUV-gai','guangqi-chuanqi-GS4-2015','jianghuai-ruifeng-S3','jili-boyue',
        'jipu-3','linken-SUV','lufeng-X8','qirui-ruihu',
        'rongwei-RX5','sanling-oulande','sikeda-SUV','Skoda_Fabia-2011',
        'xiandai-i25-2016','yingfeinidi-qx80','yingfeinidi-SUV','benchi-SUR',
        'biyadi-tang','changan-CS35-2012','changan-cs5','changcheng-H6-2016',
        'dazhong-SUV','dongfeng-fengguang-S560','dongfeng-fengxing-SX6'
        ]
carid2name={}
for i in range(len(cars)):
    carid2name[i]=cars[i]

In [4]:
def str2cords(s):
    names=['id', 'yaw', 'pitch', 'roll', 'x', 'y', 'z']
    cords=[]
    for i in np.array(s.split()).reshape([-1,7]):
        cords.append(dict(zip(names, i.astype(float))))
        if 'id' in cords[-1]:
            cords[-1]['id']=int(cords[-1]['id'])
            
    return cords

def rotate(x, angle):
    x = x + angle
    x = x - (x + np.pi) // (2 * np.pi) * 2 * np.pi
    return x

def get_img_coords(s):
    coords = str2coords(s)
    xs = [c['x'] for c in coords]
    ys = [c['y'] for c in coords]
    zs = [c['z'] for c in coords]
    P = np.array(list(zip(xs, ys, zs))).T
    img_p = np.dot(camera_matrix, P).T
    img_p[:, 0] /= img_p[:, 2]
    img_p[:, 1] /= img_p[:, 2]
    img_xs = img_p[:, 0]
    img_ys = img_p[:, 1]
    img_zs = img_p[:, 2]
    return img_xs, img_ys

def euler_to_Rot(yaw, pitch, roll):
    Y = np.array([[cos(yaw), 0, sin(yaw)],
                  [0, 1, 0],
                  [-sin(yaw), 0, cos(yaw)]])
    P = np.array([[1, 0, 0],
                  [0, cos(pitch), -sin(pitch)],
                  [0, sin(pitch), cos(pitch)]])
    R = np.array([[cos(roll), -sin(roll), 0],
                  [sin(roll), cos(roll), 0],
                  [0, 0, 1]])
    return np.dot(Y, np.dot(P, R))

In [5]:
PATH='car_models_json/'

def draw_obj(image, vertices, triangles, num_for_color):
    for t in triangles:
        coord = np.array([vertices[t[0]][:2], vertices[t[1]][:2], vertices[t[2]][:2]], dtype=np.int32)
#         cv2.fillConvexPoly(image, coord, (0,0,255))
        place = num_for_color%3
        color = [0,0,0]
        color[place] = 255
        color = tuple(color)
        cv2.polylines(image, np.int32([coord]), 1, color)
    return image

def draw_bw(image, vertices, triangles, num_for_color):
    for t in triangles:
        coord = np.array([vertices[t[0]][:2], vertices[t[1]][:2], vertices[t[2]][:2]], dtype=np.int32)
        color = [255,255,255]
        color = tuple(color)
        cv2.polylines(image, np.int32([coord]), 1, color)
    return image

def visualize(img, coords):
    img = img.copy()
    num_for_color = 0
    masks = np.zeros(8*img.shape[0]*img.shape[1]).reshape(8, img.shape[0], img.shape[1])
    for point in coords:
        c_model = carid2name[int(point['id'])] + '.json'
        with open(PATH+c_model) as json_file:
            data = json.load(json_file)
        vertices = np.array(data['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(data['faces']) - 1
        x, y, z = point['x'], point['y'], point['z']
        yaw, pitch, roll = -point['pitch'], -point['yaw'], -point['roll']
        Rt = np.eye(4)
        t = np.array([x, y, z])
        Rt[:3, 3] = t
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T
        Rt = Rt[:3, :]
        P = np.ones((vertices.shape[0],vertices.shape[1]+1))
        P[:, :-1] = vertices
        P = P.T
        img_cor_points = np.dot(camera_matrix, np.dot(Rt, P))
        img_cor_points = img_cor_points.T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]
        img_cor_points = img_cor_points.astype(int)
        # find counters
        overlay = np.zeros_like(img)
        overlay = draw_bw(overlay, img_cor_points, triangles, num_for_color)
        overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2GRAY)
        _, contours, hierarchy = cv2.findContours(overlay, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        # draw counters
        for i in range(len(contours)):
            if hierarchy[0][i][3]==-1:
                overlay = cv2.drawContours(overlay, contours, i, 255, -1)
        # for instance segmentation
        overlay_contours = overlay
        overlay_contours = cv2.Canny(overlay_contours, 30, 200)
        kernel = np.ones((8,8),np.uint8)
        overlay_contours = cv2.dilate(overlay_contours,kernel,iterations = 1)
        # logits
        masks[0][overlay!=0] = 1
        # for IS
        masks[0][overlay_contours!=0] = 0
        # x
        masks[1][overlay!=0] = point['x']/100
        # y
        masks[2][overlay!=0] = point['y']/100
        # z
        masks[3][overlay!=0] = point['z']/100
        # yaw
        masks[4][overlay!=0] = point['yaw']
        # pitch sin
        psin = sin(point['pitch'])
        masks[5][overlay!=0] = psin
        # pitch cos
        pcos = cos(point['pitch'])
        masks[6][overlay!=0] = pcos
        # roll
        masks[7][overlay!=0] = rotate(point['roll'],np.pi)
        
        #plt.imshow(overlay)
        #plt.show()
        img = draw_obj(img, img_cor_points, triangles, num_for_color)
        num_for_color += 1
    return masks

In [6]:
#sample
img=cv2.imread('train_images/ID_0be7ae789.jpg')
cord=np.array(train.loc[train['ImageId']=='ID_0be7ae789', 'PredictionString'])
point=str2cords(cord[0])[3]
cmodel=carid2name[int(point['id'])]
with open('car_models_json/'+cmodel+'.json') as file:
    data=json.load(file)
vertices=np.array(data['vertices'])
vertices[:, 1]=-vertices[:, 1]
traingles=np.array(data['faces'])-1
x,y,z=point['x'], point['y'], point['z']
yaw, pitch, roll=-point['yaw'], -point['pitch'], -point['roll']
Rt = np.eye(4)
t = np.array([x, y, z])
Rt[:3, 3] = t
Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T
Rt = Rt[:3, :]
P = np.ones((vertices.shape[0],vertices.shape[1]+1))
P[:, :-1] = vertices
P = P.T
img_cor_points = np.dot(camera_matrix, np.dot(Rt, P))
img_cor_points = img_cor_points.T
img_cor_points[:, 0] /= img_cor_points[:, 2]
img_cor_points[:, 1] /= img_cor_points[:, 2]
img_cor_points = img_cor_points.astype(int)

In [7]:
overlay=np.zeros_like(img)
for t in traingles:
    cor=np.array([img_cor_points[t[0]][:2], img_cor_points[t[1]][:2], img_cor_points[t[2]][:2]])
    color=tuple([255,255,255])
    cv2.polylines(overlay, np.int32([cor]), 1, color)
    
overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2GRAY)
_, contours, hierarchy = cv2.findContours(overlay, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        # draw counters
for i in range(len(contours)):
    if hierarchy[0][i][3]==-1:
        overlay = cv2.drawContours(overlay, contours, i, 255, -1)
        
overlay_contours = cv2.Canny(overlay, 30, 200)
kernel = np.ones((8,8),np.uint8)
overlay_contours = cv2.dilate(overlay_contours,kernel,iterations = 1)


In [19]:
uniq, c=np.unique(mask[0], return_counts=True)
print(dict(zip(uniq, c)))

{0.0: 8698336, 0.0409935: 472304}


In [18]:
mask=np.zeros(8*img.shape[0]*img.shape[1]).reshape(8, img.shape[0], img.shape[1])
mask[0][overlay!=0]=point['x']/100

In [6]:
class conv_block(tf.keras.Model):
    def __init__(self, ch):
        super(conv_block, self).__init__()
        self.conv=Sequential([Conv2D(ch, 3,strides=(np.int64(1),np.int64(1)), padding='same', data_format='channels_first'),
                                BatchNormalization(),
                              ReLU(),
                         Conv2D(ch, 3, strides=(np.int64(1),np.int64(1)), padding='same', data_format='channels_first'),
                              BatchNormalization(),
                              ReLU()])
    
    def call(self, x):
        x=self.conv(x)
        return x

class network(tf.keras.Model):
    def __init__(self, n_classes):
        super(network, self).__init__()
        self.c0=conv_block(32)
        self.m0=MaxPool2D(2)
        self.c1=conv_block(64)
        self.m1=MaxPool2D(2)
        self.c2=conv_block(128)
        self.m2=MaxPool2D(2)
        self.c3=conv_block(256)
        self.m3=MaxPool2D(2)
        self.c4=conv_block(256)
        self.up0=UpSampling2D(size=(2,2), interpolation='bilinear')
        self.c5=conv_block(128)
        self.up1=UpSampling2D(size=(2,2), interpolation='bilinear')
        self.c6=conv_block(64)
        self.up2=UpSampling2D(size=(2,2), interpolation='bilinear')
        self.c7=conv_block(32)
        self.up3=UpSampling2D(size=(2,2), interpolation='bilinear')
        self.last=conv_block(n_classes)

    def call(self, x):
        x0=self.c0(x)
        #print(x0)
        x1=self.c1(self.m0(x0))
        #print(x1)
        x2=self.c2(self.m1(x1))
        #print(x2)
        x3=self.c3(self.m2(x2))
        #print(x3)
        x=self.c4(self.m3(x3))
        #print(x)
        x=tf.concat([x3, self.up0(x)], 1)
        #print(x)
        x=self.up1(self.c5(x))
        #print(x)
        x=tf.concat([x2, x], 1)
        #print(x)
        x=self.up2(self.c6(x))
        #print(x)
        x=tf.concat([x1, x], 1)
        #print(x)
        x=self.up3(self.c7(x))
        #print(x)
        x=tf.concat([x0, x], 1)
        #print(x)
        x=self.last(x)
        #print(x)

        return x

In [7]:
from sklearn.model_selection import train_test_split
train, val=train_test_split(train, test_size=0.9, random_state=42)
test=test

scale=2
BATCH_SIZE=5
EPOCH=10

def sample_fetcher(dframe):
    
    for val in dframe.values:
        Id=val[0]
        ps=val[1]
        img=imread('train_images/'+Id+'.jpg')
        regr=visualize(img, str2cords(ps))
        img=img[1430:, :, :]/255
        img=cv2.resize(img, (640//scale, 480//scale))
        img=np.rollaxis(img, 2, 0)
        regr=regr[:, 1430:, :]
        regr=np.rollaxis(np.rollaxis(regr, 2,0), 2, 0)
        regr=cv2.resize(regr, (640//scale, 480//scale))
        regr=np.rollaxis(regr, 2, 0)
        mask=imread('train_masks/'+Id+'.jpg')
        if type(mask)==np.ndarray:
            mask=mask[1430:, :, :]
            mask=cv2.resize(mask, (640//scale, 480//scale))
            mask=np.rollaxis(mask, 2, 0)

        if type(mask)!=np.ndarray:
            mask=np.zeros(2710*3384*3).reshape(2710,3384,3)
            mask=mask[1430: :, :]
            mask=cv2.resize(mask, (640//scale, 480//scale))
            mask=np.rollaxis(mask, 2, 0)

            
        img=tf.convert_to_tensor(img, dtype=tf.float32)
        regr=tf.convert_to_tensor(regr, dtype=tf.float32)
        mask=tf.convert_to_tensor(mask, dtype=tf.float32)
        
        yield (img, regr, mask)
        
def train_sampler():
    return sample_fetcher(train)
        
def valid_sampler():
    return sample_fetcher(val)


train_dataset=tf.data.Dataset.from_generator(generator=train_sampler, 
                                             output_types=(tf.float32, tf.float32, tf.float32))
train_dataset=train_dataset.repeat()
train_dataset=train_dataset.batch(BATCH_SIZE)
train_dataset=train_dataset.shuffle(10)

valid_dataset=tf.data.Dataset.from_generator(generator=valid_sampler, output_types=(tf.float32, tf.float32, tf.float32))


In [8]:
def custom_loss(pred, regr, mask):
    
    pred_mask=tf.sigmoid(pred[:, 0])
    pred_regr=pred[:, 1:]
    with tf.compat.v1.variable_scope('loss'):
        mask_loss=mask*tf.math.log(pred_mask+1e-12)+(1-mask)*tf.math.log(1-pred_mask+1e-12)
        mask_loss=-tf.reduce_sum(tf.reduce_mean(mask_loss, 0))
    
        regr_loss=tf.reduce_sum( tf.reduce_sum( (tf.reduce_sum(tf.math.abs(pred_regr-regr), 1)*mask), 1))/tf.reduce_sum((tf.reduce_sum( mask, 1)), 1) 
        regr_loss=tf.reduce_mean(regr_loss, 0)
    
        total_loss=mask_loss+regr_loss
        
    #print("mask loss : ", mask_loss)
    #print("regr loss : ", regr_loss)
    #if not size_avg:
     #   total_loss*=pred[0]
                            
    return total_loss

In [12]:

(img, regr, mask)=next(train_sampler())
'''
img=img[np.newaxis, :, :, :]
reg=regr[np.newaxis, :, :, :]
mask=mask[np.newaxis, :, :, :]

out=model(img_)
pred_mask=out[:, 0]
pred_regr=out[:, 1:]

pred_regr=out[:, 1:]

print(pred_mask.shape)
print(mask.shape)


a=reg[:, 0]*tf.math.log(pred_mask + 1e-12)
print(a)
a_=tf.reduce_mean(a, 0)
print(a_)
a__=tf.reduce_sum(a_)
print(a__)
'''

'\nimg=img[np.newaxis, :, :, :]\nreg=regr[np.newaxis, :, :, :]\nmask=mask[np.newaxis, :, :, :]\n\nout=model(img_)\npred_mask=out[:, 0]\npred_regr=out[:, 1:]\n\npred_regr=out[:, 1:]\n\nprint(pred_mask.shape)\nprint(mask.shape)\n\n\na=reg[:, 0]*tf.math.log(pred_mask + 1e-12)\nprint(a)\na_=tf.reduce_mean(a, 0)\nprint(a_)\na__=tf.reduce_sum(a_)\nprint(a__)\n'

In [21]:
regr.shape

TensorShape([8, 240, 320])

In [22]:
uniq, count=np.unique(regr[1], return_counts=True)

In [23]:
an=dict(zip(uniq, count))

In [24]:
an

{-0.145415: 281,
 -0.14516151: 1,
 -0.14465195: 1,
 -0.14240281: 1,
 -0.1422022: 1,
 -0.1402652: 1,
 -0.1395915: 1,
 -0.135776: 6,
 -0.1355302: 1,
 -0.13299559: 1,
 -0.1293498: 1,
 -0.12930965: 1,
 -0.12770323: 1,
 -0.1269804: 1,
 -0.126137: 1032,
 -0.125138: 1,
 -0.12467434: 1,
 -0.123787895: 1,
 -0.12325605: 1,
 -0.121180646: 2,
 -0.119666256: 1,
 -0.11769838: 1,
 -0.11510009: 1,
 -0.111619584: 2,
 -0.11035296: 1,
 -0.10580829: 1,
 -0.10545234: 1,
 -0.104861: 902,
 -0.1040119: 1,
 -0.10355242: 2,
 -0.102363: 116,
 -0.102325834: 1,
 -0.10214388: 2,
 -0.10137729: 1,
 -0.10105145: 1,
 -0.100313544: 1,
 -0.0995245: 37,
 -0.099158846: 1,
 -0.09848247: 1,
 -0.09845787: 1,
 -0.09823711: 1,
 -0.09719844: 1,
 -0.0970105: 666,
 -0.096912906: 1,
 -0.096893385: 5,
 -0.09672553: 1,
 -0.0963859: 540,
 -0.095182285: 1,
 -0.094640814: 1,
 -0.0944932: 99,
 -0.09416366: 1,
 -0.0929411: 1,
 -0.092169076: 1,
 -0.09085717: 1,
 -0.090088494: 1,
 -0.08961298: 1,
 -0.0885603: 406,
 -0.08807529: 1,
 -0.08782

In [10]:
model=network(8)
optm=tf.keras.optimizers.Adam(learning_rate=0.001)
''' 
model.compile(optimizers=optm, loss=custom_loss, metrics=[custom_loss])
history=model.fit(train_dataset, steps_per_epoch=np.ceil(len(train)/BATCH_SIZE), 
                 epochs=EPOCH, validation_data=valid_dataset, 
                  validation_steps=int(np.ceil(len(val)/BATCH_SIZE)), use_multiprocessing=False)
'''

' \nmodel.compile(optimizers=optm, loss=custom_loss, metrics=[custom_loss])\nhistory=model.fit(train_dataset, steps_per_epoch=np.ceil(len(train)/BATCH_SIZE), \n                 epochs=EPOCH, validation_data=valid_dataset, \n                  validation_steps=int(np.ceil(len(val)/BATCH_SIZE)), use_multiprocessing=False)\n'

In [11]:
for _ in range(EPOCH):
    
    for img, regr, mask in train_dataset:
        #img=img[np.newaxis, :, :, :]
        #regr=regr[np.newaxis, :, :, :]
        #mask=mask[np.newaxis, :, :, :]

        #with tf.compat.v1.variable_scope('loss', reuse=True):
        with tf.GradientTape() as tape:
            out=model(img)
            loss=custom_loss(out, regr[:, 1:], regr[:, 0])

            grads=tape.gradient(loss, model.trainable_variables)
            
            optm.apply_gradients(zip(grads, model.trainable_variables))
        print("Epoch {:03d}: Loss: {:.3f}".format(_, loss))

Epoch 000: Loss: 53273.207
Epoch 000: Loss: 53244.383
Epoch 000: Loss: 53245.020
Epoch 000: Loss: 53244.719
Epoch 000: Loss: 53243.984
Epoch 000: Loss: 53246.266
Epoch 000: Loss: 53246.547
Epoch 000: Loss: 53246.738
Epoch 000: Loss: 53245.957
Epoch 000: Loss: 53249.766
Epoch 000: Loss: 53244.785
Epoch 000: Loss: 53352.910
Epoch 000: Loss: 53327.578
Epoch 000: Loss: 53263.000
Epoch 000: Loss: 53245.984
Epoch 000: Loss: 53266.414
Epoch 000: Loss: 53275.219
Epoch 000: Loss: 53280.758
Epoch 000: Loss: 53247.316


KeyboardInterrupt: 