In [3]:
import tensorflow as tf

from data_handling.model_info import load_model_info
from data_handling import DataLoader, Conversion_Layers
from network import loss, cnn_definition_paper

from star_representation import StarRepresentation
from dash_repesentation import RemoveCameraEffect, DashRepresentation
from reverse_op import PODestarisation

from keras.optimizers import Adam

# ---------------------------------------------------------------------------
# Notebook configuration: dataset location, data splits, network geometry.
# All names below are read by the training cell further down.
# ---------------------------------------------------------------------------

# Root folder holding the datasets and the name of the one used in this run.
bop_path = '/tf/notebooks/datasets'
dataset = 'tless'

# '<bop_path>/<dataset>'
dataset_path = '/'.join((bop_path, dataset))

# Echo the resolved path and the GPUs TensorFlow can see, so a wrong mount or
# a CPU-only kernel is obvious before any training starts.
print('Dataset path:   ' + dataset_path)
print('GPUs Available: ' + str(tf.config.list_physical_devices("GPU")))

# Sub-folders of dataset_path used for training and for testing.
train = [
    'train_pbr',
    'train_primesense',
]
test = [
    'test_primesense',
]

# xyDim is handed to the conversion layers and to the data loader; strides is
# handed to the conversion layers, RemoveCameraEffect and UV_diff.
xyDim = 112
strides = 2

Dataset path:   /tf/notebooks/datasets/tless
GPUs Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
def pred_loss(true, pred):
    """Keras-compatible loss that passes the model output through unchanged.

    The training models in this notebook emit their loss terms as model
    outputs, so the "loss" Keras needs is simply that output.

    Args:
        true: Target value supplied by Keras; not used.
        pred: Model output for this head.

    Returns:
        ``pred``, unmodified.
    """
    loss_value = pred
    return loss_value

def totuplething(x1, x2, x3, x4, x5, x6, x7):
    """Repack seven dataset fields into the ``(inputs, target)`` pair used by ``fit``.

    The seventh field is moved in front of the fifth and sixth, and the sixth
    field is additionally returned as the target element.

    Args:
        x1, x2, x3, x4, x5, x6, x7: The seven per-sample (or per-batch) fields
            produced by the dataset, in dataset order.

    Returns:
        ``((x1, x2, x3, x4, x7, x5, x6), x6)``.
    """
    # presumably this order matches the order of the model's inputs -- verify
    # against Conversion_Layers.create_Dataset_conversion_layers.
    reordered_inputs = (x1, x2, x3, x4, x7, x5, x6)
    target = x6
    return reordered_inputs, target

# Train one network per object id (1..30) of the configured dataset and save,
# for every object, the weights of the full training graph plus the trainable
# sub-model.
#
# The distribution strategy is created once and reused for all objects; the
# set of devices does not change between objects, so there is no reason to
# build a fresh strategy on every iteration.
mirrored_strategy = tf.distribute.MirroredStrategy()

for oiu in range(1,31):
    print('Object ', oiu)
    model_info = load_model_info(dataset_path, oiu, verbose=1)
    train_data = DataLoader.load_gt_data([f'{dataset_path}/{d}' for d in train ], oiu)
    print(f'Found train data for {len(train_data)} occurencies of object {oiu}, where {len([d for d in train_data if "primesense" in d["root"]])} origined from primesense.')

    # Everything that creates variables (layers, models, optimizers) is built
    # inside the strategy scope.
    with mirrored_strategy.scope():
        # --- Inputs and ground-truth tensors -----------------------------
        inputs, valid_po, isvalid, depth, segmentation = Conversion_Layers.create_Dataset_conversion_layers(xyDim, xyDim, model_info, strides)

        # Offset for the dash representation: half of rows 0..2 of the last
        # column of the first discrete symmetry matrix, or 0 when the object
        # has no discrete symmetries.
        # NOTE(review): presumably that column is the translation part of a
        # 4x4 transform -- confirm against load_model_info.
        discrete_symmetries = model_info["symmetries_discrete"]
        if len(discrete_symmetries) > 0:
            dash_offset = discrete_symmetries[0][:3,-1] / 2.
        else:
            dash_offset = 0

        valid_dash = DashRepresentation(dash_offset)(inputs['roationmatrix'], valid_po)
        valid_po_star = StarRepresentation(model_info)(valid_po)

        # --- Network and trainable sub-model -----------------------------
        cnn_po_star, cnn_po_dash, cnn_w_px, cnn_w_d, cnn_seg = cnn_definition_paper.rgb255_to_obj_net(inputs['rgb'])
        dash_image = RemoveCameraEffect(strides)(cnn_po_dash, inputs['camera_matrix'], inputs['coord_offset'])

        # Sub-model from the image/camera inputs to the network outputs; this
        # is what gets exported with .save() after training.
        csl_trainable_layers = tf.keras.Model([inputs['rgb'], inputs['depth'], inputs['camera_matrix'], inputs['coord_offset']],
                                              [cnn_po_star, dash_image, cnn_w_px, cnn_w_d, cnn_seg])

        # --- Reconstruction used only by the loss terms --------------------
        po_image = PODestarisation(model_info,amount_of_instances = 1)(cnn_po_star, dash_image, isvalid, inputs['roationmatrix'])
        po_uv, po_cam = loss.Po_to_Img()(po_image, inputs['camera_matrix'], inputs['roationmatrix'], inputs['translation'])

        # --- Loss terms, emitted as model outputs --------------------------
        diff_postar = loss.AvgSqrDiff_of_validPixels(name='pos_diff')(cnn_po_star, valid_po_star, isvalid)
        diff_vo = loss.AvgSqrDiff_of_validPixels(name='vo_diff')(dash_image, valid_dash, isvalid)
        (seg_loss, seg_met, seg_fgmet) = loss.Seg_Loss(name='seg')(cnn_seg, segmentation)

        sig2inv = loss.ToOmega()(cnn_w_px, isvalid)
        po_uv_diff = loss.UV_diff(strides)(po_uv, inputs['coord_offset'])
        lw2_loss, chi2error = loss.Avg_nllh(name='w2')(sig2inv, po_uv_diff, isvalid)

        sig1inv =  loss.ToOmega()(cnn_w_d, isvalid)
        po_depth_diff = loss.D_diff()(po_cam, depth)
        lw1_loss, chi2error_d = loss.Avg_nllh(name='w1')(sig1inv, po_depth_diff, isvalid)

        # --- Training models ---------------------------------------------
        # Both models output loss/metric tensors directly; pred_loss passes
        # each output through, and loss_weights selects which outputs are
        # summed into the optimized total (weight 0 = reported only).
        train_povoseg_model = tf.keras.Model(inputs.values(), (diff_postar, diff_vo, seg_loss, seg_met, seg_fgmet))
        train_model = tf.keras.Model(inputs.values(), (diff_postar, diff_vo, seg_loss, seg_met, seg_fgmet,
                                                      lw2_loss, chi2error, lw1_loss, chi2error_d))

        # NOTE: train_povoseg_model is compiled but not fitted here; the
        # optional two-epoch pre-training stage that used it is disabled.
        train_povoseg_model.compile(Adam(0.0001,  amsgrad=True),
                            loss = pred_loss,
                            loss_weights=(1,1,1,0,0)
                          )

        train_model.compile(Adam(0.0001,  amsgrad=True),
                            loss = pred_loss,
                            loss_weights=(1,1,1,0,0,
                                          1,0,1,0)
                           )

        # --- Input pipeline ----------------------------------------------
        # prefetch is the last transformation so the buffer holds elements
        # that are already in the (inputs, target) form fit() consumes.
        # Assumes DataLoader.Dataset returns a tf.data.Dataset -- TODO confirm.
        train_dataset = (DataLoader.Dataset(train_data,xyDim, times=2, group_size=5, random=True)
                         .batch(40)
                         .map(totuplething)
                         .prefetch(20))

        # NOTE(review): workers / max_queue_size / use_multiprocessing are
        # documented by Keras for generator or Sequence inputs; kept as they
        # were -- confirm whether they have any effect for this input.
        train_model.fit(train_dataset,
                        epochs=10,
                        verbose=1,
                        workers=8,
                        max_queue_size=100,
                        use_multiprocessing=True)

        # --- Persist results for this object -----------------------------
        train_model.save_weights(f'{dataset_path}/saved_weights/csl_o{oiu}_train_model_10e')
        csl_trainable_layers.save(f'{dataset_path}/saved_models/csl_o{oiu}_trainable_layers')

Object  1
model_info for object 1:
diameter : 63.5151
mins : [-17.4958 -17.4958 -30.6   ]
maxs : [17.4958 17.4958 30.6   ]
symmetries_discrete : []
symmetries_continuous : True


100%|██████████| 50/50 [00:10<00:00,  4.78it/s]
100%|██████████| 30/30 [00:00<00:00, 65.88it/s]

Found train data for 21169 occurencies of object 1, where 1296 origined from primesense.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)





Starring as symmetries_continuous
destarring as symmetries_continuous
Epoch 1/10
destarring as symmetries_continuous
Starring as symmetries_continuous
destarring as symmetries_continuous
Starring as symmetries_continuous


2024-05-06 13:28:11.764110: I tensorflow/core/util/cuda_solvers.cc:179] Creating GpuSolver handles for stream 0x89768c0
2024-05-06 13:28:12.315469: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8906
2024-05-06 13:28:16.287910: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f7e6977bdb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-05-06 13:28:16.287925: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4070, Compute Capability 8.9
2024-05-06 13:28:16.290805: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1715002096.327692     208 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10


2024-05-06 13:55:50.310275: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 16560608353834730895
2024-05-06 13:55:50.310300: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 235364148928956383
2024-05-06 13:55:50.310303: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8715105312786090647
2024-05-06 13:55:50.310305: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8457346204189642333
2024-05-06 13:55:50.310308: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 9761640983032909691
2024-05-06 13:55:50.310311: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8617473630866248401
2024-05-06 13:55:50.310313: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv ite

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: /tf/notebooks/datasets/tless/saved_models/csl_o1_trainable_layers/assets


INFO:tensorflow:Assets written to: /tf/notebooks/datasets/tless/saved_models/csl_o1_trainable_layers/assets


Object  2
model_info for object 2:
diameter : 66.1512
mins : [-21.6448 -21.6448 -30.8511]
maxs : [21.6448 21.6448 30.8511]
symmetries_discrete : []
symmetries_continuous : True


100%|██████████| 50/50 [00:12<00:00,  4.15it/s]
100%|██████████| 30/30 [00:00<00:00, 44.17it/s]

Found train data for 23086 occurencies of object 2, where 1296 origined from primesense.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)



INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Starring as symmetries_continuous
destarring as symmetries_continuous
Epoch 1/10
destarring as symmetries_continuous
Starring as symmetries_continuous
destarring as symmetries_continuous
Starring as symmetries_continuous
Epoch 2/10


2024-05-06 18:18:01.066239: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 16560608353834730895
2024-05-06 18:18:01.066258: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 235364148928956383
2024-05-06 18:18:01.066261: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8715105312786090647
2024-05-06 18:18:01.066263: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8457346204189642333
2024-05-06 18:18:01.066266: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 9761640983032909691
2024-05-06 18:18:01.066268: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8617473630866248401
2024-05-06 18:18:01.066271: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv ite

Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 