# Perform inference

This notebook runs inference with trained SDN and LRN networks. It can run on both hosted and local runtimes.

# Preliminaries

Install required packages.

In [None]:
!pip install git+https://github.com/Microsatellites-and-Space-Microsystems/pose_estimation_domain_gap --quiet

Provide access to Google Drive.

In [None]:
# Mount the user's Google Drive under /content/gdrive so that the weight,
# image and output paths configured in this notebook resolve.
# NOTE(review): google.colab is presumably only importable on a Colab-hosted
# runtime; skip this cell (and adapt the paths) when running locally — confirm.
from google.colab import drive
drive.mount('/content/gdrive')

Set the paths to the neural network weights, the input images, and the output file.

In [None]:
# Weight files of the trained networks (replace the "..." placeholders).
sdn_weight_path = '/content/gdrive/MyDrive/.../my_first_SDN.h5'
lrn_weight_path = '/content/gdrive/MyDrive/.../my_first_LRN.h5'

# Image domain to process: 'sunlamp' or 'lightbox'.
inference = 'sunlamp'

# Folder with the images of the selected domain.
images_folder = f'/content/gdrive/MyDrive/SPEC21_test_images/{inference}'

# Destination of the inference results, written in JSON format.
json_dest_file = f'/content/gdrive/MyDrive/my_first_inference_{inference}.json'

# Initialize SDN

Use these cells to initialize a Swin based SDN.

In [None]:
from models_and_layers.tfswin import SwinTransformerTiny224 as transformerEncoder
import tensorflow as tf

def get_encoder(input_shape):
    """Return a Keras model: Swin encoder followed by global average pooling.

    Args:
        input_shape: Side, in pixels, of the square 3-channel input.

    Returns:
        A `tf.keras.models.Model` mapping an
        (input_shape, input_shape, 3) image to the output of the
        'avg_pool' layer.
    """
    image_input = tf.keras.layers.Input(shape=(input_shape, input_shape, 3))
    feature_map = transformerEncoder(include_top=False)(image_input)
    pooled = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(feature_map)
    return tf.keras.models.Model(inputs=image_input, outputs=pooled)

In [None]:
class Discriminator(tf.keras.Model):
    """Dense head ending in a single linear unit named 'cls'.

    Layout: Dense(hidden_dim * 4, gelu) -> Dense(hidden_dim, gelu) -> Dense(1).
    Every kernel uses a Glorot-uniform initializer with a fixed seed.
    """

    def __init__(self, hidden_dim):
        """Create the dense stack; `hidden_dim` sets the hidden layer widths."""
        super().__init__()
        self.hidden_dim = hidden_dim

        dense = tf.keras.layers.Dense
        glorot = tf.keras.initializers.GlorotUniform
        # Layers are created in the same order as they are stacked.
        head = [
            dense(self.hidden_dim * 4, activation='gelu', kernel_initializer=glorot(seed=422)),
            dense(self.hidden_dim, activation='gelu', kernel_initializer=glorot(seed=342)),
            dense(1, name='cls', kernel_initializer=glorot(seed=324)),
        ]
        self.basic_layers = tf.keras.Sequential(head)

    def call(self, x):
        """Apply the dense stack to `x` and return its output."""
        return self.basic_layers(x)
  
class Regressor(tf.keras.Model):
    """Dense head with four linear outputs (layer 'bbox').

    Layout: Dense(hidden_dim * 4, gelu, 'bbox1') -> Dense(hidden_dim, gelu,
    'bbox2') -> Dense(4, linear, 'bbox'). Every kernel uses a Glorot-uniform
    initializer with a fixed seed.
    """

    def __init__(self, hidden_dim):
        """Create the dense stack; `hidden_dim` sets the hidden layer widths."""
        super().__init__()
        self.hidden_dim = hidden_dim

        dense = tf.keras.layers.Dense
        glorot = tf.keras.initializers.GlorotUniform
        # Layers are created in the same order as they are stacked.
        head = [
            dense(hidden_dim * 4, activation='gelu', name='bbox1', kernel_initializer=glorot(seed=3256)),
            dense(hidden_dim, activation='gelu', name='bbox2', kernel_initializer=glorot(seed=768)),
            dense(4, activation='linear', name='bbox', kernel_initializer=glorot(seed=4236)),
        ]
        self.basic_layers = tf.keras.Sequential(head)

    def call(self, x):
        """Apply the dense stack to `x` and return its output."""
        return self.basic_layers(x)

In [None]:
# Assemble the Swin-based SDN (encoder + two heads) and restore its weights.

input_shape = 224   # side of the square network input, in pixels
hidden_dim = 768    # width parameter of the two dense heads

new_encoder = get_encoder(input_shape)

# Both heads read the pooled encoder output.
discriminator = Discriminator(hidden_dim)(new_encoder.output)
regressor = Regressor(hidden_dim)(new_encoder.output)

# Output 0 is the discriminator head, output 1 the bounding-box regressor.
sdn_network = tf.keras.models.Model(
    inputs=[new_encoder.input],
    outputs=[discriminator, regressor],
)

sdn_network.load_weights(sdn_weight_path)

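Alternatively, use these cells to initialize an EfficientNet-based SDN. Run either the Swin cells above or these cells, not both: they define the same names.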

In [None]:
from models_and_layers.efficientnet import EfficientNetV1B5
import tensorflow as tf

class get_encoder(tf.keras.Model):
    """EfficientNetV1B5 backbone, 1x1 convolution and global average pooling.

    Args:
        hidden_dim: Number of filters of the 1x1 convolution.
        input_shape: Side, in pixels, of the square 3-channel input declared
            to the backbone.
    """

    def __init__(self, hidden_dim, input_shape):
        super().__init__()
        self.hidden_dim = hidden_dim

        backbone = EfficientNetV1B5(
            num_classes=0,
            input_shape=(input_shape, input_shape, 3),
            pretrained="imagenet",
        )
        projection = tf.keras.layers.Conv2D(
            self.hidden_dim,
            1,
            kernel_initializer=tf.keras.initializers.GlorotUniform(seed=927),
        )
        pooling = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')
        self.basic_layers = tf.keras.Sequential([backbone, projection, pooling])

    def call(self, x):
        """Run `x` through backbone, projection and pooling."""
        return self.basic_layers(x)

In [None]:
class Discriminator(tf.keras.Model):
    """Three stacked dense layers; the last one ('cls') has a single unit.

    The first two layers use the GELU activation and have hidden_dim * 4 and
    hidden_dim units respectively. Kernels are initialized with seeded
    Glorot-uniform initializers.
    """

    def __init__(self, hidden_dim):
        """Build the stack for the given hidden width."""
        super().__init__()
        self.hidden_dim = hidden_dim

        def glorot(seed):
            return tf.keras.initializers.GlorotUniform(seed=seed)

        expand = tf.keras.layers.Dense(
            self.hidden_dim * 4, activation='gelu', kernel_initializer=glorot(422))
        reduce = tf.keras.layers.Dense(
            self.hidden_dim, activation='gelu', kernel_initializer=glorot(342))
        logit = tf.keras.layers.Dense(
            1, name='cls', kernel_initializer=glorot(324))
        self.basic_layers = tf.keras.Sequential([expand, reduce, logit])

    def call(self, x):
        """Return the output of the dense stack for `x`."""
        return self.basic_layers(x)
  
class Regressor(tf.keras.Model):
    """Three stacked dense layers; the last one ('bbox') has four linear units.

    The first two layers ('bbox1', 'bbox2') use the GELU activation and have
    hidden_dim * 4 and hidden_dim units respectively. Kernels are initialized
    with seeded Glorot-uniform initializers.
    """

    def __init__(self, hidden_dim):
        """Build the stack for the given hidden width."""
        super().__init__()
        self.hidden_dim = hidden_dim

        def glorot(seed):
            return tf.keras.initializers.GlorotUniform(seed=seed)

        expand = tf.keras.layers.Dense(
            hidden_dim * 4, activation='gelu', name='bbox1', kernel_initializer=glorot(3256))
        reduce = tf.keras.layers.Dense(
            hidden_dim, activation='gelu', name='bbox2', kernel_initializer=glorot(768))
        box = tf.keras.layers.Dense(
            4, activation='linear', name='bbox', kernel_initializer=glorot(4236))
        self.basic_layers = tf.keras.Sequential([expand, reduce, box])

    def call(self, x):
        """Return the output of the dense stack for `x`."""
        return self.basic_layers(x)

In [None]:
# Assemble the EfficientNet-based SDN (encoder + two heads) and restore its
# weights.

input_shape = 224  # side of the square network input, in pixels
hidden_dim = 768   # output size of the encoder

# Named `model_input` rather than `input` so that the builtin `input()` is not
# shadowed for the rest of the notebook session.
model_input = tf.keras.layers.Input(shape=(input_shape, input_shape, 3))

encoder = get_encoder(hidden_dim, input_shape)

# Apply the encoder once and share its features between the two heads.
# Calling `encoder(...)` separately for each head adds a second, identical
# backbone forward pass to the graph without changing the results.
encoder_features = encoder(model_input)
discriminator = Discriminator(hidden_dim)(encoder_features)
regressor = Regressor(hidden_dim)(encoder_features)

# Output 0 is the discriminator head, output 1 the bounding-box regressor.
sdn_network = tf.keras.models.Model(
    inputs=[model_input],
    outputs=[discriminator, regressor],
)

sdn_network.load_weights(sdn_weight_path)

# Initialize LRN

Use these cells to initialize a Swin based LRN.

In [None]:
from models_and_layers.tfswin import SwinTransformerTiny224 as transformerEncoder

def get_encoder(input_shape):
    """Build the Swin encoder with a global-average-pooled output.

    Args:
        input_shape: Side, in pixels, of the square 3-channel input.

    Returns:
        A `tf.keras.models.Model` whose input is an
        (input_shape, input_shape, 3) image and whose output is produced by
        the 'avg_pool' layer.
    """
    encoder_input = tf.keras.layers.Input(shape=(input_shape, input_shape, 3))
    swin_features = transformerEncoder(include_top=False)(encoder_input)
    pooled_features = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(swin_features)
    return tf.keras.models.Model(inputs=encoder_input, outputs=pooled_features)

In [None]:
class Discriminator(tf.keras.Model):
    """Dense head of the LRN ending in a single unit named 'cls'.

    Layout: Dense(hidden_dim * 4, gelu) -> Dense(hidden_dim, gelu) -> Dense(1),
    each with a seeded Glorot-uniform kernel initializer.
    """

    def __init__(self, hidden_dim):
        """Create the dense stack; `hidden_dim` sets the hidden layer widths."""
        super().__init__()
        self.hidden_dim = hidden_dim

        dense = tf.keras.layers.Dense
        glorot = tf.keras.initializers.GlorotUniform
        # Layers are created in the same order as they are stacked.
        head = [
            dense(self.hidden_dim * 4, activation='gelu', kernel_initializer=glorot(seed=1509)),
            dense(self.hidden_dim, activation='gelu', kernel_initializer=glorot(seed=9)),
            dense(1, name='cls', kernel_initializer=glorot(seed=3412)),
        ]
        self.basic_layers = tf.keras.Sequential(head)

    def call(self, x):
        """Apply the dense stack to `x` and return its output."""
        return self.basic_layers(x)
  
class Regressor(tf.keras.Model):
    """Dense head producing two linear outputs per keypoint (layer 'kpts').

    Layout: Dense(hidden_dim * 4, gelu, 'kpts1') -> Dense(hidden_dim, gelu,
    'kpts2') -> Dense(num_keypoints * 2, linear, 'kpts'), each with a seeded
    Glorot-uniform kernel initializer.
    """

    def __init__(self, hidden_dim, num_keypoints):
        """Create the dense stack for `num_keypoints` keypoints."""
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_keypoints = num_keypoints

        dense = tf.keras.layers.Dense
        glorot = tf.keras.initializers.GlorotUniform
        # Layers are created in the same order as they are stacked.
        head = [
            dense(hidden_dim * 4, activation='gelu', name='kpts1', kernel_initializer=glorot(seed=121)),
            dense(hidden_dim, activation='gelu', name='kpts2', kernel_initializer=glorot(seed=432)),
            dense(num_keypoints * 2, activation='linear', name='kpts', kernel_initializer=glorot(seed=3454)),
        ]
        self.basic_layers = tf.keras.Sequential(head)

    def call(self, x):
        """Apply the dense stack to `x` and return its output."""
        return self.basic_layers(x)

In [None]:
# Assemble the Swin-based LRN (encoder + two heads) and restore its weights.

input_shape = 224    # side of the square network input, in pixels
hidden_dim = 768     # output size of the encoder
num_keypoints = 11   # number of satellite keypoints

encoder = get_encoder(input_shape)

# Both heads read the pooled encoder output.
discriminator = Discriminator(hidden_dim)(encoder.output)
regressor = Regressor(hidden_dim, num_keypoints)(encoder.output)

# Output 0 is the discriminator head, output 1 the keypoint regressor.
lrn_network = tf.keras.models.Model(
    inputs=[encoder.input],
    outputs=[discriminator, regressor],
)

lrn_network.load_weights(lrn_weight_path)

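Alternatively, use these cells to initialize an EfficientNet-based LRN. Run either the Swin cells above or these cells, not both: they define the same names.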

In [None]:
from models_and_layers.efficientnet import EfficientNetV1B5

class get_encoder(tf.keras.Model):
    """EfficientNetV1B5 backbone, 1x1 convolution and global average pooling.

    Args:
        hidden_dim: Number of filters of the 1x1 convolution.
        input_shape: Side, in pixels, of the square 3-channel input declared
            to the backbone.
        num_keypoints: Optional value stored on the instance; the encoder
            itself does not use it. It was previously read from a notebook
            global, which made construction fail with a NameError unless a
            cell defining `num_keypoints` had already been run.
    """

    def __init__(self, hidden_dim, input_shape, num_keypoints=None):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.num_keypoints = num_keypoints
        self.basic_layers = tf.keras.Sequential([
            EfficientNetV1B5(
                num_classes=0,
                input_shape=(input_shape, input_shape, 3),
                pretrained="imagenet",
            ),
            tf.keras.layers.Conv2D(
                self.hidden_dim,
                1,
                kernel_initializer=tf.keras.initializers.GlorotUniform(seed=231),
            ),
            tf.keras.layers.GlobalAveragePooling2D(name='avg_pool'),
        ])

    def call(self, x):
        """Run `x` through backbone, projection and pooling."""
        return self.basic_layers(x)

In [None]:
class Discriminator(tf.keras.Model):
    """Three stacked dense layers; the last one ('cls') has a single unit.

    The first two layers use the GELU activation and have hidden_dim * 4 and
    hidden_dim units respectively. Kernels are initialized with seeded
    Glorot-uniform initializers.
    """

    def __init__(self, hidden_dim):
        """Build the stack for the given hidden width."""
        super().__init__()
        self.hidden_dim = hidden_dim

        def glorot(seed):
            return tf.keras.initializers.GlorotUniform(seed=seed)

        expand = tf.keras.layers.Dense(
            self.hidden_dim * 4, activation='gelu', kernel_initializer=glorot(1509))
        reduce = tf.keras.layers.Dense(
            self.hidden_dim, activation='gelu', kernel_initializer=glorot(9))
        logit = tf.keras.layers.Dense(
            1, name='cls', kernel_initializer=glorot(3412))
        self.basic_layers = tf.keras.Sequential([expand, reduce, logit])

    def call(self, x):
        """Return the output of the dense stack for `x`."""
        return self.basic_layers(x)
  
class Regressor(tf.keras.Model):
    """Three stacked dense layers; the last ('kpts') has 2 * num_keypoints units.

    The first two layers ('kpts1', 'kpts2') use the GELU activation and have
    hidden_dim * 4 and hidden_dim units respectively; the last is linear.
    Kernels are initialized with seeded Glorot-uniform initializers.
    """

    def __init__(self, hidden_dim, num_keypoints):
        """Build the stack for the given hidden width and keypoint count."""
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_keypoints = num_keypoints

        def glorot(seed):
            return tf.keras.initializers.GlorotUniform(seed=seed)

        expand = tf.keras.layers.Dense(
            hidden_dim * 4, activation='gelu', name='kpts1', kernel_initializer=glorot(121))
        reduce = tf.keras.layers.Dense(
            hidden_dim, activation='gelu', name='kpts2', kernel_initializer=glorot(432))
        coords = tf.keras.layers.Dense(
            num_keypoints * 2, activation='linear', name='kpts', kernel_initializer=glorot(3454))
        self.basic_layers = tf.keras.Sequential([expand, reduce, coords])

    def call(self, x):
        """Return the output of the dense stack for `x`."""
        return self.basic_layers(x)

In [None]:
# Assemble the EfficientNet-based LRN (encoder + two heads) and restore its
# weights.

input_shape = 224   # side of the square network input, in pixels
hidden_dim = 768    # output size of the encoder
num_keypoints = 11  # number of satellite keypoints

# Named `model_input` rather than `input` so that the builtin `input()` is not
# shadowed for the rest of the notebook session.
model_input = tf.keras.layers.Input(shape=(input_shape, input_shape, 3))

encoder = get_encoder(hidden_dim, input_shape)

# Apply the encoder once and share its features between the two heads.
# Calling `encoder(...)` separately for each head adds a second, identical
# backbone forward pass to the graph without changing the results.
encoder_features = encoder(model_input)
discriminator = Discriminator(hidden_dim)(encoder_features)
regressor = Regressor(hidden_dim, num_keypoints)(encoder_features)

# Output 0 is the discriminator head, output 1 the keypoint regressor.
lrn_network = tf.keras.models.Model(
    inputs=[model_input],
    outputs=[discriminator, regressor],
)

lrn_network.load_weights(lrn_weight_path)

# Tango model and camera matrix

In [None]:
#SPEED 3D Model

import numpy as np

# Camera intrinsic matrix, updated to SPEED+.
cameraMatrix = np.array([
    [2988.579516381556, 0, 960],
    [0, 2988.340115917612, 600],
    [0, 0, 1],
])

# Distortion coefficients, ordered as (k1, k2, p1, p2[, k3]).
distCoeffs = np.array([
    -0.223830166065107,
    0.514097970891064,
    -6.649961199834066e-04,
    -2.140477166748459e-04,
    -0.131242274290774,
])

# Keypoint coordinates in Tango's frame.
# Before running this cell, create a np array "objectPoints" holding the
# satellite 3D model: num_keypoints x 3 values (x, y, z per keypoint).

#objectPoints=...
objectPoints = objectPoints.reshape(11, 3)


# Load images

In [None]:
import os

# Collect the paths of the JPEG files stored directly in images_folder.
# os.listdir returns entries in arbitrary order, so the list is sorted to make
# the processing order (and the row order of the exported results)
# reproducible. The extension check is case-insensitive so that ".JPG" files
# are picked up as well.
img = sorted(
    candidate
    for candidate in (
        os.path.join(images_folder, entry) for entry in os.listdir(images_folder)
    )
    if os.path.isfile(candidate) and os.path.splitext(candidate)[1].lower() == '.jpg'
)

print(len(img))

# Run inference

In [None]:
import time
from PIL import Image
import cv2
from scipy.spatial.transform import Rotation as Rot
import numpy as np
#import matplotlib.pyplot as plt

# Full inference pipeline, run once per image:
#   1. SDN predicts a bounding box on the resized full frame.
#   2. The box is enlarged, cropped, zero-padded to a square and fed to the LRN,
#      which predicts the keypoints.
#   3. The pose is solved with RANSAC EPnP and refined with Levenberg-Marquardt.
# Results are stored row-by-row in the export_* / *_time arrays.

start_all = time.time()

# Size of the input images in pixels; used to de-normalize the SDN bounding
# box and to clip the crop to the image.
image_width = 1920
image_height = 1200

np.set_printoptions(threshold=60000)

# Side, in pixels, of the square inputs fed to each network.
lrn_image_resolution = 224
sdn_image_resolution = 224

# Buffers for the exported data: one row per image.
num_images = len(img)
images_names = []
image_overall_time = np.zeros((num_images, 1))
export_keypoints = np.zeros((num_images, num_keypoints * 2))
sdn_inference_time = np.zeros((num_images, 1))
export_bbox_coords = np.zeros((num_images, 4))
lrn_inference_time = np.zeros((num_images, 1))
load_image_time = np.zeros((num_images, 1))

export_PnP_success = np.zeros((num_images, 1))
export_position = np.zeros((num_images, 3))
export_quat = np.zeros((num_images, 4))

export_inliers_nr = np.zeros((num_images, 1))

export_position_LM = np.zeros((num_images, 3))
export_quat_LM = np.zeros((num_images, 4))
PnP_time = np.zeros((num_images, 1))
LM_time = np.zeros((num_images, 1))

for i, image_path in enumerate(img):
    print(i)

    images_names.append(os.path.basename(image_path))
    image_time_start = time.time()

    # Load the pixels and close the file handle right away.
    # NOTE(review): the channel axis appended below assumes a single-channel
    # (grayscale) image — confirm for other datasets.
    with Image.open(image_path) as pil_image:
        image = np.asarray(pil_image)
    image = np.expand_dims(image, -1)

    load_image_time[i, :] = time.time() - image_time_start

    # ---------------- SDN: bounding box on the full frame ----------------
    image_sdn = tf.image.resize(
        image,
        [sdn_image_resolution, sdn_image_resolution],
        method=tf.image.ResizeMethod.BILINEAR,
        antialias=False,
    )
    # Rescale pixel values (for 8-bit data this spans [-1, 1]).
    image_sdn = (image_sdn / 127.5) - 1.0
    image_sdn = tf.image.grayscale_to_rgb(image_sdn)
    image_sdn = np.expand_dims(image_sdn, 0)

    start = time.time()
    output = sdn_network(image_sdn)
    # output[0] is the discriminator head (unused here); output[1] the box.
    bbox_coords = output[1]
    sdn_inference_time[i, :] = time.time() - start

    # De-normalize the box to pixel units; it is exported in y-first order.
    xmin, ymin, xmax, ymax = np.reshape(bbox_coords.numpy(), [4]) * [
        image_width, image_height, image_width, image_height]
    export_bbox_coords[i, :] = [ymin, xmin, ymax, xmax]

    # ---------------- Region of interest ----------------
    # Enlarge the box by 15% around its centre, but never below the LRN
    # input resolution.
    xc = (xmax + xmin) / 2
    yc = (ymax + ymin) / 2
    bbox_w = max((xmax - xmin) * 1.15, lrn_image_resolution)
    bbox_h = max((ymax - ymin) * 1.15, lrn_image_resolution)

    # Top-left corner, clipped to the image and snapped to whole pixels.
    xmin = np.floor(max(xc - bbox_w / 2, 0))
    ymin = np.floor(max(yc - bbox_h / 2, 0))
    bbox_h = np.floor(bbox_h)
    bbox_w = np.floor(bbox_w)

    cropped_img = image[
        int(ymin):int(min(image_height, ymin + bbox_h)),
        int(xmin):int(min(image_width, xmin + bbox_w)),
        :,
    ]
    rows, cols = cropped_img.shape[:2]

    # Zero-pad the shorter side symmetrically so the crop becomes square, and
    # shift the crop origin accordingly so keypoints map back correctly.
    if rows < cols:
        rows_to_pad_up = np.floor((cols - rows) / 2)
        rows_to_pad_down = cols - rows - rows_to_pad_up
        padding_up = np.zeros([int(rows_to_pad_up), cols, 1])
        padding_down = np.zeros([int(rows_to_pad_down), cols, 1])

        cropped_img = np.vstack((padding_up, cropped_img, padding_down))
        ymin = ymin - rows_to_pad_up

    elif cols < rows:
        cols_to_pad_left = np.floor((rows - cols) / 2)
        cols_to_pad_right = rows - cols - cols_to_pad_left
        padding_left = np.zeros([rows, int(cols_to_pad_left), 1])
        padding_right = np.zeros([rows, int(cols_to_pad_right), 1])

        cropped_img = np.hstack((padding_left, cropped_img, padding_right))
        xmin = xmin - cols_to_pad_left

    # ---------------- LRN: keypoints on the square crop ----------------
    image_lrn = tf.image.resize(
        cropped_img,
        [lrn_image_resolution, lrn_image_resolution],
        method=tf.image.ResizeMethod.BILINEAR,
        antialias=False,
    )
    image_lrn = (image_lrn / 127.5) - 1.0
    image_lrn = tf.image.grayscale_to_rgb(image_lrn)
    image_lrn = np.expand_dims(image_lrn, 0)

    start = time.time()
    output_lrn = lrn_network(image_lrn)
    lrn_inference_time[i, :] = time.time() - start

    # output_lrn[0] is the discriminator head (unused here).
    # Scale the keypoints by the side of the square crop, then add the crop
    # origin to recover coordinates in the original image frame.
    bbox_side = cropped_img.shape[0]
    keypoints = np.reshape(output_lrn[1].numpy(), [num_keypoints, 2]) * bbox_side
    keypoints[:, 0] += xmin
    keypoints[:, 1] += ymin

    export_keypoints[i, :] = keypoints.reshape(1, num_keypoints * 2)

    # ---------------- Pose: RANSAC EPnP + LM refinement ----------------
    start = time.time()
    success, R_vec, t_vec, inliers = cv2.solvePnPRansac(
        objectPoints, keypoints, cameraMatrix, distCoeffs,
        flags=cv2.SOLVEPNP_EPNP, reprojectionError=5)
    PnP_time[i, :] = time.time() - start

    if success and inliers is not None and len(inliers) > 0:
        export_PnP_success[i, :] = success
        export_inliers_nr[i, :] = len(inliers)

        Rotation_matrix, _ = cv2.Rodrigues(R_vec)
        export_position[i, :] = t_vec.transpose()
        export_quat[i, :] = Rot.from_matrix(Rotation_matrix).as_quat()

        # Refine the pose using the RANSAC inliers only.
        start = time.time()
        R_vec, t_vec = cv2.solvePnPRefineLM(
            objectPoints[inliers, :], keypoints[inliers, :],
            cameraMatrix, distCoeffs, R_vec, t_vec)
        LM_time[i, :] = time.time() - start

        Rotation_matrix_LM, _ = cv2.Rodrigues(R_vec)
        export_position_LM[i, :] = t_vec.transpose()
        export_quat_LM[i, :] = Rot.from_matrix(Rotation_matrix_LM).as_quat()
    else:
        # RANSAC found no solution: there are no inliers to refine on and the
        # returned pose vectors cannot be trusted. Leave the pose rows at zero
        # (flagged by PnP_success == 0) and move on to the next image.
        print('PnP failed for', images_names[-1])

    # Total time spent on this image; this field is part of the exported data.
    image_overall_time[i, :] = time.time() - image_time_start

time_elapsed = time.time() - start_all
print(time_elapsed)

Export inference data to json file format.

In [None]:
import json

# Per-image result arrays, keyed by the field name used in the JSON file.
# The insertion order below is the order of the fields in each exported record.
export_fields = {
    'bbox_coords': export_bbox_coords,
    'sdn_inference_time': sdn_inference_time,
    'keypoints': export_keypoints,
    'lrn_inference_time': lrn_inference_time,
    'PnP_success': export_PnP_success,
    'PnP_inliers_nr': export_inliers_nr,
    'PnP_time': PnP_time,
    'LM_time': LM_time,
    'Load_time': load_image_time,
    'position': export_position,
    'quaternions': export_quat,
    'position_LM': export_position_LM,
    'quaternions_LM': export_quat_LM,
    'overall_image_time': image_overall_time,
}

# One record per image. With no images this is an empty list, so the export
# writes "[]" instead of failing on an undefined `data_all`.
data_all = [
    {
        'image': image_name,
        **{field: values[row].tolist() for field, values in export_fields.items()},
    }
    for row, image_name in enumerate(images_names)
]

parsed = json.dumps(data_all, indent=4)

with open(json_dest_file, 'w', encoding='utf-8') as f:
    f.write(parsed)