In [1]:
import numpy as np 
import pandas as pd 
import os
import sys 
import torch 
from matplotlib import pyplot as plt 
from glob import glob 
import cv2 
import open3d as o3d   
import gc
gc.collect()

sys.path.append("../src/")
from utils.load_tof_images import create_from_zip_absolute  as load_assignment_data
from depth_model import inference as infer

id = "253ebd40-1ddd-11ed-8dde-f768ee859a71"
path = "../data/360_scans/"+id
rgb_files = glob(path+"/rgb/*")
print("Total RGB images ",len(rgb_files))


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
Total RGB images  24


In [2]:
rgb_fpath = rgb_files[np.random.randint(0,len(rgb_files))]
print(rgb_fpath)
depth_fpath = rgb_fpath.replace('rgb','depth')
calib_fpath = os.path.dirname(rgb_fpath).replace('rgb','calibration/0')

data = load_assignment_data(rgb_fpath=rgb_fpath,depthmap_fpath=depth_fpath,calibration_fpath=calib_fpath)
rgb_image = data[8]
depth_map = data[3]

plt.subplot(1,2,1)
plt.imshow(rgb_image)
plt.subplot(1,2,2)
plt.imshow(depth_map)

../data/360_scans/253ebd40-1ddd-11ed-8dde-f768ee859a71/rgb/22


<matplotlib.image.AxesImage at 0x7f145f5704c0>

In [3]:
# print properties:
print(f"Image resolution: {depth_map.shape}")
print(f"Data type: {depth_map.dtype}")
print(f"Min value: {np.min(depth_map)}")
print(f"Max value: {np.max(depth_map)}")

Image resolution: (240, 180)
Data type: float64
Min value: 0.0
Max value: 7.412


### Setting the Depth camera calibration for Asssignment dataset 

- The calibration matrix M is a 3×3 matrix:

                | fx 0   cx |
                | 0  fy  cy |
                | 0  0   1  |

Where fx, fy and cx, cy are the focal length and the optical centers

- Point cloud computing
Computing point cloud here means transforming the depth pixel from the depth image 2D coordinate system to the depth camera 3D coordinate system (x, y and z). The 3D coordinates are computed using the following formulas, where depth(i, j) is the depth value at the row i and column j:
            
            | z = depth(i,j)       |
            | x = ( (j-cx) x z)/fx |
            | y = ( (i-cy) x z)/fy |
            
[Link](https://betterprogramming.pub/point-cloud-computing-from-rgb-d-images-918414d57e80) to info

In [4]:
# Depth camera parameters:
FX_DEPTH = 0.7811297
FY_DEPTH = 1.5166936
CX_DEPTH = 0.50329405
CY_DEPTH = 0.5187362

In [None]:
visualize = True
if visualize:
    # get depth resolution:
    height, width = depth_map.shape
    length = height * width
    # compute indices:
    jj = np.tile(range(width), height)
    ii = np.repeat(range(height), width)
    # rechape depth image
    z = depth_map.reshape(length)
    # compute pcd:
    pcd = np.dstack([(ii - CX_DEPTH) * z / FX_DEPTH,
                    (jj - CY_DEPTH) * z / FY_DEPTH,
                    z]).reshape((length, 3))

    pcd_o3d = o3d.geometry.PointCloud()  # create point cloud object
    pcd_o3d.points = o3d.utility.Vector3dVector(pcd)  # set pcd_np as the point cloud points
    # Visualize:
    o3d.visualization.draw_geometries([pcd_o3d])

### Get Image Data from the standard script provided

In [6]:
data = load_assignment_data(rgb_fpath=rgb_fpath,depthmap_fpath=depth_fpath,calibration_fpath=calib_fpath)
rgb_image = data[8]
depth_map = data[3]
depth_scale = data[4]

In [8]:
child_bbox = infer.detect_child(rgb_image)

predicted_image = infer.inference_rgbimage(rgb_image=rgb_image,depth_image_size=rgb_image.shape[:2])

print("\n predicted Depth map.. Child coordinates are {}".format(child_bbox))

Fusing layers... 


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
2023-07-04 17:27:07.932051: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-04 17:27:07.972236: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



 predicted Depth map.. Child coordinates are [114  65 279 543]


In [10]:
# print properties:
print(f"Image resolution: {predicted_image.shape}")
print(f"Data type: {predicted_image.dtype}")
print(f"Min value: {np.min(predicted_image)}")
print(f"Max value: {np.max(predicted_image)}")
# predicted_image = DepthNorm(predicted_image,100)
torch.cuda.empty_cache()
gc.collect()

Image resolution: (640, 480)
Data type: float32
Min value: 3.4509356021881104
Max value: 12.22368049621582


107

In [11]:
plt.figure(figsize=(17,19))
plt.subplot(1,3,1)
plt.title("RGB Image")
plt.imshow(rgb_image)

plt.subplot(1,3,2)
plt.title("Depth Image")
plt.imshow(data[3],cmap='gray')

plt.subplot(1,3,3)
plt.title("Predicted Depth Image")
plt.imshow(predicted_image,cmap='gray')

# depth_instensity = np.array(255*predicted_image/0x0fff,# / 0x0fff,
#                             dtype=np.int8)
# iio.imwrite('grayscale.png', depth_instensity)

# success, encoded_image = cv2.imencode('.png', resize)
# encoded_image.tobytes()

<matplotlib.image.AxesImage at 0x7f13f6f0ec80>

In [21]:
# get depth resolution:
height, width = predicted_image.shape
length = height * width

# compute indices:
jj = np.tile(range(width), height)
ii = np.repeat(range(height), width)

# reshape depth image
z = predicted_image.reshape(length)

# compute pcd:
pcd = np.dstack([(ii - CX_DEPTH) * z / FX_DEPTH,
                 (jj - CY_DEPTH) * z / FY_DEPTH,
                 z]).reshape((length, 3))

In [23]:
mesh_coord_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=5, origin=[0, 0, 0])

In [24]:
pcd_o3d = o3d.geometry.PointCloud()  # create point cloud object
pcd_o3d.points = o3d.utility.Vector3dVector(pcd)  # set pcd_np as the point cloud points

# Visualize:
o3d.visualization.draw_geometries([pcd_o3d,mesh_coord_frame])



###  Points with Minimum  and max values at each axis 

In [14]:
x_max = max(pcd_o3d.points,key=lambda x: x[0])
y_max = max(pcd_o3d.points,key=lambda x: x[1])
z_max = max(pcd_o3d.points,key=lambda x: x[2])

x_min = min(pcd_o3d.points,key=lambda x: x[0])
y_min = min(pcd_o3d.points,key=lambda x: x[1])
z_min = min(pcd_o3d.points,key=lambda x: x[2])

print(x_max,y_max,z_max)
print(x_min,y_min,z_min)


[     4126.1      976.29      6.9688] [    -7.6275      3601.9      11.838] [    -7.8759      1027.4      12.224]
[    -7.8759      1027.4      12.224] [     -6.887     -3.6558      10.689] [     351.21      510.76      3.4509]


In [37]:
a = 65*320+114
b = 543*320+279
print(b) 
# 114  65 279 543
np.asarray(pcd_o3d.points)[20914:174039]


174039


array([[     302.17,      1001.5,      5.5542],
       [     301.88,      1004.2,      5.5488],
       [     301.57,      1006.8,      5.5432],
       ...,
       [     3273.3,      1284.7,       7.073],
       [       3268,      1287.3,      7.0615],
       [     3262.9,      1289.9,      7.0504]])