<a href="https://colab.research.google.com/github/SunG206/3D-Reconstruction-with-Deep-Learning-Methods/blob/master/Tiled_ZoeDepth%2C_v2%20Tiled%20STL%20included%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Depth Map Generation**

This is an adapted version of https://colab.research.google.com/github/isl-org/ZoeDepth/blob/main/notebooks/ZoeDepth_quickstart.ipynb

Corresponding paper : [ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth](https://arxiv.org/abs/2302.12288v1)

Here, higher resolution depth maps are generated from the following process:

1.   Generate a depth map for the overall image
2.   Split original image into overlapping tiles
3.   Generate depth maps for the tiles
4.   Reassemble into a single depth map by applying gradient masks and average weighting from first depth map
5.   Repeat steps 2-4 at higher resolution
6.   Combine all three depth maps by:
  *   Calculating an edge filter from the original RGB image
  *   Blurring the edge filter and using it as a mask for the high resolution depth map
  *   Applying the masked high resolution depth map to the average of the low and medium resolution depth maps

It is possible to use BoostYourOwnDepth (BYOD) to replace step 6:
https://github.com/compphoto/BoostYourOwnDepth

However, it currently leads to banding because 8-bit images are used instead of 16-bit ones:
https://github.com/compphoto/BoostingMonocularDepth/issues/62

<br>

My repository page for this project:
https://github.com/BillFSmith/TilingZoeDepth

In [1]:
#@title Code (press Ctrl + F9 to run)

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ipywidgets import FloatSlider
from ipywidgets import GridspecLayout

import os, contextlib
with open(os.devnull, 'w') as devnull:
    with contextlib.redirect_stdout(devnull):
        import numpy as np
        !pip install numpy-stl
        from stl import mesh
        import cv2


from google.colab import files

# Widget grid with 5 rows and 4 columns; the cells are filled in further down.
grid_options = GridspecLayout(5, 4)

# State shared by the button callbacks: the uploaded image's name and the
# output file names derived from it. Everything starts out empty.
file_name = {'name': '', 'depth': '', 'stl': ''}


def import_clicked(change):
    """Store the uploaded image and derive the output file names from it.

    Registered through ``import_button.observe`` without a ``names`` filter,
    so it is invoked for every trait change on the upload widget. Only a
    change of ``value`` that actually carries a file is acted on; acting on
    other notifications would rewrite the image file redundantly.

    Side effects: writes the raw upload to ``/content/userimg``, updates the
    status labels in column 2 of ``grid_options`` and fills in
    ``file_name['name']``, ``file_name['depth']`` and ``file_name['stl']``.

    Args:
        change: Change notification from the upload widget. ``change.new`` is
            expected to map the uploaded file name to a record holding the
            file bytes under ``'content'``.
    """
    if change.name != 'value' or not change.new:
        # Unrelated trait change, or the upload list was emptied.
        return

    file_name['name'] = list(change.new.keys())[0]

    with open("/content/userimg", "wb") as fp:
        fp.write(change.new[file_name['name']]['content'])

    print('Uploaded')

    grid_options[1, 2] = widgets.Label(str(file_name['name']) + ' uploaded')
    grid_options[2, 2] = widgets.Label('')
    grid_options[3, 2] = widgets.Label('')

    # splitext keeps dots inside the stem ("my.photo.jpg" -> "my.photo") and
    # leaves extension-less names intact instead of producing an empty stem.
    stem = os.path.splitext(str(file_name['name']))[0]
    file_name['depth'] = stem + '_depth.png'
    file_name['stl'] = stem + '.stl'

# Single-file upload control; import_clicked observes its trait changes.
import_button = widgets.FileUpload(multiple=False, description='Upload Image')
import_button.style.button_color = '#ededed'
import_button.observe(import_clicked)

# Holds the loaded model under the key 'zoe' once dependencies_clicked has run.
dependencies = {}

def dependencies_clicked(change):
    """Install pinned packages, clone ZoeDepth and load the model onto the GPU.

    Wired to ``install_button.on_click``. On completion the loaded model is
    stored in ``dependencies['zoe']`` and the status label in row 0 of
    ``grid_options`` is updated.

    Args:
        change: Argument supplied by the click callback; not used here.
    """
    # !pip install --upgrade torchvision==0.15.2
    # !pip install --upgrade timm==0.6.7 torch==2.1.0
    # Pinned versions of every package installed for the model.
    !pip install --upgrade timm==0.6.7 torch==2.0.1 torchvision==0.15.2 numpy==1.23.5 pillow==9.4.0

    !git clone https://github.com/isl-org/ZoeDepth.git
    # Change into the clone: torch.hub.load(".", source="local") below loads
    # from the current directory.
    # NOTE(review): running this callback a second time would clone and cd
    # again from inside the first clone - confirm whether that matters.
    %cd ZoeDepth
    # !python sanity.py

    # Imported only after the install and clone steps above have run.
    # NOTE(review): get_image_from_url, colorize, Image and plt are not used
    # anywhere in this function.
    import torch
    from zoedepth.utils.misc import get_image_from_url, colorize
    from PIL import Image
    import matplotlib.pyplot as plt

    # Build the "ZoeD_N" entry point from the local checkout with pretrained
    # weights.
    zoe = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True)

    # zoe = zoe.to('cuda')
    # Keep the CUDA-resident model where process_clicked can reach it.
    dependencies['zoe'] = zoe.to('cuda')

    grid_options[0, 2] = widgets.Label('Dependencies installed')

# Button that runs dependencies_clicked when pressed.
install_button = widgets.Button(
    description="Install dependencies",
    style=widgets.ButtonStyle(button_color='#ededed'),
)
install_button.on_click(dependencies_clicked)

from PIL import Image
import matplotlib.pyplot as plt


def _scale_inverted_depth(depth):
    """Rescale *depth* so its minimum maps to 2**16 and its maximum to 0.

    A constant input has no range to stretch, so it is mapped to 2**16
    everywhere instead of dividing by zero and producing NaNs.
    """
    lowest = np.min(depth)
    span = np.max(depth) - lowest
    if span == 0:
        return np.full(np.shape(depth), float(2**16))
    return 2**16 - (depth - lowest) * 2**16 / span


def _build_tile_filters(M, N):
    """Return the nine blending masks for a tile of ``M`` rows by ``N`` columns.

    Every mask is an ``(M, N)`` float array. ``'filter'`` is the product of a
    cos**2 profile along the rows and one along the columns, peaking at 0.998
    in the tile centre. The edge and corner variants leave out the falloff on
    the half of the tile that faces the named image border.
    """
    rows = np.arange(M)
    cols = np.arange(N)

    # Per-row and per-column cos**2 profiles, shaped for broadcasting.
    x_value = (0.998 * np.cos((np.abs(M / 2 - rows) / M) * np.pi) ** 2)[:, None]
    y_value = (0.998 * np.cos((np.abs(N / 2 - cols) / N) * np.pi) ** 2)[None, :]
    blend = x_value * y_value

    top = (rows < M / 2)[:, None]
    bottom = (rows > M / 2)[:, None]
    left = (cols < N / 2)[None, :]
    right = (cols > N / 2)[None, :]

    def corner(row_half, col_half):
        # Full weight in the outer quadrant, one-directional falloff in the
        # two adjacent quadrants, regular blend in the inner quadrant.
        return np.where(
            col_half & row_half,
            0.998,
            np.where(col_half, x_value, np.where(row_half, y_value, blend)),
        )

    return {
        'right_filter': np.where(right, x_value, blend),
        'left_filter': np.where(left, x_value, blend),
        'top_filter': np.where(top, y_value, blend),
        'bottom_filter': np.where(bottom, y_value, blend),
        'top_right_filter': corner(top, right),
        'top_left_filter': corner(top, left),
        'bottom_right_filter': corner(bottom, right),
        'bottom_left_filter': corner(bottom, left),
        'filter': blend,
    }


def process_clicked(change):
    """Generate the tiled high resolution depth map for the uploaded image.

    Steps:
      1. Run the model on the whole image (low resolution estimate).
      2. For a 4x4 and an 8x8 grid, run the model on overlapping tiles,
         match each tile's mean and standard deviation to the same region of
         the low resolution estimate and accumulate it with cos**2 masks.
      3. Build a blurred mask from the difference between the blurred and
         the original greyscale image and use it to mix the 8x8 result with
         the average of the 4x4 result and the low resolution estimate.

    Files written to the working directory: ``zoe_depth_map_16bit_low.png``,
    ``mask_<name>_<nx>_<ny>.png`` for every blending mask,
    ``tiled_depth_<i>.png``, ``mask_image.png`` and the final map under
    ``file_name['depth']``. Both the low resolution and the combined result
    are also displayed with matplotlib.

    Args:
        change: Argument supplied by the click callback; not used here.
    """
    # Fail fast: the model and the output name are both needed, and the
    # output name is only used after all of the inference has finished.
    if 'zoe' not in dependencies:
        print('Install dependencies before processing')
        grid_options[2, 2] = widgets.Label('Install dependencies first')
        return
    if file_name['depth'] == '':
        print('Upload an image before processing')
        grid_options[2, 2] = widgets.Label('Upload an image first')
        return

    print('Processing Started')
    zoe = dependencies['zoe']

    # Force three channels so greyscale, palette and RGBA uploads work with
    # the channel-axis mean taken further down.
    img = Image.open('/content/userimg').convert('RGB')

    # Generate low resolution image
    low_res_depth = zoe.infer_pil(img)
    low_res_scaled_depth = _scale_inverted_depth(low_res_depth)

    low_res_depth_map_image = Image.fromarray((0.999 * low_res_scaled_depth).astype("uint16"))
    low_res_depth_map_image.save('zoe_depth_map_16bit_low.png')

    # Generate filters
    im = np.asarray(img)

    # Tiles per axis for the medium and the high resolution pass.
    tile_sizes = [[4, 4], [8, 8]]
    save_filter_images = True

    filters = []
    for num_x, num_y in tile_sizes:
        # NOTE: "x" indexes image rows (axis 0) and "y" image columns (axis 1)
        # throughout this function.
        M = im.shape[0] // num_x
        N = im.shape[1] // num_y

        filter_dict = _build_tile_filters(M, N)
        filters.append(filter_dict)

        if save_filter_images:
            for filter_name, mask in filter_dict.items():
                filter_image = Image.fromarray((mask * 2**16).astype("uint16"))
                filter_image.save(f'mask_{filter_name}_{num_x}_{num_y}.png')

    # Run the model tile by tile and accumulate the weighted results.
    compiled_tiles_list = []

    for i, ((num_x, num_y), tile_filters) in enumerate(zip(tile_sizes, filters)):
        M = im.shape[0] // num_x
        N = im.shape[1] // num_y

        compiled_tiles = np.zeros((im.shape[0], im.shape[1]))

        # Regular grid of tile origins ...
        x_coords = list(range(0, im.shape[0], M))[:num_x]
        y_coords = list(range(0, im.shape[1], N))[:num_y]

        # ... plus a second grid shifted by half a tile so that tiles overlap.
        x_coords_between = list(range(M // 2, im.shape[0], M))[:num_x - 1]
        y_coords_between = list(range(N // 2, im.shape[1], N))[:num_y - 1]

        x_coords_all = x_coords + x_coords_between
        y_coords_all = y_coords + y_coords_between

        x_first, x_last = min(x_coords_all), max(x_coords_all)
        y_first, y_last = min(y_coords_all), max(y_coords_all)

        for x in x_coords_all:
            for y in y_coords_all:
                depth = zoe.infer_pil(Image.fromarray(np.uint8(im[x:x + M, y:y + N])))
                scaled_depth = _scale_inverted_depth(depth)

                # Tiles touching an image border use a mask without falloff
                # on that side.
                if y == y_first and x == x_first:
                    selected_filter = tile_filters['top_left_filter']
                elif y == y_first and x == x_last:
                    selected_filter = tile_filters['bottom_left_filter']
                elif y == y_last and x == x_first:
                    selected_filter = tile_filters['top_right_filter']
                elif y == y_last and x == x_last:
                    selected_filter = tile_filters['bottom_right_filter']
                elif y == y_first:
                    selected_filter = tile_filters['left_filter']
                elif y == y_last:
                    selected_filter = tile_filters['right_filter']
                elif x == x_first:
                    selected_filter = tile_filters['top_filter']
                elif x == x_last:
                    selected_filter = tile_filters['bottom_filter']
                else:
                    selected_filter = tile_filters['filter']

                # Standardise the tile, then give it the mean and spread of
                # the matching region of the low resolution estimate.
                low_patch = low_res_scaled_depth[x:x + M, y:y + N]
                tile_std = np.std(scaled_depth)
                if tile_std == 0:
                    # A flat tile carries no detail; it contributes only the
                    # low resolution mean instead of NaNs from a 0/0.
                    standardised = np.zeros_like(scaled_depth)
                else:
                    standardised = (scaled_depth - np.mean(scaled_depth)) / tile_std

                compiled_tiles[x:x + M, y:y + N] += selected_filter * (
                    np.mean(low_patch) + np.std(low_patch) * standardised
                )

        compiled_tiles[compiled_tiles < 0] = 0
        compiled_tiles_list.append(compiled_tiles)

        tiled_depth_map = Image.fromarray((2**16 * 0.999 * compiled_tiles / np.max(compiled_tiles)).astype("uint16"))
        tiled_depth_map.save(f'tiled_depth_{i}.png')

    # combine depth maps
    from scipy.ndimage import gaussian_filter

    grey_im = np.mean(im, axis=2)

    # Blurred-minus-original difference, normalised, blurred again,
    # amplified and clipped into [0, 0.999] to serve as the mixing mask.
    tiles_blur = gaussian_filter(grey_im, sigma=20)
    tiles_difference = tiles_blur - grey_im
    tiles_difference = tiles_difference / np.max(tiles_difference)
    tiles_difference = gaussian_filter(tiles_difference, sigma=40)
    tiles_difference *= 5
    tiles_difference = np.clip(tiles_difference, 0, 0.999)

    mask_image = Image.fromarray((tiles_difference * 2**16).astype("uint16"))
    mask_image.save('mask_image.png')

    combined_result = (tiles_difference * compiled_tiles_list[1] + (1 - tiles_difference) * ((compiled_tiles_list[0] + low_res_scaled_depth) / 2)) / (2)

    combined_image = Image.fromarray((2**16 * 0.999 * combined_result / np.max(combined_result)).astype("uint16"))
    combined_image.save(file_name['depth'])

    # display output images
    print('Original low resolution result')
    plt.imshow(low_res_scaled_depth, 'magma')
    plt.axis("off")
    plt.show()

    print('\nNew high resolution result')
    plt.imshow(combined_result, 'magma')
    plt.axis("off")
    plt.show()

    print("Processing ended")
    grid_options[2, 2] = widgets.Label('Image processed')

# Button that runs process_clicked when pressed.
process_button = widgets.Button(
    description="Process image",
    style=widgets.ButtonStyle(button_color='#ededed'),
)
process_button.on_click(process_clicked)


def save_clicked(change):
    """Send the combined depth map to the browser as a download.

    Reports through the status label in row 3 of ``grid_options`` when there
    is no depth map yet, rather than requesting a download of a missing file
    and still announcing success.

    Args:
        change: Argument supplied by the click callback; not used here.
    """
    depth_path = file_name['depth']
    if not depth_path or not os.path.exists(depth_path):
        print("No depth map to save - process an image first")
        grid_options[3, 2] = widgets.Label('No depth map to save')
        return

    print("Save")
    files.download(depth_path)

    grid_options[3, 2] = widgets.Label('Image saved')

# Button that runs save_clicked when pressed.
save_button = widgets.Button(
    description="Save depth map",
    style=widgets.ButtonStyle(button_color='#ededed'),
)
save_button.on_click(save_clicked)


def stl_clicked(change):
    """Turn the saved depth map into a height-field STL and download it.

    The depth image named by ``file_name['depth']`` is rotated by a quarter
    turn and scaled so that its largest value becomes
    ``rows * stl_aspect.value``. Each 2x2 pixel neighbourhood is then covered
    by two triangles. The mesh is saved as ``file_name['stl']`` and handed to
    ``files.download``.

    Args:
        change: Argument supplied by the click callback; not used here.
    """
    im = cv2.imread(file_name['depth'], cv2.IMREAD_UNCHANGED)
    if im is None:
        # imread signals an unreadable or missing file by returning None.
        print('No depth map found - process an image before saving an STL')
        return

    im_array = np.rot90(np.array(im), -1, (0, 1))

    n_rows, n_cols = im_array.shape[0], im_array.shape[1]

    mesh_max = np.max(im_array)
    if mesh_max == 0:
        # An all-zero map yields a flat mesh instead of a division by zero.
        mesh_max = 1

    # Multi-channel images contribute their first channel only.
    heights = im_array[:, :, 0] if im_array.ndim == 3 else im_array
    scaled_mesh = n_rows * float(stl_aspect.value) * heights / mesh_max

    mesh_shape = mesh.Mesh(np.zeros((n_rows - 1) * (n_cols - 1) * 2, dtype=mesh.Mesh.dtype))

    # One grid cell per pixel that has a neighbour below and to the right.
    # Flattening in row-major order puts cell (i, j) at index
    # i * (n_cols - 1) + j, with triangles 2 * index and 2 * index + 1.
    cell_rows, cell_cols = np.meshgrid(
        np.arange(n_rows - 1), np.arange(n_cols - 1), indexing='ij'
    )

    def corner(di, dj):
        # (row, column, height) of the cell corner offset by (di, dj).
        return np.stack(
            (
                cell_rows + di,
                cell_cols + dj,
                scaled_mesh[di:n_rows - 1 + di, dj:n_cols - 1 + dj],
            ),
            axis=-1,
        ).reshape(-1, 3)

    vectors = mesh_shape.vectors

    # First triangle of each cell: (i+1, j), (i, j+1), (i, j).
    vectors[0::2, 0] = corner(1, 0)
    vectors[0::2, 1] = corner(0, 1)
    vectors[0::2, 2] = corner(0, 0)

    # Second triangle: (i+1, j+1), (i, j+1), (i+1, j); the last two corners
    # are shared with the first triangle.
    vectors[1::2, 0] = corner(1, 1)
    vectors[1::2, 1] = vectors[0::2, 1]
    vectors[1::2, 2] = vectors[0::2, 0]

    mesh_shape.save(file_name['stl'])
    files.download(file_name['stl'])

# Button that runs stl_clicked when pressed.
stl_button = widgets.Button(
    description="Save STL",
    style=widgets.ButtonStyle(button_color='#ededed'),
)
stl_button.on_click(stl_clicked)

# Numeric input read by stl_clicked to scale the mesh height.
stl_aspect = widgets.BoundedFloatText(value=0.2, min=0, max=100, step=0.1, disabled=False)

# Rows 0-3 share one shape: time estimate, action button, empty status label.
step_rows = (
    ('~120 seconds', install_button),
    ('~10 seconds', import_button),
    ('~120 seconds', process_button),
    ('~15 seconds', save_button),
)
for row, (estimate, control) in enumerate(step_rows):
    grid_options[row, 0] = widgets.Label(estimate)
    grid_options[row, 1] = control
    grid_options[row, 2] = widgets.Label('')

# Row 4 uses the status column as a caption for the extra input in column 3.
grid_options[4, 0] = widgets.Label('~110 seconds per megapixel')
grid_options[4, 1] = stl_button
grid_options[4, 2] = widgets.Label('Depth divided by width of object:')
grid_options[4, 3] = stl_aspect


# Last expression of the cell, so the notebook renders the grid.
grid_options


GridspecLayout(children=(Label(value='~120 seconds', layout=Layout(grid_area='widget001')), Button(description…

Collecting timm==0.6.7
  Downloading timm-0.6.7-py3-none-any.whl (509 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.0/510.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch==2.0.1
  Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.15.2
  Downloading torchvision-0.15.2-cp310-cp310-manylinux1_x86_64.whl (6.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m92.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.1)
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m78.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.1)
  Down