In [None]:
import cv2
import torch
import urllib.request
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image as _img
import os
from PIL import Image, ImageDraw, ImageFont


In [None]:
# Fetch the sample image once; later runs reuse the local copy instead of
# hitting the network again on every Restart & Run All.
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
if not os.path.exists(filename):
    urllib.request.urlretrieve(url, filename)

In [None]:
# Pick exactly one MiDaS variant (accuracy vs. inference speed trade-off):
#   "DPT_Large"   - MiDaS v3 Large:   highest accuracy, slowest inference
#   "DPT_Hybrid"  - MiDaS v3 Hybrid:  medium accuracy, medium inference speed
#   "MiDaS_small" - MiDaS v2.1 Small: lowest accuracy, highest inference speed
model_type = "DPT_Large"

# Fetch the selected network through torch.hub.
midas = torch.hub.load("intel-isl/MiDaS", model_type)

In [None]:
# Run on the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the weights to that device and switch the model to inference mode.
# Both calls return the module itself, so the cell still ends on the model.
midas.to(device).eval()

In [None]:
# Load the preprocessing pipelines published alongside the models.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# The two DPT variants use the DPT transform; anything else uses the small one.
if model_type in ("DPT_Large", "DPT_Hybrid"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

In [None]:
##### resize
# Load the photo, shrink it to a tenth of its size for fast inference and
# display, then build the network input from it.
file_path = 'test.jpg'
img = cv2.imread(file_path)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError(f"Could not read image: {file_path!r}")

h, w = img.shape[0], img.shape[1]
# cv2.resize takes (width, height); clamp to 1 so tiny inputs never produce a 0-sized target.
img = cv2.resize(img, (max(1, w // 10), max(1, h // 10)), interpolation=cv2.INTER_AREA)
print(img.shape)

# Preview in a native OpenCV window; blocks until a key is pressed.
cv2.imshow('output', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# OpenCV decodes to BGR channel order, while the MiDaS hub example feeds RGB to the
# transform. `img` itself stays BGR because later cells display it with cv2.imshow.
input_batch = transform(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).to(device)

In [None]:
# Inference only: no gradients are needed.
with torch.no_grad():
    prediction = midas(input_batch)
    # Report the shape rather than dumping every value of the tensor.
    print(prediction.shape)

    # Resample the prediction to the (height, width) of the resized image so it
    # can be used as a per-pixel mask over `img`.
    prediction_filter = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=img.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

# NumPy copy on the host for plotting and thresholding.
output = prediction_filter.cpu().numpy()


In [None]:
# Show the resampled prediction with a title and a colour scale so the figure
# stands on its own; plt.show() also keeps the AxesImage repr out of the output.
fig, ax = plt.subplots()
depth_plot = ax.imshow(output)
ax.set_title("MiDaS prediction")
fig.colorbar(depth_plot, ax=ax)
plt.show()

---
## Add text on the image

In [None]:
# Render a line of user-supplied text in lime on a black canvas and save it as
# sample.png in the current working directory.

# ImageFont.truetype must be able to locate this font file.
# (The variable name is historical; it points at I.Ngaan.ttf.)
#georgia_bold = 'fonts/georgia_bold.ttf'
georgia_bold_italic = 'I.Ngaan.ttf'

txt = input() # text to render
if not txt:
    # An empty string would produce a zero-width canvas.
    raise ValueError("Enter at least one character to render.")
txt_len = len(txt)
background = (0,0,0) # black; pure black is keyed out to transparency later in the notebook
fontsize = 24
W, H = (fontsize*txt_len, fontsize) # canvas size: one fontsize-wide slot per character
font = ImageFont.truetype(georgia_bold_italic, fontsize)

image = Image.new('RGBA', (W, H), background)
draw = ImageDraw.Draw(image)

# font.getsize() was removed in Pillow 10; getbbox() is its replacement.
left, top, right, bottom = font.getbbox(txt)
w, h = right - left, bottom - top

# Centre the inked box on the canvas; subtracting (left, top) compensates for the
# offset between the drawing origin and where the glyphs actually start.
draw.text(((W-w)/2 - left, (H-h)/2 - top), txt, fill='lime', font=font)

save_location = os.getcwd()
image.save(save_location + '/sample.png')

In [None]:
# Re-open the rendered text and key out its black background.
text_img = Image.open('sample.png')
rgba = text_img.convert("RGBA")
datas = rgba.getdata()

# Pixels whose R, G and B are all zero become fully transparent white;
# every other pixel passes through unchanged.
newData = [
    (255, 255, 255, 0) if item[:3] == (0, 0, 0) else item
    for item in datas
]

rgba.putdata(newData)
rgba.save("transparent_image.png", "PNG")

In [None]:
# Load the transparent text image, keeping its alpha channel (IMREAD_UNCHANGED).
frontimage_path = 'transparent_image.png'
frontimg = cv2.imread(frontimage_path, cv2.IMREAD_UNCHANGED)
if frontimg is None:
    # cv2.imread reports a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError(f"Could not read image: {frontimage_path!r}")
if frontimg.ndim != 3 or frontimg.shape[2] != 4:
    # Later cells index channel 3 as the alpha mask.
    raise ValueError(f"Expected a 4-channel image, got shape {frontimg.shape}")

h, w = frontimg.shape[0], frontimg.shape[1]
print(frontimg.shape)

# Preview in a native OpenCV window; blocks until a key is pressed.
cv2.imshow('output', frontimg)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Composite the text at a fixed position, then restore every pixel whose
# prediction exceeds `depth` so that those pixels appear in front of the text.
depth = 10
x_offset = 150  # first image row covered by the overlay (axis 0)
y_offset = 100  # first image column covered by the overlay (axis 1)

overlay_h, overlay_w = frontimg.shape[0], frontimg.shape[1]
if x_offset + overlay_h > img.shape[0] or y_offset + overlay_w > img.shape[1]:
    # Slicing would silently truncate and the boolean mask below would no longer fit.
    raise ValueError(
        f"Overlay of size {(overlay_h, overlay_w)} at offset {(x_offset, y_offset)} "
        f"does not fit inside image of size {img.shape[:2]}"
    )

# Nothing below changes between frames, so the frame is composed once.
opaque = frontimg[:, :, 3] > 0          # overlay pixels with non-zero alpha
indices = np.where(output > depth)      # pixels that must stay on top of the text

img_tmp = img.copy()
mask = img_tmp[x_offset:x_offset + overlay_h, y_offset:y_offset + overlay_w].copy()
mask[opaque] = frontimg[opaque][:, 0:3]  # copy colour channels, drop alpha
img_tmp[x_offset:x_offset + overlay_h, y_offset:y_offset + overlay_w] = mask
img_tmp[indices] = img[indices]

# Keep the window responsive until 'q' is pressed.
while True:
    cv2.imshow('output', img_tmp)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()

In [None]:
# Animate the text bouncing around the image. Pixels whose prediction exceeds
# `depth` are restored after compositing, so they appear in front of the text.
h, w = img.shape[0], img.shape[1]
overlay_h, overlay_w = frontimg.shape[0], frontimg.shape[1]

# Largest top-left offsets that keep the overlay fully inside the image.
max_x_offset = h - overlay_h
max_y_offset = w - overlay_w
if max_x_offset < 0 or max_y_offset < 0:
    raise ValueError(
        f"Overlay of size {(overlay_h, overlay_w)} is larger than image of size {(h, w)}"
    )

depth = 15
x_offset = 0       # current first row of the overlay (axis 0)
y_offset = 0       # current first column of the overlay (axis 1)
x_offset_add = 1   # per-frame step along rows, +1 or -1
y_offset_add = 1   # per-frame step along columns, +1 or -1

# Frame-independent values, computed once instead of on every iteration.
opaque = frontimg[:, :, 3] > 0            # overlay pixels with non-zero alpha
overlay_pixels = frontimg[opaque][:, 0:3]  # their colour channels, alpha dropped
indices = np.where(output > depth)        # pixels that must stay on top of the text

while True:
    img_tmp = img.copy()

    mask = img_tmp[x_offset:x_offset + overlay_h, y_offset:y_offset + overlay_w].copy()
    mask[opaque] = overlay_pixels
    img_tmp[x_offset:x_offset + overlay_h, y_offset:y_offset + overlay_w] = mask
    img_tmp[indices] = img[indices]

    # Advance one step, then clamp and reverse at the borders. Clamping keeps the
    # offsets valid even when the overlay is exactly as large as the image.
    x_offset += x_offset_add
    if x_offset >= max_x_offset:
        x_offset, x_offset_add = max_x_offset, -1
    elif x_offset <= 0:
        x_offset, x_offset_add = 0, 1

    y_offset += y_offset_add
    if y_offset >= max_y_offset:
        y_offset, y_offset_add = max_y_offset, -1
    elif y_offset <= 0:
        y_offset, y_offset_add = 0, 1

    cv2.imshow('output', img_tmp)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()

In [None]:
# Citation (BibTeX), kept as comments so this cell does not raise a SyntaxError:
#
# @article{Ranftl2021,
# 	author    = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun},
# 	title     = {Vision Transformers for Dense Prediction},
# 	journal   = {ArXiv preprint},
# 	year      = {2021},
# }

In [None]:
# Citation (BibTeX), kept as comments so this cell does not raise a SyntaxError:
#
# @article{Ranftl2021,
# 	author    = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun},
# 	title     = {Vision Transformers for Dense Prediction},
# 	journal   = {ArXiv preprint},
# 	year      = {2021},
# }