In [1]:
### This script loads a folder of images and predicts a depth map for each one
from __future__ import absolute_import, division, print_function
%matplotlib inline

import os
import glob
import numpy as np
import PIL.Image as pil
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
from torchvision import transforms

import networks
from utils import download_model_if_doesnt_exist
import evaluate_depth
from layers import disp_to_depth

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('running on gpu')

# Turn on cuDNN's benchmark mode for the rest of the session.
torch.backends.cudnn.benchmark = True

running on gpu


In [2]:
# Identifier of the pretrained checkpoint; it is also the sub-folder name under "models".
model_name = "mono_1024x320"

# Going by the helper's name, this fetches the checkpoint when it is not present yet.
download_model_if_doesnt_exist(model_name)

model_dir = os.path.join("models", model_name)
encoder_path = os.path.join(model_dir, "encoder.pth")
depth_decoder_path = os.path.join(model_dir, "depth.pth")

# ---- Encoder ---------------------------------------------------------------
# NOTE(review): (18, False) is presumably "ResNet-18, no pretrained init" --
# confirm against networks.ResnetEncoder.
encoder = networks.ResnetEncoder(18, False)
loaded_dict_enc = torch.load(encoder_path, map_location=device)

# The encoder checkpoint also carries non-weight entries (the inference cell
# reads 'height' and 'width' from it), so keep only the keys that the model's
# own state dict knows about before loading.
encoder_keys = encoder.state_dict()
filtered_dict_enc = {
    name: tensor
    for name, tensor in loaded_dict_enc.items()
    if name in encoder_keys
}
encoder.load_state_dict(filtered_dict_enc)
encoder.to(device)
encoder.eval()

# ---- Depth decoder ---------------------------------------------------------
depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4))
loaded_dict = torch.load(depth_decoder_path, map_location=device)
depth_decoder.load_state_dict(loaded_dict)
depth_decoder.to(device)
# Trailing semicolon suppresses the module repr in the notebook output.
depth_decoder.eval();

In [11]:
# ---------------------------------------------------------------------------
# Batch inference: predict a depth map for every PNG in IMAGE_PATH and store
# each result as "<image name>_mono.npy" inside `savepath`.
# ---------------------------------------------------------------------------

# Validation data.
IMAGE_PATH = r'C:\Users\portierl4527\ARCADIS\Afstuderen Levi - Depth estimation - General\Cityscapes-Sequence\leftImg8bit_sequence_trainvaltest\test'
# os.path.join replaces the former IMAGE_PATH + '\*.png', which relied on the
# invalid escape sequence '\*' and on a Windows-only separator. sorted() makes
# the processing order deterministic (glob returns files in arbitrary order).
images = sorted(glob.glob(os.path.join(IMAGE_PATH, '*.png')))

# To process the training split instead, point IMAGE_PATH at:
# "C:/Users/portierl4527/ARCADIS/Afstuderen Levi - Depth estimation - General/Cityscapes-Dataset/01_Prepared_Data/images/train"

# Output folder for the .npy predictions; created up front so np.save cannot
# fail on a missing directory.
savepath = r"C:\Users\portierl4527\ARCADIS\Afstuderen Levi - Depth estimation - General\Cityscapes-Dataset\monodepth\cityscapes_prediction\high"
os.makedirs(savepath, exist_ok=True)

# Loop-invariant setup: the network input size is stored in the encoder
# checkpoint, and one ToTensor instance can be reused for every image.
feed_height = loaded_dict_enc['height']
feed_width = loaded_dict_enc['width']
to_tensor = transforms.ToTensor()

for image_path in tqdm(images):

    # Preparing image: the `with` block closes the file handle as soon as the
    # pixel data has been converted to RGB.
    with pil.open(image_path) as image_file:
        rgb_image = image_file.convert('RGB')
    original_width, original_height = rgb_image.size

    input_image_resized = rgb_image.resize((feed_width, feed_height), pil.LANCZOS)

    # ToTensor + unsqueeze(0) adds a leading batch dimension of size 1.
    input_image_pytorch = to_tensor(input_image_resized).unsqueeze(0)
    input_image = input_image_pytorch.to(device)

    # Prediction using monodepth; no gradients are needed for inference.
    with torch.no_grad():
        features = encoder(input_image)
        outputs = depth_decoder(features)

        disp = outputs[("disp", 0)]

        # Convert back to the original image size.
        disp_resized = torch.nn.functional.interpolate(
            disp,
            (original_height, original_width),
            mode="bilinear",
            align_corners=False,
        )

        # Returns inverse depth and depth.
        # NOTE(review): 0.1 and 100 are presumably the min/max depth bounds --
        # confirm against layers.disp_to_depth.
        scaled_disp, depth = disp_to_depth(disp_resized, 0.1, 100)

    # NOTE: despite its historical name, this array holds *depth*, not
    # disparity. The name is kept so cells that read it keep working.
    disp_resized_np = depth.squeeze().cpu().numpy()

    # Name for storing: source file name without its extension.
    basename = os.path.basename(image_path)
    basename_no_ext = os.path.splitext(basename)[0]

    # Save the numpy file.
    np.save(os.path.join(savepath, basename_no_ext + '_mono.npy'), disp_resized_np)

100%|██████████████████████████████████████████████████████████████████████████████| 2100/2100 [06:55<00:00,  5.05it/s]
