In [8]:
import glob
import os

import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms
from tqdm import tqdm

import model_io
import utils
from models import UnetAdaptiveBins


def _is_pil_image(img):
    return isinstance(img, Image.Image)


def _is_numpy_image(img):
    return isinstance(img, np.ndarray) and (img.ndim in {2, 3})


class ToTensor(object):
    def __init__(self):
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def __call__(self, image, target_size=(640, 480)):
        # image = image.resize(target_size)
        image = self.to_tensor(image)
        image = self.normalize(image)
        return image

    def to_tensor(self, pic):
        if not (_is_pil_image(pic) or _is_numpy_image(pic)):
            raise TypeError(
                'pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

        if isinstance(pic, np.ndarray):
            img = torch.from_numpy(pic.transpose((2, 0, 1)))
            return img

        # handle PIL Image
        if pic.mode == 'I':
            img = torch.from_numpy(np.array(pic, np.int32, copy=False))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.array(pic, np.int16, copy=False))
        else:
            img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        else:
            return img


class InferenceHelper:
    def __init__(self, dataset='nyu', device='cuda:0'):
        self.toTensor = ToTensor()
        self.device = device
        if dataset == 'nyu':
            self.min_depth = 1e-3
            self.max_depth = 10
            self.saving_factor = 1000  # used to save in 16 bit
            model = UnetAdaptiveBins.build(n_bins=256, min_val=self.min_depth, max_val=self.max_depth)
            pretrained_path = os.path.join(os.getenv('HOME'), 'AdaBins') + "/pretrained/AdaBins_nyu.pt"
        elif dataset == 'kitti':
            self.min_depth = 1e-3
            self.max_depth = 80
            self.saving_factor = 256
            model = UnetAdaptiveBins.build(n_bins=256, min_val=self.min_depth, max_val=self.max_depth)
            pretrained_path = os.path.join(os.getenv('HOME'), 'AdaBins') + "/pretrained/AdaBins_kitti.pt"
        else:
            raise ValueError("dataset can be either 'nyu' or 'kitti' but got {}".format(dataset))

        model, _, _ = model_io.load_checkpoint(pretrained_path, model)
        model.eval()
        self.model = model.to(self.device)

    @torch.no_grad()
    def predict_pil(self, pil_image, visualized=False):
        # pil_image = pil_image.resize((640, 480))
        img = np.asarray(pil_image) / 255.

        img = self.toTensor(img).unsqueeze(0).float().to(self.device)
        print(img)
        bin_centers, pred = self.predict(img)

        if visualized:
            viz = utils.colorize(torch.from_numpy(pred).unsqueeze(0), vmin=None, vmax=None, cmap='magma')
            # pred = np.asarray(pred*1000, dtype='uint16')
            viz = Image.fromarray(viz)
            return bin_centers, pred, viz
        return bin_centers, pred

    @torch.no_grad()
    def predict(self, image):
        bins, pred = self.model(image)
        pred = np.clip(pred.cpu().numpy(), self.min_depth, self.max_depth)

        # Flip
        image = torch.Tensor(np.array(image.cpu().numpy())[..., ::-1].copy()).to(self.device)
        pred_lr = self.model(image)[-1]
        pred_lr = np.clip(pred_lr.cpu().numpy()[..., ::-1], self.min_depth, self.max_depth)

        # Take average of original and mirror
        final = 0.5 * (pred + pred_lr)
        final = nn.functional.interpolate(torch.Tensor(final), image.shape[-2:],
                                          mode='bilinear', align_corners=True).cpu().numpy()

        final[final < self.min_depth] = self.min_depth
        final[final > self.max_depth] = self.max_depth
        final[np.isinf(final)] = self.max_depth
        final[np.isnan(final)] = self.min_depth

        centers = 0.5 * (bins[:, 1:] + bins[:, :-1])
        centers = centers.cpu().squeeze().numpy()
        centers = centers[centers > self.min_depth]
        centers = centers[centers < self.max_depth]

        return centers, final

    @torch.no_grad()
    def predict_dir(self, test_dir, out_dir):
        os.makedirs(out_dir, exist_ok=True)
        transform = ToTensor()
        all_files = glob.glob(os.path.join(test_dir, "*"))
        self.model.eval()
        for f in tqdm(all_files):
            image = np.asarray(Image.open(f), dtype='float32') / 255.
            image = transform(image).unsqueeze(0).to(self.device)

            centers, final = self.predict(image)
            # final = final.squeeze().cpu().numpy()

            final = (final * self.saving_factor).astype('uint16')
            basename = os.path.basename(f).split('.')[0]
            save_path = os.path.join(out_dir, basename + ".png")

            Image.fromarray(final).save(save_path)



In [33]:
import matplotlib.pyplot as plt
from time import time
from pathlib import Path
from glob import glob
import matplotlib


base = "/home/jonfrey/datasets/scannet"
image_pths = [str(p) for p in glob( base+'/**/*.jpg', recursive=True ) if str(p).find('color') != -1]
fun = lambda x : x.split('/')[-3][-7:] + '_'+ str( "0"*(6-len( x.split('/')[-1][:-4]))) + x.split('/')[-1][:-4]  
image_pths.sort(key=fun)


inferHelper = InferenceHelper(  dataset='nyu', device='cuda:1' )

from torchvision import transforms as tf 
import torch
tra = torch.nn.Sequential(
    tf.Resize((480,640))
)
tra_up = torch.nn.Sequential(
    tf.Resize((472*2, 1216))
)

Loading base model ()...

Using cache found in /home/jonfrey/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master


Done.
Removing last two layers (global_pool & classifier).
Building Encoder-Decoder model..Done.


In [34]:
img = Image.open( image_pths[0] )

tra = torch.nn.Sequential(
    tf.Resize((472*2, 1216))
)
inferHelper.predict_pil( tra(img) ) 

RuntimeError: The size of tensor a (1102) must match the size of tensor b (500) at non-singleton dimension 2

In [20]:
start = time()
import imageio
### RESTART OVER NIGHT at scene0000_02__2082

for j, i in enumerate( image_pths ):
    idx = i.split('/')[-3]+'__'+i.split('/')[-1][:-4]
    img = tra( Image.open( i ))
    centers, pred = inferHelper.predict_pil(img)
              
    pred = torch.from_numpy(pred).numpy()
    
    Path( os.path.join( str(Path(i).parent.parent),'depth_estimate') ).mkdir(exist_ok=True)
    save_path = os.path.join( str(Path(i).parent.parent),'depth_estimate/'+ i.split('/')[-1] )
    store = (pred[0,0,:,:]) * 1000
    store = store.astype(np.uint16)
    save_path = save_path[:-4]+'.png'
    imageio.imwrite( save_path ,store) 
    
#     save_path = save_path[:-4]+'_preview.png'
#     plt.imshow(pred.squeeze(), cmap='magma_r')
#     plt.axis('off')
#     plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
#     plt.close()
    if j > 0:
        break
              
#     if j % 100 == 0:
#         print(j, '/',len(image_pths), '  ',time()-start,'s')
#         start = time()

TypeError: 'module' object is not callable

In [41]:
import os
import sys 
os.chdir(os.path.join(os.getenv('HOME'), 'ASL'))

sys.path.insert(0, os.path.join(os.getenv('HOME'), 'ASL'))
sys.path.append(os.path.join(os.path.join(os.getenv('HOME'), 'ASL') + '/src'))
import yaml

def file_path(string):
  if os.path.isfile(string):
    return string
  else:
    raise NotADirectoryError(string)

def load_yaml(path):
  with open(path) as file:  
    res = yaml.load(file, Loader=yaml.FullLoader) 
  return res


import coloredlogs
coloredlogs.install()
import time
import shutil
import datetime
import argparse
import signal
import yaml
import logging
from pathlib import Path
import gc

# Frameworks
import torch
import numpy as np
import imageio
# Costume Modules

from datasets import get_dataset

from torchvision import transforms
from torchvision import transforms as tf


DEVICE = 'cuda:1'

    
#   parser = argparse.ArgumentParser() 
#   parser.add_argument('--eval', type=file_path, default="/home/jonfrey/ASL/cfg/eval/eval.yml",
#                       help='Yaml containing dataloader config')
  
#   args = parser.parse_args()
env_cfg_path = os.path.join('cfg/env', os.environ['ENV_WORKSTATION_NAME']+ '.yml')
env_cfg = load_yaml(env_cfg_path)	
eval_cfg = load_yaml("/home/jonfrey/ASL/cfg/eval/eval.yml")

# SETUP MODEL
inferHelper = InferenceHelper(  dataset='nyu', device='cuda:1' )

# SETUP DATALOADER
dataset_test = get_dataset(
**eval_cfg['dataset'],
env = env_cfg,
output_trafo = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
)
dataloader_test = torch.utils.data.DataLoader(dataset_test,
shuffle = False,
num_workers =eval_cfg['loader']['num_workers'],
pin_memory = eval_cfg['loader']['pin_memory'],
batch_size = eval_cfg['loader']['batch_size'], 
drop_last = True)

# CREATE RESULT FOLDER
base = os.path.join(env_cfg['base'], eval_cfg['name'], eval_cfg['dataset']['name'])

globale_idx_to_image_path = dataset_test.image_pths

tra = tf.Resize((480,640))
tra_up = tf.Resize(eval_cfg['dataset']['output_size'])


Loading base model ()...

Using cache found in /home/jonfrey/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master


Done.
Removing last two layers (global_pool & classifier).
Building Encoder-Decoder model..Done.


In [47]:
# START EVALUATION
for j, batch in enumerate( dataloader_test ):
    print(j)
    images = batch[0].to(DEVICE)
    target = batch[1].to(DEVICE)
    ori_img = batch[2].to(DEVICE)
    replayed = batch[3].to(DEVICE)
    BS = images.shape[0]
    global_idx = batch[4] 
    
    centers, pred = inferHelper.predict( tra(images) )
    print(pred.shape, pred.dtype, type(pred))
    pred = tra_up(torch.from_numpy(pred)).numpy()
    
    for b in range(BS):
        img_path = globale_idx_to_image_path[global_idx[b]]
        p = os.path.join(base,
            img_path.split('/')[-3],
            'depth_estimate',
            img_path.split('/')[-1][:-4]+'.png')
        store = (pred[0,0,:,:]) * 1000
        store = store.astype(np.uint16)
        save_path = save_path[:-4]+'.png'
        imageio.imwrite( save_path ,store) 

    Path(p).parent.mkdir(parents=True, exist_ok=True)

    imageio.imwrite( p, np.moveaxis( img[b], [0,1,2], [2,0,1] ) )
    if j > 1:
        break

0
(1, 1, 480, 640) float32 <class 'numpy.ndarray'>


NameError: name 'save_path' is not defined