In [4]:
import sys
import os
import numpy as np
import acl
import cv2
from PIL import Image
import glob
import time

atlas_path = "/home/HwHiAiUser/wubo/paintfix_python/atlas_utils"
sys.path.append(atlas_path)
print(sys.path)

from atlas_utils.constants import *
from acl_resource import AclResource
from atlas_utils.utils import *
from acl_model import Model
from atlas_utils.acl_image import AclImage


OUTPUT_DIR = './out/'
MODEL_PATH = "./model/hifill_34.om"
MODEL_MATMUL_PATH = "./model/matmul_paint_3072.om"
#MODEL_MATMUL_PATH = "./model/matmultst.om"
MODEL_WIDTH = 512
MODEL_HEIGHT = 512

INPUT_SIZE = 512  
ATTENTION_SIZE = 32 
MULTIPLE = 6

NPTYPE_FLOAT32 = np.float32

def sort(str_lst):
    return [s for s in sorted(str_lst)]

def resize_ave(img, MULTIPLE):
    img = img.astype(NPTYPE_FLOAT32)
    img_patches = extract_image_patches(img, MULTIPLE)
    img = np.mean(img_patches, axis=(2,3))
    return img
def reconstruct_residual_from_patches(residual, multiple):
    residual = np.reshape(residual, [ATTENTION_SIZE, ATTENTION_SIZE, multiple, multiple, 3])
    residual = np.transpose(residual, [0,2,1,3,4])
    return np.reshape(residual, [ATTENTION_SIZE * multiple, ATTENTION_SIZE * multiple, 3])

# extract image patches
def extract_image_patches(img, multiple):
    h, w, c = img.shape
    img = np.reshape(img, [h//multiple, multiple, w//multiple, multiple, c])
    img = np.transpose(img, [0,2,1,3,4])
    return img

def pre_process(raw_img, raw_mask):
    raw_mask = raw_mask.astype(NPTYPE_FLOAT32) / 255.
    raw_img = raw_img.astype(NPTYPE_FLOAT32)

    # resize raw image & mask to desinated size
    large_img = cv2.resize(raw_img,  (MULTIPLE * INPUT_SIZE, MULTIPLE * INPUT_SIZE), interpolation = cv2. INTER_LINEAR)
    large_mask = cv2.resize(raw_mask, (MULTIPLE * INPUT_SIZE, MULTIPLE * INPUT_SIZE), interpolation = cv2.INTER_NEAREST)
    
    # down-sample large image & mask to 512x512
    small_img = resize_ave(large_img, MULTIPLE)
    small_mask = cv2.resize(raw_mask, (INPUT_SIZE, INPUT_SIZE), interpolation = cv2.INTER_NEAREST)
    
    # set hole region to 1. and backgroun to 0.
    small_mask = 1. - small_mask
    return large_img, large_mask, small_img, small_mask

def read_imgs_masks(images, masks):
    paths_img = glob.glob(images + '/*.*[gG]')
    paths_mask = glob.glob(masks + '/*.*[gG]')
    paths_img = sort(paths_img)
    paths_mask = sort(paths_mask)
    print(paths_img)
    print(paths_mask)
    return paths_img, paths_mask
    
def matmul_ex(array_a, array_b, stream):
    in_dtype, out_dtype = 1, 1
    size_c = array_a.shape[0] * array_b.shape[1] * acl.data_type_size(float)

    dev_matrix_a, ret = acl.rt.malloc(array_a.nbytes, ACL_MEM_MALLOC_NORMAL_ONLY)
    dev_matrix_b, ret = acl.rt.malloc(array_b.nbytes, ACL_MEM_MALLOC_NORMAL_ONLY)
    dev_matrix_c, ret = acl.rt.malloc(size_c, ACL_MEM_MALLOC_NORMAL_ONLY)

    host_matrix_c, ret = acl.rt.malloc_host(size_c)
    
    array_a = np.ascontiguousarray(array_a)
    host_matrix_a = acl.util.numpy_to_ptr(array_a)
    
    array_b = np.ascontiguousarray(array_b)
    host_matrix_b = acl.util.numpy_to_ptr(array_b)
    
    ret = acl.rt.memcpy(dev_matrix_a, array_a.nbytes, host_matrix_a, array_a.nbytes, ACL_MEMCPY_HOST_TO_DEVICE)
    ret = acl.rt.memcpy(dev_matrix_b, array_b.nbytes, host_matrix_b, array_b.nbytes, ACL_MEMCPY_HOST_TO_DEVICE)
        
    ret = acl.blas.gemm_ex(0, 0, 0, 1024, 3072, 1024,
                    1, dev_matrix_a, 1024, float, dev_matrix_b, 3072, float,
                    0, dev_matrix_c, 1024, float, 0,
                    stream)
                    
    #check_ret("acl.mdl.execute", ret)
    ret = acl.rt.synchronize_stream(stream);   
    
    acl.rt.memcpy(host_matrix_c, size_c, dev_matrix_c, size_c, ACL_MEMCPY_DEVICE_TO_HOST);
    
    array_c = acl.util.ptr_to_numpy(host_matrix_c,
                                         (array_a.shape[0], array_b.shape[1]),
                                         NPTYPE_FLOAT16)
    print("ACL output:\n", array_c.shape)
    return array_c

def matmul_om(matmul_model,attention,residual):
    attention_reshape = attention.reshape(1024,1024)
    residual_reshape = residual.reshape(1024,3072*9)
    result = []
    for i in range(9):
        resi = residual_reshape[:,i*3072:(i+1)*3072]
        #matmul_ret = matmul_ex(attention_reshape,resi)
        #result.append(tmp.reshape(1024,3072))
        matmul_ret = matmul_model.execute([attention_reshape,resi])   
        tmp = matmul_ret[0]
        result.append(tmp.reshape(1024,3072))
    return np.hstack(result).reshape(ATTENTION_SIZE,ATTENTION_SIZE,3072*9)

# residual aggregation module
def residual_aggregate(model,residual, attention):
    residual = extract_image_patches(residual, MULTIPLE * INPUT_SIZE//ATTENTION_SIZE)
    residual = np.reshape(residual, [1, residual.shape[0] * residual.shape[1], -1])
    residual = matmul_om(model,attention,residual)
    #residual = np.matmul(attention, residual)
    residual = reconstruct_residual_from_patches(residual, MULTIPLE * INPUT_SIZE//ATTENTION_SIZE)
    return residual
    

def post_process(model,raw_img, large_img, large_mask, inpainted_512, img_512, mask_512, attention):
    # compute the raw residual map
    s = time.time()
    h, w, c = raw_img.shape
    low_base = cv2.resize(inpainted_512.astype(NPTYPE_FLOAT32), (INPUT_SIZE * MULTIPLE, INPUT_SIZE * MULTIPLE), interpolation = cv2.INTER_LINEAR) 
    low_large = cv2.resize(img_512.astype(NPTYPE_FLOAT32), (INPUT_SIZE * MULTIPLE, INPUT_SIZE * MULTIPLE), interpolation = cv2.INTER_LINEAR)
    residual = (large_img - low_large) * large_mask
    print('post_process before time', time.time() - s)
    # reconstruct residual map using residual aggregation module
    residual = residual_aggregate(model,residual, attention)
    print('post_process residual_aggregate time', time.time() - s)
    # compute large inpainted result
    res_large = low_base + residual
    res_large = np.clip(res_large, 0., 255.)

    # resize large inpainted result to raw size
    res_raw = cv2.resize(res_large, (w, h), interpolation = cv2.INTER_LINEAR)
    
    # paste the hole region to the original raw image
    mask = cv2.resize(mask_512.astype(NPTYPE_FLOAT32), (w, h), interpolation = cv2.INTER_LINEAR)
    mask = np.expand_dims(mask, axis=2)
    
    res_raw = res_raw * mask + raw_img * (1. - mask)
    return res_raw.astype(np.uint8)
   
def matmul_test(array_a, array_b, stream):
    in_dtype, out_dtype = 1, 1
    size_c = array_a.shape[0] * array_b.shape[1] * acl.data_type_size(ACL_FLOAT)
    #size_c = array_b.nbytes
    print(" size_c ", size_c)
    dev_matrix_a, ret = acl.rt.malloc(array_a.nbytes, ACL_MEM_MALLOC_NORMAL_ONLY)
    dev_matrix_b, ret = acl.rt.malloc(array_b.nbytes, ACL_MEM_MALLOC_NORMAL_ONLY)
    dev_matrix_c, ret = acl.rt.malloc(size_c, ACL_MEM_MALLOC_NORMAL_ONLY)

    host_matrix_c, ret = acl.rt.malloc_host(size_c)
    
    array_a = np.ascontiguousarray(array_a)
    host_matrix_a = acl.util.numpy_to_ptr(array_a)
    
    array_b = np.ascontiguousarray(array_b)
    host_matrix_b = acl.util.numpy_to_ptr(array_b)
    
    ret = acl.rt.memcpy(dev_matrix_a, array_a.nbytes, host_matrix_a, array_a.nbytes, ACL_MEMCPY_HOST_TO_DEVICE)
    ret = acl.rt.memcpy(dev_matrix_b, array_b.nbytes, host_matrix_b, array_b.nbytes, ACL_MEMCPY_HOST_TO_DEVICE)
    print(array_a.nbytes)  
    print(array_b.nbytes)    
    ret = acl.blas.gemm_ex(0, 0, 0, 2, 2, 2,
                    1, dev_matrix_a, 2, in_dtype, dev_matrix_b, 2, in_dtype,
                    0, dev_matrix_c, 2, out_dtype, 0,
                    stream)
                
    #check_ret("acl.mdl.execute", ret)
    ret = acl.rt.synchronize_stream(stream);   
    
    acl.rt.memcpy(host_matrix_c, size_c, dev_matrix_c, size_c, ACL_MEMCPY_DEVICE_TO_HOST);
    print(dev_matrix_c)
    
    array_c = acl.util.ptr_to_numpy(host_matrix_c,
                                         (array_a.shape[0], array_b.shape[1]),
                                         5)
    print("ACL output:\n", array_c)
    ret = acl.rt.free(dev_matrix_a)
    ret = acl.rt.free(dev_matrix_b)
    ret = acl.rt.free(dev_matrix_c)
    return array_c
    
def matmul_test_200(array_a, array_b, stream):
    in_dtype = 0
    out_dtype = 0
    size_c = array_a.shape[0] * array_b.shape[1] * acl.data_type_size(ACL_FLOAT)
    #size_c = array_b.nbytes
    print(" size_c ", size_c)

    dev_matrix_c, ret = acl.rt.malloc(size_c, ACL_MEM_MALLOC_NORMAL_ONLY)

    print(ret)
    array_a = np.ascontiguousarray(array_a)
    
    
    dev_matrix_a = acl.util.numpy_to_ptr(array_a)
    array_b = np.ascontiguousarray(array_b)
    print(array_b)
    dev_matrix_b = acl.util.numpy_to_ptr(array_b)
    #print(dev_matrix_b[0],dev_matrix_b[1])
    #output = acl.util.ptr_to_numpy(dev_matrix_a, (2,2), 11)
    #print(output)
    print(stream)
    print(array_a.nbytes)  
    print(array_b.nbytes)    
    ret = acl.blas.gemm_ex(0, 0, 0, 2, 2, 2,
                    1, dev_matrix_a, 2, in_dtype, dev_matrix_b, 2, in_dtype,
                    0, dev_matrix_c, 2, out_dtype, 1,
                    stream)
                
    print(ret)
    ret = acl.rt.synchronize_stream(stream);   
    
    
    array_c = acl.util.ptr_to_numpy(dev_matrix_c,
                                         (array_a.shape[0], array_b.shape[1]),
                                         11)
    print("ACL output:\n", array_c)
 
    return array_c
  
def main():

    #read img/mask directory 
#     if (len(sys.argv) != 3):
#         print("The App arg is invalid")
#         exit(1)

    if not os.path.exists(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)

    #acl  init
    acl_resource = AclResource()
    stream = acl_resource.init()
    #deviceId = 0;
    #acl.rt.set_device(deviceId)
    #load model
    model = Model(acl_resource,MODEL_PATH)
    matmul_om = Model(acl_resource,MODEL_MATMUL_PATH)
    '''
    ret = acl.op.set_model_dir(MODEL_MATMUL_PATH)
    a = np.array([[1,2],
                 [3,4]],dtype=np.float32)
    b = np.array([[5,6],
                 [7,8]],dtype=np.float32)
    #c=a.dot(b)
    #print(c)
    print("*"*30)
    ct = matmul_test_200(a, b, stream)
    print(ct)
    return 
    '''
    image_dir = './data'
    masks_dir = './mask'
    paths_img, paths_mask = read_imgs_masks(image_dir, masks_dir)
    for i in range(len(paths_img)):
        print('==========')
        s = time.time()
        raw_img = cv2.imread(paths_img[i]) 
        raw_mask = cv2.imread(paths_mask[i])
        
        cv.imshow('raw_img', raw_img)
        
        
        img_large, mask_large, img_512, mask_512 = pre_process(raw_img, raw_mask)

        img_512_hwc = np.ascontiguousarray(img_512)
        mask_512_hwc = mask_512[:,:,0:1]
        mask_512_hwc = mask_512_hwc.transpose(2,0,1).copy()        
        resultList  = model.execute([img_512_hwc, mask_512_hwc,])        
        inpainted_512 = resultList[0]
        inpainted_512_temp = np.squeeze(inpainted_512)        
        attention = resultList[1]
        mask_512_new = resultList[2] 
          

        # post-processing
        res_raw_size = post_process(matmul_om,raw_img, img_large, mask_large, inpainted_512[0], img_512, mask_512_new[0], attention[0])
        filename = './out/outpaint_' + os.path.basename(paths_img[i])
        cv2.imwrite(filename , res_raw_size)
        print('processing time', time.time() - s)
        
    print("Execute end")

if __name__ == '__main__':
    main()
 


['/home/HwHiAiUser/Ascend', '/home/HwHiAiUser/mhc/paintfix_python', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '', '/home/HwHiAiUser/.local/lib/python3.6/site-packages', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages', '/home/HwHiAiUser/.local/lib/python3.6/site-packages/IPython/extensions', '/tmp/tmpsugl3pwn', '/home/HwHiAiUser/wubo/paintfix_python/atlas_utils', '/home/HwHiAiUser/wubo/paintfix_python/atlas_utils', '/home/HwHiAiUser/wubo/paintfix_python/atlas_utils', '/home/HwHiAiUser/wubo/paintfix_python/atlas_utils']
[Sample] init resource stage:


Exception: acl.rt.set_device failed ret=100002

In [5]:
! python3 test3072.py ./data ./mask

['/home/HwHiAiUser/mhc/paintfix_python', '/home/HwHiAiUser/Ascend', '/home/HwHiAiUser/mhc/paintfix_python', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '/home/HwHiAiUser/.local/lib/python3.6/site-packages', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages', '/home/HwHiAiUser/wubo/paintfix_python/atlas_utils']
[Sample] init resource stage:
Init resource success
load model  ./model/hifill_34.om
Init model resource
[Model] create model output dataset:
[Model] create model output dataset success
[Model] class Model init resource stage success
load model  ./model/matmul_paint_3072.om
Init model resource
[Model] create model output dataset:
[Model] create model output dataset success
[Model] class Model init resource stage success
['./data/1.jpg', './data/3.jpg', './data/4.jpg']
['./mask/1.jpg', './mask/3.jpg', './mask/4.jpg']
acl.mdl.execute cost 0:00:00.109354, model_id=1
post_process before time 0.12269973754882812
acl.mdl.exec