first init code

LouieYang · Aug 6, 2017 · f82420b · f82420b
1 parent d9302f2
commit f82420b
Show file tree

Hide file tree

Showing 4 changed files with 866 additions and 0 deletions.
diff --git a/closed_form_matting.py b/closed_form_matting.py
@@ -0,0 +1,58 @@
+from __future__ import division
+import argparse
+import os
+import scipy.misc as spm
+import scipy.ndimage as spi
+import scipy.sparse as sps
+import numpy as np
+import tensorflow as tf
+
+def getlaplacian1(i_arr, consts, epsilon=1e-5, win_rad=1):
+    """Build the sparse closed-form matting Laplacian of an image.
+
+    Every interior pixel whose (eroded) ``consts`` entry is zero is used as
+    the center of a square window. For each such window the local affinity
+    ``(1 + (I_r - mu)^T * inv(cov + epsilon / n * Id) * (I_c - mu)) / n`` is
+    accumulated for all pixel pairs of the window into a matrix ``A``; the
+    result is ``diag(row_sums(A)) - A``.
+
+    Args:
+        i_arr: image array of shape ``(h, w, c)``.
+        consts: ``(h, w)`` mask. It is eroded with the window footprint and
+            window centers whose eroded value is nonzero are skipped.
+        epsilon: regularizer; ``epsilon / n`` is added to the diagonal of the
+            window covariance before inversion (``n`` = pixels per window).
+        win_rad: window radius; windows are ``2 * win_rad + 1`` pixels wide.
+
+    Returns:
+        A SciPy sparse matrix of shape ``(h * w, h * w)``. Pixel ``(i, j)``
+        maps to row/column ``i + j * h`` (column-major numbering).
+    """
+    win_diam = win_rad * 2 + 1
+    neb_size = win_diam ** 2
+    pairs_per_win = neb_size ** 2
+    h, w, c = i_arr.shape
+    img_size = w * h
+    # Use the top-level ndimage function: the ``ndimage.morphology``
+    # sub-namespace is deprecated in recent SciPy releases.
+    consts = spi.grey_erosion(consts, footprint=np.ones(shape=(win_diam, win_diam)))
+
+    # Column-major linear index of every pixel.
+    indsM = np.arange(img_size).reshape((h, w), order='F')
+
+    # Count exactly the window centers the loop below will visit. Explicit
+    # end bounds (instead of ``-win_rad``) keep the slice valid for
+    # win_rad == 0, and counting zeros stays correct for non-0/1 masks.
+    interior = consts[win_rad:h - win_rad, win_rad:w - win_rad]
+    tlen = int(np.count_nonzero(interior == 0)) * pairs_per_win
+    row_inds = np.zeros(tlen, dtype=np.int64)
+    col_inds = np.zeros(tlen, dtype=np.int64)
+    vals = np.zeros(tlen)
+    filled = 0
+    for j in range(win_rad, w - win_rad):
+        for i in range(win_rad, h - win_rad):
+            if consts[i, j]:
+                continue
+            rows = slice(i - win_rad, i + win_rad + 1)
+            cols = slice(j - win_rad, j + win_rad + 1)
+            win_inds = indsM[rows, cols].ravel(order='F')
+            win_i = i_arr[rows, cols, :].reshape((neb_size, c), order='F')
+            win_mu = np.mean(win_i, axis=0).reshape(c, 1)
+            # Inverse of the regularized window covariance.
+            win_var = np.linalg.inv(
+                np.matmul(win_i.T, win_i) / neb_size
+                - np.matmul(win_mu, win_mu.T)
+                + epsilon / neb_size * np.identity(c))
+
+            # Center the window colors (broadcasts the mean over the rows).
+            win_i2 = win_i - win_mu.T
+            tvals = (1 + np.matmul(np.matmul(win_i2, win_var), win_i2.T)) / neb_size
+
+            # ind_mat[r, k] == win_inds[k]; the C-order and F-order ravels
+            # pair up with the F-order ravel of tvals below.
+            ind_mat = np.broadcast_to(win_inds, (neb_size, neb_size))
+            end = filled + pairs_per_win
+            row_inds[filled:end] = ind_mat.ravel(order='C')
+            col_inds[filled:end] = ind_mat.ravel(order='F')
+            vals[filled:end] = tvals.ravel(order='F')
+            filled = end
+
+    vals = vals[:filled]
+    row_inds = row_inds[:filled]
+    col_inds = col_inds[:filled]
+    # Duplicate (row, col) pairs from overlapping windows are summed here.
+    a_sparse = sps.csr_matrix((vals, (row_inds, col_inds)), shape=(img_size, img_size))
+
+    sum_a = np.asarray(a_sparse.sum(axis=1)).ravel()
+    a_sparse = sps.diags([sum_a], [0], shape=(img_size, img_size)) - a_sparse
+
+    return a_sparse
+
+def getLaplacian(img):
+    """Return the matting Laplacian of ``img`` as a ``tf.SparseTensor``.
+
+    The Laplacian is computed by ``getlaplacian1`` with an all-zero
+    constraint mask, ``epsilon=1e-5`` and a window radius of 1.
+
+    Args:
+        img: image array of shape ``(h, w, c)``.
+
+    Returns:
+        A ``tf.SparseTensor`` built from the COO form of the Laplacian, with
+        an ``(nnz, 2)`` index array of (row, col) pairs.
+    """
+    h, w, _ = img.shape
+    coo = getlaplacian1(img, np.zeros(shape=(h, w)), 1e-5, 1).tocoo()
+    # np.mat was removed in NumPy 2.0; a plain (nnz, 2) int64 array carries
+    # the same (row, col) pairs.
+    indices = np.stack([coo.row, coo.col], axis=1).astype(np.int64)
+    return tf.SparseTensor(indices, coo.data, coo.shape)
diff --git a/deep_photostyle.py b/deep_photostyle.py
@@ -0,0 +1,111 @@
+import argparse
+from PIL import Image
+import numpy as np
+from photo_style import stylize
+
+def _str2bool(value):
+    """Convert a command-line string such as "true"/"false" to a bool.
+
+    Without this converter argparse keeps the raw string, and any non-empty
+    string (including "False") is truthy.
+    """
+    if isinstance(value, bool):
+        return value
+    lowered = value.strip().lower()
+    if lowered in ("true", "t", "yes", "y", "1"):
+        return True
+    if lowered in ("false", "f", "no", "n", "0"):
+        return False
+    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))
+
+
+# Command-line interface of the script; the parsed values are exposed as the
+# module-level ``args`` namespace.
+parser = argparse.ArgumentParser()
+# Input Options
+parser.add_argument("--content_image_path", dest='content_image_path',  nargs='?',
+                    help="Path to the content image")
+parser.add_argument("--style_image_path",   dest='style_image_path',    nargs='?',
+                    help="Path to the style image")
+parser.add_argument("--content_seg_path",   dest='content_seg_path',    nargs='?',
+                    help="Path to the content segmentation")
+parser.add_argument("--style_seg_path",     dest='style_seg_path',      nargs='?',
+                    help="Path to the style segmentation")
+parser.add_argument("--init_image_path",    dest='init_image_path',     nargs='?',
+                    help="Path to init image", default="")
+parser.add_argument("--output_image",       dest='output_image',        nargs='?',
+                    help='Path to output the stylized image', default="best_stylized.png")
+
+# Training Optimizer Options
+parser.add_argument("--max_iter",           dest='max_iter',            nargs='?', type=int,
+                    help='maximum image iteration', default=1000)
+parser.add_argument("--learning_rate",      dest='learning_rate',       nargs='?', type=float,
+                    help='learning rate for adam optimizer', default=1.0)
+parser.add_argument("--print_iter",         dest='print_iter',          nargs='?', type=int,
+                    help='print loss per iterations', default=1)
+# Note: the temporary results saved every save_iter iterations are not
+# smoothed, so they might not look smooth enough.
+parser.add_argument("--save_iter",          dest='save_iter',           nargs='?', type=int,
+                    help='save temporary result per iterations', default=100)
+# Boolean flag: parsed with _str2bool so that "--lbfgs False" is really False.
+parser.add_argument("--lbfgs",              dest='lbfgs',               nargs='?', type=_str2bool,
+                    help="True=lbfgs, False=Adam", default=True)
+
+# Weight Options
+parser.add_argument("--content_weight",     dest='content_weight',      nargs='?', type=float,
+                    help="weight of content loss", default=5e0)
+parser.add_argument("--style_weight",       dest='style_weight',        nargs='?', type=float,
+                    help="weight of style loss", default=1e2)
+parser.add_argument("--tv_weight",          dest='tv_weight',           nargs='?', type=float,
+                    help="weight of total variational loss", default=1e-3)
+parser.add_argument("--affine_weight",      dest='affine_weight',       nargs='?', type=float,
+                    help="weight of affine loss", default=1e4)
+
+# Style Options
+parser.add_argument("--style_option",       dest='style_option',        nargs='?', type=int,
+                    help="0=non-Matting, 1=only Matting, 2=first non-Matting, then Matting", default=0)
+# Boolean flag: parsed with _str2bool so that "--apply_smooth False" is really False.
+parser.add_argument("--apply_smooth",       dest='apply_smooth',        nargs='?', type=_str2bool,
+                    help="if apply local affine smooth", default=True)
+
+# Smoothing Argument
+parser.add_argument("--f_radius",           dest='f_radius',            nargs='?', type=int,
+                    help="smooth argument", default=15)
+parser.add_argument("--f_edge",             dest='f_edge',              nargs='?', type=float,
+                    help="smooth argument", default=1e-1)
+
+# Parsed at import time; main() reads this namespace.
+args = parser.parse_args()
+
+def main():
+    """Run the style transfer selected by ``args.style_option`` and save it.
+
+    * 0: a single ``stylize(args, False)`` pass.
+    * 1: a single ``stylize(args, True)`` pass.
+    * 2: a ``stylize(args, False)`` pass whose result is written to
+      ``./tmp_result.png`` and used as the init image of a second
+      ``stylize(args, True)`` pass.
+
+    For options 1 and 2 the result is additionally passed through the local
+    affine smoothing step when ``args.apply_smooth`` is truthy. The final
+    image is written to ``args.output_image``.
+
+    Raises:
+        ValueError: if ``args.style_option`` is not 0, 1 or 2 (previously
+            such a value silently produced no output).
+    """
+    if args.style_option not in (0, 1, 2):
+        raise ValueError("style_option must be 0, 1 or 2, got %r" % (args.style_option,))
+
+    # NOTE(review): the second stylize() argument is presumably the Matting
+    # switch described in the --style_option help text -- confirm in photo_style.
+    use_matting = args.style_option != 0
+
+    if args.style_option == 2:
+        # First pass; its result seeds the second pass.
+        tmp_image_bgr = stylize(args, False)
+        _save_bgr_image(tmp_image_bgr, "./tmp_result.png")
+        args.init_image_path = "./tmp_result.png"
+
+    best_image_bgr = stylize(args, use_matting)
+    if use_matting and args.apply_smooth:
+        _smooth_and_save(best_image_bgr)
+    else:
+        _save_bgr_image(best_image_bgr, args.output_image)
+
+
+def _save_bgr_image(image_bgr, path):
+    """Flip the channel axis of ``image_bgr`` (BGR to RGB), clip to [0, 255] and save as uint8."""
+    result = Image.fromarray(np.uint8(np.clip(image_bgr[:, :, ::-1], 0, 255.0)))
+    result.save(path)
+
+
+def _smooth_and_save(best_image_bgr):
+    """Apply local affine smoothing guided by the content image and save to ``args.output_image``."""
+    # Imported lazily: Pycuda runtime incompatible with Tensorflow
+    from smooth_local_affine import smooth_local_affine
+    content_input = np.array(Image.open(args.content_image_path).convert("RGB"), dtype=np.float32)
+    # RGB to BGR
+    content_input = content_input[:, :, ::-1]
+    # H * W * C to C * H * W
+    content_input = content_input.transpose((2, 0, 1))
+    input_ = np.ascontiguousarray(content_input, dtype=np.float32) / 255.
+
+    _, H, W = np.shape(input_)
+
+    output_ = np.ascontiguousarray(best_image_bgr.transpose((2, 0, 1)), dtype=np.float32) / 255.
+    best_ = smooth_local_affine(output_, input_, 1e-7, 3, H, W, args.f_radius, args.f_edge).transpose(1, 2, 0)
+    # NOTE(review): unlike the unsmoothed path, no channel flip is applied
+    # before saving -- confirm the channel order smooth_local_affine returns.
+    result = Image.fromarray(np.uint8(np.clip(best_ * 255., 0, 255.)))
+    result.save(args.output_image)
+
+# Run the pipeline only when this file is executed as a script.
+if __name__ == "__main__":
+    main()