# 딥드림

CNN이 학습한 표현을 사용하여 예술적으로 이미지를 조작하는 기법

컨브넷을 거꾸로 실행하는 컨브넷 필터 시각화 기법과 동일

In [29]:
# Use the Inception V3 model.

from tensorflow.keras.applications import inception_v3
from tensorflow.keras import backend as K
import tensorflow as tf  # was `from tensorflow as tf`, which is a SyntaxError
K.set_learning_phase(0)  # Disables all training-specific operations.

# Build the Inception V3 network without its classifier head
# (include_top=False), loaded with pretrained ImageNet weights.
model = inception_v3.InceptionV3(weights='imagenet', include_top=False)

In [30]:
# Print the layer-by-layer structure of the loaded Inception V3 model.
model.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv2d_188 (Conv2D)             (None, None, None, 3 864         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_188 (BatchN (None, None, None, 3 96          conv2d_188[0][0]                 
__________________________________________________________________________________________________
activation_188 (Activation)     (None, None, None, 3 0           batch_normalization_188[0][0]    
_______________________________________________________________________________________

In [31]:
# Maps layer names to coefficients: how much each layer's activation
# contributes to the loss that gradient ascent will maximize.
# The layer names are the ones hard-coded in the Inception V3 application.
layer_contributions = dict(
    mixed2=0.2,
    mixed3=3.0,
    mixed4=2.0,
    mixed5=1.5,
)

그 다음은 손실을 계산 - 경사상승법으로 최대화할 값

손실 텐서를 정의합니다

In [32]:
# Look up model layers by name.
layer_dict = {layer.name: layer for layer in model.layers}

# Scalar loss accumulator; each selected layer adds its contribution below.
loss = K.variable(0.)

for layer_name, coeff in layer_contributions.items():
    activation = layer_dict[layer_name].output

    # Number of elements in the activation tensor, used to normalize the sum.
    scaling = K.prod(K.cast(K.shape(activation), 'float32'))
    # Only the interior is used: a 2-element border is dropped on axes 1 and 2
    # (presumably to avoid border artifacts).
    interior = activation[:, 2:-2, 2:-2, :]
    # Add the scaled sum of squared activations (L2 norm term) to the loss.
    loss = loss + coeff * K.sum(K.square(interior)) / scaling

In [38]:
# Gradient ascent setup.

# This tensor holds the generated DeepDream image (the model's input).
dream = model.input

# Gradient of the loss with respect to the dream image.
grads = K.gradients(loss, dream)[0]

# Normalize the gradient by its mean absolute value; the 1e-7 floor guards
# against dividing by (nearly) zero.
grads = grads / K.maximum(K.mean(K.abs(grads)), 1e-7)

# Keras function that evaluates the loss and the normalized gradient
# for a given input image.
outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)

def eval_loss_and_grads(x):
    """Evaluate the loss and its gradient for input `x`.

    Runs `fetch_loss_and_grads` on `[x]` and returns its first two outputs
    as a `(loss_value, grad_values)` tuple.
    """
    results = fetch_loss_and_grads([x])
    return results[0], results[1]

def gradient_ascent(x, iterations, steps, max_loss=None):
    """Run gradient ascent on `x` to increase the loss.

    Args:
        x: Array to optimize. It is updated in place and also returned.
        iterations: Maximum number of ascent steps to take.
        steps: Step size that multiplies the gradient at every update.
        max_loss: Optional threshold; the loop stops before applying an
            update as soon as the evaluated loss exceeds it.

    Returns:
        The updated `x`.
    """
    for i in range(iterations):
        loss_value, grad_values = eval_loss_and_grads(x)
        if max_loss is not None and loss_value > max_loss:
            break
        print('...',i,'번째 손실: ', loss_value)
        # Use the `steps` argument; the previous code read a global `step`
        # and silently ignored the value passed by the caller.
        x += steps * grad_values
    return x

In [41]:
import scipy
from tensorflow.keras.preprocessing import image

In [52]:
def resize_img(img, size):
    """Return a copy of the 4-D array `img` resized along axes 1 and 2.

    `size[0]` is the target length of axis 1 and `size[1]` the target length
    of axis 2; axes 0 and 3 keep their length (assumes a
    (batch, height, width, channels) layout - TODO confirm against callers).
    Resampling uses first-order (linear) spline interpolation.
    """
    source = np.copy(img)
    axis1_factor = float(size[0]) / source.shape[1]
    axis2_factor = float(size[1]) / source.shape[2]
    zoom_factors = (1, axis1_factor, axis2_factor, 1)
    return scipy.ndimage.zoom(source, zoom_factors, order=1)

def save_img(img, fname):
    """Deprocess a copy of `img` and write it to the file `fname`.

    The copy keeps `deprocess_image`'s in-place arithmetic from touching
    the caller's array.
    """
    image.save_img(fname, deprocess_image(np.copy(img)))
    
def preprocess_image(image_path):
    """Load the image at `image_path` as an Inception V3 input batch.

    The image is loaded, converted to an array, given a leading axis of
    length 1, and passed through `inception_v3.preprocess_input`.
    """
    pixels = image.img_to_array(image.load_img(image_path))
    batch = np.expand_dims(pixels, axis=0)
    return inception_v3.preprocess_input(batch)

def deprocess_image(x):
    """Convert a single-image batch back to pixel values in [0, 255].

    The leading axis is dropped (and channels moved last when the backend
    uses 'channels_first'), then values are mapped with x / 2 + 0.5 and
    scaled by 255 before clipping. The arithmetic is done in place on the
    reshaped array.
    """
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        x = x.reshape((3, x.shape[2], x.shape[3])).transpose((1, 2, 0))
    else:
        x = x.reshape((x.shape[1], x.shape[2], 3))
    # Maps -1 -> 0 and 1 -> 255.
    x /= 2.
    x += 0.5
    x *= 255.
    return np.clip(x, 0, 255)

이미지를 처리하기 위한 스케일(옥타브) 리스트를 정의합니다.

각 스케일은 이전 스케일보다 1.4배(`octave_scale`) 크며, 작은 이미지로 시작해서 점점 크기를 키웁니다.

In [57]:
import numpy as np

# Gradient-ascent hyperparameters.
step = 0.01          # step size of each gradient-ascent update
num_octave = 3       # number of scales processed
octave_scale = 1.4   # size ratio between consecutive scales
iterations = 20      # ascent steps per scale

# Ascent at a scale stops once the loss exceeds this value.
max_loss = 10.

base_image_path = './train/dog.12499.jpg'

img = preprocess_image(base_image_path)

# Build the list of shapes to process, from the smallest up to the original.
original_shape = img.shape[1:3]
successive_shapes = [original_shape]
for i in range(1, num_octave):
    factor = octave_scale ** i
    shape = tuple(int(dim / factor) for dim in original_shape)
    successive_shapes.append(shape)

successive_shapes.reverse()

# Keep the original image and a copy shrunk to the smallest scale.
original_img = np.copy(img)
shrunk_original_img = resize_img(img, successive_shapes[0])


In [58]:
for shape in successive_shapes:
    print('처리할 이미지 크기', shape)
    # Scale the dream image up to the current shape, then run gradient ascent.
    img = resize_img(img, shape)
    img = gradient_ascent(img, iterations=iterations, steps=step, max_loss=max_loss)

    # Detail lost by upscaling = original resized to this shape minus the
    # smaller-scale original upscaled to this shape; add it back to the dream.
    same_size_original = resize_img(original_img, shape)
    upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape)
    lost_detail = same_size_original - upscaled_shrunk_original_img
    img += lost_detail

    # The original at the current shape is the "shrunk" reference for the next scale.
    shrunk_original_img = resize_img(original_img, shape)
    save_img(img, fname=f'dream_at_scale_{shape}.png')

save_img(img, fname='./train/dog.12499.deepdream.png')

처리할 이미지 크기 (204, 153)
... 0 번째 손실:  1.5846369
... 1 번째 손실:  2.1007843
... 2 번째 손실:  2.862856
... 3 번째 손실:  3.476365
... 4 번째 손실:  4.070114
... 5 번째 손실:  4.440911
... 6 번째 손실:  5.021138
... 7 번째 손실:  5.6168666
... 8 번째 손실:  6.1207914
... 9 번째 손실:  6.6547823
... 10 번째 손실:  7.102113
... 11 번째 손실:  7.5338445
... 12 번째 손실:  7.940491
... 13 번째 손실:  8.503634
... 14 번째 손실:  8.946257
... 15 번째 손실:  9.330797
... 16 번째 손실:  9.810503
처리할 이미지 크기 (285, 214)
... 0 번째 손실:  2.6667328
... 1 번째 손실:  3.9742994
... 2 번째 손실:  5.007021
... 3 번째 손실:  5.9207215
... 4 번째 손실:  6.750062
... 5 번째 손실:  7.4744854
... 6 번째 손실:  8.184249
... 7 번째 손실:  8.875222
... 8 번째 손실:  9.503553
처리할 이미지 크기 (400, 300)
... 0 번째 손실:  2.8255305
... 1 번째 손실:  3.9660401
... 2 번째 손실:  5.14983
... 3 번째 손실:  6.1900244
... 4 번째 손실:  7.2778306
... 5 번째 손실:  8.242529
... 6 번째 손실:  9.531812


# 뉴럴 스타일 트랜스퍼

1. 스타일 참조 이미지, 타깃 이미지, 생성된 이미지를 위해 VGG19의 층 활성화를 동시에 계산하는 네트워크를 설정합니다.



2. 세 이미지에서 계산한 층 활성화를 사용하여 앞서 설명한 손실 함수를 정의합니다. 이 손실을 최소화하여 스타일 트랜스퍼를 구현할 것입니다.


3. 손실 함수를 최소화할 경사 하강법 과정을 설정합니다.


![Neural_Style_Transfer](캡처.JPG)

In [2]:
# 모두 높이가 400픽셀이 되도록 크기를 변경
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array, save_img

In [3]:
# Paths of the content (target) image and the style reference image.
target_image_path = './style_transfer/target.jpg'
style_reference_image_path = './style_transfer/style2.jpg'

# Processed images are 400 pixels tall; the width is scaled by the same
# ratio so the target image keeps its aspect ratio.
width, height = load_img(target_image_path).size
img_height = 400
img_width = int(width * img_height / height)

In [4]:
import numpy as np
from tensorflow.keras.applications import vgg19

def preprocess_image(image_path):
    """Load the image at `image_path` as a VGG19 input batch.

    The image is loaded at (img_height, img_width), converted to an array,
    given a leading axis of length 1, and passed through
    `vgg19.preprocess_input`.
    """
    loaded = load_img(image_path, target_size=(img_height, img_width))
    batch = np.expand_dims(img_to_array(loaded), axis=0)
    return vgg19.preprocess_input(batch)

def deprocess_image(x):
    """Turn a 3-D array (last axis of length 3) into a uint8 image.

    A fixed offset is added to each of the three channels (in place, so the
    caller's array is modified), the channel order is reversed, and the
    values are clipped to [0, 255] and cast to uint8.
    """
    channel_offsets = (103.939, 116.779, 123.68)
    for channel, offset in enumerate(channel_offsets):
        x[:, :, channel] += offset
    reversed_channels = x[:, :, ::-1]
    return np.clip(reversed_channels, 0, 255).astype('uint8')
    

In [5]:
from tensorflow.keras import backend as K

# The target and style reference images are fixed, so they are constants.
target_image = K.constant(preprocess_image(target_image_path))
style_reference_image = K.constant(preprocess_image(style_reference_image_path))
# Placeholder that will hold the generated (combination) image.
combination_image = K.placeholder((1, img_height, img_width, 3))

# Concatenate the three images into a single batch along axis 0:
# index 0 = target, index 1 = style reference, index 2 = combination.
input_tensor = K.concatenate([target_image, style_reference_image, combination_image], axis=0)

# Build a VGG19 network (without the classifier head) that takes the batch of
# three images as input and loads the pretrained ImageNet weights.
model = vgg19.VGG19(input_tensor=input_tensor, weights='imagenet', include_top=False)

print('모델 로드 완료')

모델 로드 완료


In [6]:
def content_loss(base, combination):
    """Return the sum of squared differences between `combination` and `base`."""
    difference = combination - base
    return K.sum(K.square(difference))

In [7]:
def gram_matrix(x):
    """Return the Gram matrix of the 3-D tensor `x`.

    Axis 2 of `x` is moved to the front and the remaining axes are flattened,
    giving one row per index of the original axis 2; the result is that
    matrix multiplied by its own transpose.
    """
    axis2_first = K.permute_dimensions(x, (2, 0, 1))
    features = K.batch_flatten(axis2_first)
    return K.dot(features, K.transpose(features))

def style_loss(style, combination):
    """Return the normalized squared distance between two Gram matrices.

    The sum of squared differences between the Gram matrices of `style` and
    `combination` is divided by 4 * channels**2 * size**2, with channels = 3
    and size = img_height * img_width.
    """
    gram_difference = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = img_height * img_width
    normalizer = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / normalizer

In [8]:
def total_variation_loss(x):
    """Return the total variation loss of the 4-D image tensor `x`.

    Squared differences between each element and its neighbour one step
    along axis 1 (`a`) and one step along axis 2 (`b`) are added, raised to
    the power 1.25, and summed.
    """
    # Everything except the last index of axes 1 and 2.
    reference = x[:, :img_height - 1, :img_width - 1, :]
    a = K.square(reference - x[:, 1:, :img_width - 1, :])
    b = K.square(reference - x[:, :img_height - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))

In [9]:
# Print the layer-by-layer structure of the VGG19 model built on the 3-image batch.
model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(3, 400, 533, 3)]        0         
_________________________________________________________________
block1_conv1 (Conv2D)        (3, 400, 533, 64)         1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (3, 400, 533, 64)         36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (3, 200, 266, 64)         0         
_________________________________________________________________
block2_conv1 (Conv2D)        (3, 200, 266, 128)        73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (3, 200, 266, 128)        147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (3, 100, 133, 128)        0     

In [10]:
# Maps each layer name to its output (activation) tensor.
outputs_dict = {layer.name: layer.output for layer in model.layers}

# Layer used for the content loss and layers used for the style loss.
content_layer = 'block5_conv2'
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]

# Weights of the three loss components.
total_variation_weight = 1e-4
style_weight = 1.
content_weight = 0.025

# The total loss starts at zero and every component is added to it.
loss = K.variable(0.)

# Content loss: batch index 0 is the target image, index 2 the combination image.
layer_features = outputs_dict[content_layer]
target_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss = loss + content_weight * content_loss(target_image_features, combination_features)

# Style loss: batch index 1 is the style reference image; each style layer
# contributes an equal share of `style_weight`.
for layer_name in style_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss = loss + (style_weight / len(style_layers)) * sl

# Total variation loss computed on the combination image itself.
loss = loss + total_variation_weight * total_variation_loss(combination_image)
    



In [None]:
import tensorflow as tf

# Switch to graph mode. Per the TensorFlow documentation this call must
# happen before any graphs, ops, or tensors are created, so run this cell
# before the cells that build the model and the loss.
tf.compat.v1.disable_eager_execution()
# `tf.get_default_graph` is a TF1 top-level symbol; use the `tf.compat.v1`
# spelling to stay consistent with the call above.
graph = tf.compat.v1.get_default_graph()

In [None]:
# Gradient of the total loss with respect to the combination image.
grads = K.gradients(loss, combination_image)[0]
# Keras function that returns the loss and the gradient for a given
# combination image in a single call.
output=[loss,grads]
fetch_loss_and_grads = K.function([combination_image], output)

class Evaluator(object):
    """Expose the loss and its gradient as two separate callables.

    `fetch_loss_and_grads` computes both values in one call, while the
    optimizer is given `loss` and `grads` separately. `loss` therefore
    computes both and caches the gradient; the following `grads` call
    returns the cached gradient and clears the cache.
    """

    def __init__(self):
        # Both caches start empty. The gradient attribute was previously
        # misspelled `grads_values`, which never matched the `grad_values`
        # name used by `loss` and `grads`.
        self.loss_value = None
        self.grad_values = None

    def loss(self, x):
        """Compute and return the loss for the flat array `x`, caching the gradient.

        Must not be called twice in a row without a `grads` call in between.
        """
        assert self.loss_value is None
        x = x.reshape((1, img_height, img_width, 3))
        outs = fetch_loss_and_grads([x])
        loss_value = outs[0]
        # Flattened float64 gradient, stored for the next `grads` call.
        grad_values = outs[1].flatten().astype('float64')
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        """Return a copy of the gradient cached by the last `loss` call.

        `x` is unused; the cached values are cleared before returning.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

evaluator = Evaluator()

In [18]:
from scipy.optimize import fmin_l_bfgs_b
import time

In [None]:
result_prefix = 'style_transfer_result'
iterations = 20

# Initial state: the preprocessed target image, flattened because
# fmin_l_bfgs_b works on flat vectors.
x = preprocess_image(target_image_path).flatten()
for i in range(iterations):
    print('반복 횟수:',i)
    start_time = time.time()
    # Run L-BFGS on the flat pixel vector, with at most 20 function evaluations.
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x, fprime=evaluator.grads, maxfun=20)
    print('현재 손실 값:', min_val)

    # Save the current generated image; a copy is deprocessed so `x` is untouched.
    img = deprocess_image(x.copy().reshape((img_height, img_width, 3)))
    fname = f'{result_prefix}_at_iterations_{i}.png'
    save_img(fname, img)
    print('저장 이미지:', fname)
    end_time = time.time()
    print('%d 번째 반복 완료: %ds' % (i, end_time - start_time))