## Theory
- Content of original image to be retained in generated image
- Style of reference image to be adopted to generated image

## Loss
$$loss = distance(style(ref\space image) - style(generated\space image)) + distance(content(org\space image) - content(generated\space image))$$

Here `distance` is the L2 norm.

### Content Loss
L2 distance between the top-layer (`block5_conv3`) activations of the original image and of the generated image.
### Style Loss
Based on the correlations between the activations (feature maps) within a layer.
The loss is accumulated over all the selected style layers.
### Gram Matrix
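- Used in the style loss: the Gram matrix of a layer's activations captures the correlations between its feature maps, and it is computed for every style layer.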

$$loss = \alpha \cdot content\_loss + \beta \cdot style\_loss$$
$\alpha$ (content weight) should be larger than $\beta$ (style weight).

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
import keras.backend as K
from tensorflow.image import total_variation
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# VGG16 without its dense classifier head, used only as a frozen feature
# extractor. 'imagenet' is the library default for `weights`, spelled out here.
model = VGG16(weights='imagenet', include_top=False)
model.trainable = False  # the network weights are never updated
model.summary()

In [None]:
## For content transfer check the loss between block5_conv3 in both content image and generated image
## For style transfer check the Gram matrix loss for block1_conv1, block2_conv1, block3_conv1, block4_conv1, block5_conv1

In [None]:
def load_image(image_path, target_size=None):
    """Read an image file and return it as a preprocessed batch of one.

    Args:
        image_path: Path of the image file, handed to ``load_img``.
        target_size: Optional size forwarded unchanged to ``load_img``.

    Returns:
        The output of ``preprocess_input`` with an extra leading axis of
        length 1 (the batch axis).
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    return np.expand_dims(preprocess_input(pixels), axis=0)

In [None]:
# Preview copies of the two inputs (content at 250x250, style at 510x510).
content = load_image('content_img.jpeg', target_size=(250, 250))
style = load_image('ref_img.jpeg', target_size=(510, 510))

In [None]:
# code
def deprocess(img):
    """Invert the VGG16 preprocessing so an image can be displayed.

    Adds 103.939 / 116.779 / 123.68 back onto channels 0 / 1 / 2, reverses
    the channel order and clips the result to the 0..255 range.

    Args:
        img: Array-like indexed as (row, column, channel) with 3 channels.

    Returns:
        A new ``uint8`` array of the same shape. The input is not modified.
    """
    img = np.asarray(img)
    # ``astype`` returns a copy, so the in-place additions below cannot leak
    # into the caller's array; promoting to at least float32 also lets integer
    # inputs take the fractional offsets.
    img = img.astype(np.promote_types(img.dtype, np.float32))

    # Add the per-channel means back (inverse of the zero-centring step).
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68

    # Reverse the channel axis: VGG16 preprocessing stores BGR, display wants RGB.
    img = img[:, :, ::-1]

    return np.clip(img, 0, 255).astype('uint8')

def display_image(image):
    """Show a preprocessed image with matplotlib, without grid or ticks.

    Args:
        image: Array-like of shape (rows, cols, 3), or (1, rows, cols, 3)
            with a leading batch axis that is dropped before display.

    Returns:
        None.
    """
    # Work on a private copy so nothing done while preparing the picture can
    # alter the caller's data.
    img = np.array(image)

    # Drop the batch axis when present; a 3-D image is used as it is.
    if img.ndim == 4:
        img = np.squeeze(img, axis=0)

    img = deprocess(img)

    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(img)

In [None]:
# Preview the content image.
display_image(image=content)

In [None]:
# Preview the style reference image.
display_image(image=style)

In [None]:
# Sub-model mapping an input image to the activations of the content layer.
content_layer = 'block5_conv3'
content_model = Model(
    inputs=model.input,
    outputs=model.get_layer(name=content_layer).output,
)

In [None]:
# First convolution of every VGG16 block, kept in ascending block order.
style_layers = sorted([
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
])
# Sub-model returning one activation tensor per style layer, in that order.
style_model = Model(
    inputs=model.input,
    outputs=[model.get_layer(name=layer_name).output
             for layer_name in style_layers],
)

In [None]:
def content_loss(base, combination):
    """Sum of squared differences between two images' content activations.

    Args:
        base: Input passed through ``content_model`` as the reference.
        combination: Input passed through ``content_model`` for comparison.

    Returns:
        Scalar tensor: the summed squared difference of the two activations.
    """
    base_features = content_model(base)
    combination_features = content_model(combination)
    difference = base_features - combination_features
    return K.sum(K.square(difference))

def gram_matrix(features):
    """Return the Gram matrix (channel-by-channel correlations) of activations.

    Args:
        features: Activation tensor whose last axis is the channel axis; all
            leading axes are flattened into a single "position" axis.

    Returns:
        Tensor of shape (channels, channels) holding the inner product of
        every pair of channels taken over all positions.
    """
    channels = features.shape[-1]
    flat = tf.reshape(features, [-1, channels])  # (positions, channels)
    # Contract over the position axis (F^T F). Multiplying the other way round
    # (F F^T) would give a positions x positions matrix, which is not the
    # channel correlation and grows with the square of the image area.
    return tf.matmul(flat, flat, transpose_a=True)

def style_loss(style, combination):
    """Normalised squared distance between the Gram matrices of two activations.

    Reads the module-level ``img_height`` and ``img_width`` for the
    normalisation, so both must be defined before this is called.

    Args:
        style: Activations of the style reference at one layer.
        combination: Activations of the generated image at the same layer.

    Returns:
        Scalar tensor: sum of squared Gram differences divided by
        ``4 * channel**2 * size**2`` with ``channel = 3`` and
        ``size = img_height * img_width``.
    """
    style_gram = gram_matrix(style)
    combination_gram = gram_matrix(combination)

    channel = 3
    size = img_height * img_width
    normaliser = 4 * (channel**2) * (size**2)

    squared_distance = K.sum(K.square(style_gram - combination_gram))
    return squared_distance / normaliser


### Total Variation Loss
Penalises differences between neighbouring pixels so that the generated image stays locally coherent instead of becoming noisy or pixelated.

In [None]:
# History of the generated image: one NumPy snapshot per optimisation step,
# accumulated across every call to train_model.
genertaed_images = []


def train_model(contnet_img, style_img, iterations=500, content_weight=2.5, style_weight=0.001, total_var_weight=1e-5):
    """Optimise an image so it keeps the content of one image and the style of another.

    Both images are loaded at 125x125. ``style_loss`` normalises with the
    module-level ``img_height`` / ``img_width``, which are expected to
    describe that same size. The generated image starts as a copy of the
    content image and is updated with Adam.

    Args:
        contnet_img: Path of the content image.
        style_img: Path of the style reference image.
        iterations: Number of optimisation steps.
        content_weight: Multiplier of the content loss.
        style_weight: Multiplier of the style loss, which is averaged over the
            style layers. It is applied exactly once.
        total_var_weight: Multiplier of the total-variation loss.

    Returns:
        NumPy array holding the generated image that achieved the lowest
        total loss, or ``None`` when no step was run.
    """
    content_img = load_image(contnet_img, [125, 125])
    style_img = load_image(style_img, [125, 125])
    generated_img = tf.Variable(content_img, dtype=tf.float32)
    display_image(generated_img)
    opt = tf.keras.optimizers.Adam(learning_rate=3.5)

    # The style targets depend only on the fixed style image, so compute them
    # once instead of on every step.
    style_targets = style_model(style_img)
    per_layer_style_weight = style_weight / len(style_layers)

    best_loss = np.inf
    best_img = None
    for i in range(iterations):
        with tf.GradientTape() as tape:
            C_loss = content_loss(content_img, generated_img)
            generated_styles = style_model(generated_img)
            S_loss = 0
            for target, generated in zip(style_targets, generated_styles):
                # Unweighted here: style_weight is applied once, below.
                S_loss += style_loss(target, generated)
            # total_variation returns one value per image in the batch;
            # reduce it so the total loss is a scalar.
            V_loss = tf.reduce_sum(total_variation(generated_img))
            T_loss = (C_loss * content_weight
                      + S_loss * per_layer_style_weight
                      + V_loss * total_var_weight)

        grads = tape.gradient(T_loss, generated_img)

        # T_loss was measured on the image as it is now, so record the best
        # candidate before the optimizer changes it.
        loss_value = float(T_loss)
        if loss_value < best_loss:
            best_loss = loss_value
            best_img = generated_img.numpy()

        opt.apply_gradients([(grads, generated_img)])

        print(f'Iteration:{i}\nLoss:{loss_value}')
        # Store a snapshot; appending the variable itself would make every
        # entry alias the final state.
        genertaed_images.append(generated_img.numpy())
    return best_img



In [None]:
# Image size read by style_loss for its normalisation; train_model loads both
# images at 125x125.
img_height = img_width = 125
final_img = train_model('content_img.jpeg', 'ref_img.jpeg')

In [None]:
# Show the most recent entry of the generation history.
display_image(genertaed_images[-1])