Neural Style Transfer

# Importing All Libraries

In [11]:
import tensorflow as tf
import numpy as np
import PIL.Image
import matplotlib.pyplot as plt
import time
from tensorflow.keras import mixed_precision

# GPU and Mixed Precision Setup

In [12]:
# Enable mixed precision for improved performance and reduced memory usage (optional)
mixed_precision.set_global_policy('mixed_float16')


# Configure GPU memory growth to avoid pre-allocating all VRAM

In [13]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# 1. Configuration

In [14]:
CONTENT_WEIGHT = 1e4
STYLE_WEIGHT = 1e-2
TV_WEIGHT = 30
STEPS = 1000
LEARNING_RATE = 0.02
MAX_DIM = 384

# 2. Image Handling Utilities

In [None]:
def load_img(path, max_dim=MAX_DIM):
    img = PIL.Image.open(path).convert('RGB')
    img.thumbnail((max_dim, max_dim))
    img = np.array(img)
    img = tf.keras.applications.vgg19.preprocess_input(img)
    return tf.expand_dims(tf.convert_to_tensor(img, dtype=tf.float32), 0)  # Ensure float32

def deprocess_img(processed_img):
    img = processed_img.numpy().squeeze()
    img += [103.939, 116.779, 123.68]
    img = img[:, :, ::-1]
    return np.clip(img, 0, 255).astype('uint8')

# 3. Model Setup

In [16]:
def get_model():
    """Build VGG19 model with intermediate outputs for content and style layers."""
    vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False
    content_layer = 'block5_conv2'
    style_layers = [f'block{i}_conv1' for i in range(1, 6)]
    outputs = [vgg.get_layer(content_layer).output] + [vgg.get_layer(layer).output for layer in style_layers]
    return tf.keras.Model(vgg.input, outputs)

def gram_matrix(tensor):
    """Compute the Gram matrix for style representation."""
    channels = int(tensor.shape[-1])
    a = tf.reshape(tensor, [-1, channels])
    return tf.matmul(a, a, transpose_a=True) / tf.cast(tf.shape(a)[0], tf.float32)

# 4. Loss Functions

In [17]:
def content_loss(content, generated):
    """Mean squared error between content and generated features."""
    return tf.reduce_mean(tf.square(content - generated))

def style_loss(style, generated):
    """Mean squared error between style and generated Gram matrices."""
    return tf.reduce_mean(tf.square(style - generated))

def total_variation_loss(image):
    """Total variation loss to promote image smoothness."""
    x_diff = image[:, 1:, :, :] - image[:, :-1, :, :]
    y_diff = image[:, :, 1:, :] - image[:, :, :-1, :]
    return tf.reduce_sum(tf.abs(x_diff)) + tf.reduce_sum(tf.abs(y_diff))

# 5. Load Images and Initialize Generated Image

In [18]:
content_image = load_img('content.jpg')
style_image = load_img('style.jpg')
generated_image = tf.Variable(content_image, dtype=tf.float32)

# 6. Extract Feature Targets from the Model

In [19]:
model = get_model()
# Cast targets to float32 for consistency
content_target = tf.cast(model(content_image)[0], tf.float32)
style_targets = [tf.cast(gram_matrix(style_output), tf.float32) for style_output in model(style_image)[1:]]

TypeError: `x` and `y` must have the same dtype, got tf.float16 != tf.float32.

# 7. Optimizer Setup

In [None]:
optimizer = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

# 8. Training Step (compiled with tf.function for performance)

In [None]:
@tf.function
def train_step(generated_image, content_target, style_targets):
    with tf.GradientTape() as tape:
        outputs = model(generated_image)
        # Cast outputs to float32 for loss computation
        generated_content = tf.cast(outputs[0], tf.float32)
        generated_styles = [tf.cast(gram_matrix(style_output), tf.float32) for style_output in outputs[1:]]
        
        # Compute individual losses
        c_loss = content_loss(content_target, generated_content)
        s_loss = tf.add_n([
            style_loss(style_target, generated_style)
            for style_target, generated_style in zip(style_targets, generated_styles)
        ]) / len(style_targets)
        tv_loss = total_variation_loss(generated_image)
        
        total_loss = CONTENT_WEIGHT * c_loss + STYLE_WEIGHT * s_loss + TV_WEIGHT * tv_loss
    gradients = tape.gradient(total_loss, generated_image)
    optimizer.apply_gradients([(gradients, generated_image)])
    return total_loss, c_loss, s_loss, tv_loss

# 9. Training Loop

In [None]:
start_time = time.time()
for step in range(STEPS):
    total_loss, c_loss, s_loss, tv_loss = train_step(generated_image, content_target, style_targets)
    if step % 100 == 0:
        print(f"Step {step}: Total Loss={total_loss:.2e}, Content Loss={c_loss:.2e}, Style Loss={s_loss:.2e}, TV Loss={tv_loss:.2e}")
print(f"Total time: {time.time()-start_time:.2f} seconds")

# 10. Display and Save the Result

In [None]:
result = deprocess_img(generated_image)
plt.imshow(result)
plt.axis('off')
plt.show()