In [None]:
# === Environment Setup ===
# Purpose: Import necessary libraries for image processing, deep learning, and visualization.
import os, sys, math, time, random, json, textwrap, warnings
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
try:
    # TensorFlow is the core library for building and running the style transfer model.
    import tensorflow as tf
    from tensorflow.keras.preprocessing import image as kp_image
    # We use a pre-trained VGG19 model as the feature extractor.
    from tensorflow.keras.applications import vgg19
    TENSORFLOW_AVAILABLE = True
except ImportError:
    TENSORFLOW_AVAILABLE = False
from IPython.display import display, Markdown, Image

# --- Configuration ---
# Purpose: Standardize plotting styles and numerical output.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'. Fall back to the
# legacy name when the new one is not registered, so this setup cell also runs on older installs.
_PLOT_STYLE = 'seaborn-v0_8-whitegrid'
plt.style.use(_PLOT_STYLE if _PLOT_STYLE in plt.style.available else 'seaborn-whitegrid')
plt.rcParams.update({'font.size': 14, 'figure.figsize': (10, 6), 'figure.dpi': 150})
np.set_printoptions(suppress=True, linewidth=120, precision=4)

# --- Utility Functions ---
def note(msg):
    """Show `msg`, wrapped at 100 characters, as an info-styled alert box in the cell output."""
    wrapped = textwrap.fill(msg, width=100)
    display(Markdown(f"<div class='alert alert-info'>📝 {wrapped}</div>"))
def sec(title):
    """Print `title` in upper case, framed above and below by an 80-character '=' rule."""
    rule = '=' * 80
    print("\n" + "\n".join((rule, f"| {title.upper()} |", rule)))

# Tell the reader up front when the TensorFlow import above failed, then confirm setup finished.
if not TENSORFLOW_AVAILABLE:
    note("TensorFlow not found. Please run `pip install tensorflow`")
note("Environment initialized for Neural Style Transfer.")

# Part 9: Specialized Topics & Publishing
## Chapter 9.04: Neural Style Transfer

### Introduction: Decomposing and Recombining Images

**Neural Style Transfer** is a fascinating application of deep learning that allows us to recompose one image in the style of another. The technique, introduced by Gatys, Ecker, and Bethge (2015), uses the feature representations learned by a deep convolutional neural network (CNN) to separate the *content* of an image from its *style*. The core idea is to define a loss function that drives a generated image to simultaneously match the content of a **content image** and the style of a **style image**.

### 1. The Theory: Content and Style Loss
The process involves optimizing a generated image by minimizing a total loss function that is a weighted sum of two components:

#### 1.1 Content Loss
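The **content** of an image is captured by the feature maps in the *upper* layers of a CNN. These layers learn to identify high-level objects and their arrangement, ignoring fine-grained details. The content loss is the squared-error distance between the feature maps of the content image and the generated image in a specific high-level layer:
$$ \mathcal{L}_{content} = \frac{1}{2} \sum_{i,j} (F_{ij}^l - C_{ij}^l)^2 $$
Here $F^l$ and $C^l$ are the layer-$l$ feature maps of the generated image and the content image, respectively. (The implementation below averages the squared differences instead of summing them, which only rescales the loss.)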

#### 1.2 Style Loss
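The **style** of an image is captured by the correlations between the activations of different filters, measured at several layers of a CNN ranging from shallow to deep. These correlations represent textures, colors, and common patterns. The **Gram matrix** is a tool to measure these correlations: for a layer's feature maps, entry $(i, j)$ of the Gram matrix is the inner product of the vectorized feature maps of filters $i$ and $j$. The style loss is then the squared difference between the Gram matrices of the style image and the generated image, summed across several layers.
$$ \mathcal{L}_{style} = \sum_{l} w_l E_l \quad \text{where} \quad E_l = \frac{1}{4 N_l^2 M_l^2} \sum_{i,j} (G_{ij}^l - A_{ij}^l)^2 $$
Here $G^l$ and $A^l$ are the Gram matrices of the generated image and the style image at layer $l$, $N_l$ is the number of feature maps in that layer, $M_l$ is the size (height × width) of each feature map, and $w_l$ is the weight given to layer $l$.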

### 2. Implementation with TensorFlow
We will use a pre-trained VGG19 network to extract the necessary features and then optimize an input image to minimize the combined loss.

In [None]:
sec("Step 1: Load and Preprocess Images")
if TENSORFLOW_AVAILABLE:
    # Purpose: Define functions to load, resize, and display images.
    def load_img(path_to_img, max_dim=512):
        """Load an image file as a batched float32 tensor scaled for the style-transfer model.

        The image is decoded with 3 color channels, converted to float32, and resized
        (aspect ratio preserved) so that its longer side is `max_dim` pixels, which keeps
        memory use and computation time manageable.

        Args:
            path_to_img: Path of the image file to read.
            max_dim: Target length, in pixels, of the longer image side. Defaults to 512.

        Returns:
            A float32 tensor of shape (1, height, width, 3).
        """
        raw_bytes = tf.io.read_file(path_to_img)
        # expand_animations=False guarantees a 3-D (H, W, C) result even for GIF input; the
        # default returns a 4-D stack of frames, which breaks the 2-element resize size below.
        img = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
        img = tf.image.convert_image_dtype(img, tf.float32)  # float32 for model input
        shape = tf.cast(tf.shape(img)[:-1], tf.float32)  # (height, width)
        # tf.reduce_max is a single tensor op; the Python builtin max() iterates the tensor
        # element by element and only works in eager mode.
        scale = max_dim / tf.reduce_max(shape)
        new_shape = tf.cast(shape * scale, tf.int32)
        img = tf.image.resize(img, new_shape)
        # Add the leading batch dimension.
        return img[tf.newaxis, ...]

    def imshow(image, title=None):
        """Draw a batched image (leading axis of size 1) on the current Matplotlib axes.

        Args:
            image: Array-like whose first axis is a batch dimension of size 1.
            title: Optional title for the axes; skipped when falsy.
        """
        pixels = np.squeeze(image, axis=0)  # drop the batch dimension
        pixels = np.clip(pixels, 0, 1)      # keep floats inside the displayable [0, 1] range
        plt.imshow(pixels)
        plt.axis('off')
        if title:
            plt.title(title)

    # Asset Localization: the images are read from local paths rather than downloaded.
    content_path = '../images/07-Machine-Learning/Labrador.jpg'
    style_path = '../images/07-Machine-Learning/Kandinsky.jpg'
    content_image = load_img(content_path)
    style_image = load_img(style_path)

    # Show the two inputs side by side before any optimization takes place.
    plt.figure(figsize=(10, 10))
    plt.subplot(1, 2, 1)
    imshow(content_image, 'Content Image')
    plt.subplot(1, 2, 2)
    imshow(style_image, 'Style Image')
    plt.show()
else:
    note("TensorFlow not available. Skipping Style Transfer example.")

#### Step 2: Define Content and Style Layers
We select intermediate layers from the VGG19 network. Style is extracted from a range of layers, from shallow (fine textures and details) to deep (larger-scale patterns), while content is taken from a single deep layer that captures high-level object features.

In [None]:
if TENSORFLOW_AVAILABLE:
    # Purpose: Name the VGG19 layers whose activations feed the content and style losses.
    # One deep layer represents content, since deep layers capture high-level object features.
    content_layers = ['block5_conv2']
    # The first convolution of each of the five VGG19 blocks represents style, covering
    # textures and patterns from shallow (fine) to deep (coarse) scales.
    style_layers = [f'block{block}_conv1' for block in range(1, 6)]
    num_content_layers, num_style_layers = len(content_layers), len(style_layers)

#### Step 3: Build the Feature Extractor Model
We create a custom Keras model that takes an image and returns the feature map activations from the specified content and style layers of the VGG19 network.

In [None]:
if TENSORFLOW_AVAILABLE:
    # Purpose: Create the feature extractor model and define the Gram matrix calculation.
    def vgg_layers(layer_names):
        """Build a Keras model mapping a VGG19 input to the outputs of the named layers.

        Args:
            layer_names: Names of VGG19 layers, in the order their outputs should be returned.

        Returns:
            A `tf.keras.Model` whose outputs are the feature maps of `layer_names`.
        """
        backbone = vgg19.VGG19(include_top=False, weights='imagenet')
        # The network is used purely as a fixed feature extractor, so its weights are frozen.
        backbone.trainable = False
        feature_maps = []
        for layer_name in layer_names:
            feature_maps.append(backbone.get_layer(layer_name).output)
        return tf.keras.Model([backbone.input], feature_maps)

    def gram_matrix(input_tensor):
        """Return the Gram matrix of a batch of feature maps, averaged over spatial locations.

        For every pair of channels (c, d) the products of their activations are summed over
        all spatial positions (i, j), giving the filter-response correlations that are used
        as the style representation.

        Args:
            input_tensor: Feature maps indexed as (batch, i, j, channel).

        Returns:
            A tensor indexed as (batch, channel, channel).
        """
        dims = tf.shape(input_tensor)
        channel_products = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
        # Divide by the number of spatial locations so the scale does not depend on map size.
        location_count = tf.cast(dims[1] * dims[2], tf.float32)
        return channel_products / location_count

    class StyleContentModel(tf.keras.models.Model):
        """Wrap the VGG19 feature extractor and return style and content representations.

        Calling the model on an image batch with values in [0, 1] returns
        ``{'content': {layer_name: feature_map}, 'style': {layer_name: gram_matrix}}``.
        """

        def __init__(self, style_layers, content_layers):
            super().__init__()
            # Style layers are requested first so that `call` can split the outputs by position.
            self.vgg = vgg_layers(style_layers + content_layers)
            self.style_layers = style_layers
            self.content_layers = content_layers
            self.num_style_layers = len(style_layers)
            self.vgg.trainable = False

        def call(self, inputs):
            # Rescale [0, 1] floats to [0, 255] before applying VGG19's own preprocessing.
            scaled = inputs * 255.0
            features = self.vgg(vgg19.preprocess_input(scaled))
            split_at = self.num_style_layers
            style_features, content_features = features[:split_at], features[split_at:]
            # Style is represented by the Gram matrix of each style layer's feature maps.
            style_grams = [gram_matrix(feature_map) for feature_map in style_features]
            content_dict = dict(zip(self.content_layers, content_features))
            style_dict = dict(zip(self.style_layers, style_grams))
            return {'content': content_dict, 'style': style_dict}

    # Instantiate the extractor and compute the target style and content representations from the original images.
    # These targets will remain constant during optimization.
    extractor = StyleContentModel(style_layers, content_layers)
    # style_targets: dict mapping each style-layer name to the Gram matrix of the style image.
    style_targets = extractor(style_image)['style']
    # content_targets: dict mapping each content-layer name to the feature map of the content image.
    content_targets = extractor(content_image)['content']

#### Step 4: Define and Run the Training Step
We define the total loss and use an Adam optimizer to minimize it. Crucially, the variables being updated are not the weights of a network, but the pixels of the input image itself.

In [None]:
if TENSORFLOW_AVAILABLE:
    # --- Optimization Setup ---
    # The generated image starts as a copy of the content image. Its pixels are the only
    # trainable variable and are pushed step by step towards the target style.
    image = tf.Variable(content_image)
    # Adam carries out the gradient-descent updates on those pixels.
    opt = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    # --- Loss Function Definition ---
    # Relative importance of the style term and the content term in the total loss.
    style_weight = 1e-2
    content_weight = 1e4
    
    def style_content_loss(outputs):
        """Return the weighted total of the style loss and the content loss.

        Args:
            outputs: Dict with 'style' and 'content' entries, each mapping a layer name to
                the generated image's representation at that layer.

        Returns:
            A scalar tensor: style term plus content term, where each term is the per-layer
            mean squared error against the fixed targets, summed over layers and scaled by
            its weight divided by the number of layers.
        """
        def _summed_mse(current, targets):
            # Mean squared error per layer, added up over every layer in `current`.
            return tf.add_n([tf.reduce_mean((current[layer] - targets[layer]) ** 2)
                             for layer in current.keys()])

        style_term = _summed_mse(outputs['style'], style_targets)
        style_term *= style_weight / num_style_layers
        content_term = _summed_mse(outputs['content'], content_targets)
        content_term *= content_weight / num_content_layers
        return style_term + content_term

    # --- Training Step ---
    # The @tf.function decorator compiles this function into a high-performance TensorFlow graph.
    @tf.function()
    def train_step(image):
        """Run one optimization step that updates the pixels of `image` in place.

        Args:
            image: The `tf.Variable` holding the generated image.
        """
        with tf.GradientTape() as tape:
            # Forward pass: representations of the current image, then the total loss.
            loss = style_content_loss(extractor(image))
        # The gradient is taken with respect to the image pixels, not any network weights.
        pixel_grad = tape.gradient(loss, image)
        opt.apply_gradients([(pixel_grad, image)])
        # Project the updated pixels back into the valid [0, 1] range.
        image.assign(tf.clip_by_value(image, 0.0, 1.0))

    # --- Run Optimization ---
    # Runs epochs * steps_per_epoch calls to train_step in total. An "epoch" here is only a
    # group of steps after which progress is printed; there is no dataset being iterated.
    note("Training the style transfer model... (This will take a few minutes)")
    epochs = 10; steps_per_epoch = 100
    # Wall-clock timing of the whole loop, reported once training finishes.
    start_time = time.time()
    for n in range(epochs):
        for m in range(steps_per_epoch):
            # Each call updates the pixels of `image` in place.
            train_step(image)
        print(f'Epoch {n+1}/{epochs} complete.')
    end_time = time.time()
    note(f"Training complete in {end_time - start_time:.1f} seconds.")

In [None]:
sec("Final Result")
if not TENSORFLOW_AVAILABLE:
    note("TensorFlow not available.")
else:
    # Purpose: Show the content image, the style image, and the generated image side by side.
    plt.figure(figsize=(18, 6))
    plt.subplot(1, 3, 1)
    imshow(content_image, 'Content Image')
    plt.subplot(1, 3, 2)
    imshow(style_image, 'Style Image')
    plt.subplot(1, 3, 3)
    imshow(image, 'Generated Image')
    plt.show()

### 3. Economic Applications
While style transfer is primarily an artistic application, the underlying techniques of using pre-trained CNNs as feature extractors are highly relevant in economics. For example, researchers use features extracted from satellite imagery to measure economic activity in areas with poor data. Features like the intensity of nighttime lights, the density of buildings, or the types of roofing materials can be highly correlated with GDP, poverty, and infrastructure development.

### 4. Exercises

1.  **The Role of Weights:** In the `style_content_loss` function, we have `style_weight` and `content_weight`. Re-run the training process with `style_weight=1e-1` and then with `style_weight=1e-3`. How does the final image change? Explain the role of these weights in the optimization process.

2.  **Layer Choice:** The choice of which layers to use for content and style is a key hyperparameter. 
    a. What would happen if you used a very low-level layer (e.g., `block1_conv1`) for the content loss? 
    b. What would happen if you used only high-level layers (e.g., `block5_conv1`) for the style loss? 
    Modify the code to test one of these hypotheses.

3.  **The Gram Matrix:** Explain in your own words why the Gram matrix is a good representation of style. What do the diagonal and off-diagonal elements of the Gram matrix represent in terms of the feature maps?

4.  **Economic Analogy:** Consider the problem of measuring the quality of infrastructure in a developing country using satellite images. Which layers of a CNN would you use to extract features related to the *presence* of roads (content)? Which layers might be useful for measuring the *quality* or *type* of roads (style/texture)?