In [1]:
import tensorflow as tf
import numpy as np
from PIL import Image
import pprint
import tqdm
import matplotlib.pyplot as plt

class StyleTransfer:
    def __init__(self, content_image, style_image, vgg_model, style_layers=None, content_layer=None, alpha=10, beta=40, img_size=400):
        self.content_image = content_image
        self.style_image = style_image
        self.vgg_model = vgg_model
        self.style_layers = style_layers or [
            ('block1_conv1', 0.2),
            ('block2_conv1', 0.2),
            ('block3_conv1', 0.2),
            ('block4_conv1', 0.2),
            ('block5_conv1', 0.2)]
        self.content_layer = content_layer or [('block5_conv4', 1)]
        self.alpha = alpha
        self.beta = beta
        self.img_size = img_size

    def get_layer_outputs(self, layer_names):
        """ Creates a vgg model that returns a list of intermediate output values. """
        outputs = [self.vgg_model.get_layer(layer[0]).output for layer in layer_names]
        model = tf.keras.Model([self.vgg_model.input], outputs)
        return model

    def compute_content_cost(self, content_output, generated_output):
        a_C = content_output[-1]
        a_G = generated_output[-1]
        m, n_H, n_W, n_C = a_G.shape.as_list()

        a_C_unrolled = tf.transpose(tf.reshape(a_C, shape=[m, -1, n_C]))
        a_G_unrolled = tf.transpose(tf.reshape(a_G, shape=[m, -1, n_C]))

        J_content = (1 / (4 * n_H * n_W * n_C)) * tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled, a_G_unrolled)))
        return J_content

    def compute_layer_style_cost(self, a_S, a_G):
        m, n_H, n_W, n_C = a_G.shape
        a_S = tf.transpose(tf.reshape(a_S, shape=[-1, n_C]))
        a_G = tf.transpose(tf.reshape(a_G, shape=[-1, n_C]))

        GS = self.gram_matrix(a_S)
        GG = self.gram_matrix(a_G)

        J_style_layer = (1 / (4 * n_C ** 2 * (n_H * n_W) ** 2)) * tf.reduce_sum(tf.square(tf.subtract(GS, GG)))
        return J_style_layer

    def gram_matrix(self, A):
        GA = tf.matmul(A, A, transpose_b=True)
        return GA

    def compute_style_cost(self, style_image_output, generated_image_output):
        J_style = 0
        a_S = style_image_output[:-1]
        a_G = generated_image_output[:-1]
        for i, weight in zip(range(len(a_S)), self.style_layers):
            J_style_layer = self.compute_layer_style_cost(a_S[i], a_G[i])
            J_style += weight[1] * J_style_layer
        return J_style

    def total_cost(self, J_content, J_style):
        J = self.alpha * J_content + self.beta * J_style
        return J

    def train_step(self, generated_image, a_C, a_S):
        with tf.GradientTape() as tape:
            a_G = self.vgg_model(generated_image)
            J_style = self.compute_style_cost(a_S, a_G)
            J_content = self.compute_content_cost(a_C, a_G)
            J = self.total_cost(J_content, J_style)

        grad = tape.gradient(J, generated_image)
        optimizer.apply_gradients([(grad, generated_image)])
        generated_image.assign(self.clip_0_1(generated_image))

        return J

    def clip_0_1(self, image):
        return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)

    def tensor_to_image(self, tensor):
        tensor = tensor * 255
        tensor = np.array(tensor, dtype=np.uint8)
        if np.ndim(tensor) > 3:
            assert tensor.shape[0] == 1
            tensor = tensor[0]
        return Image.fromarray(tensor)

    def generate_image(self, output_path="generated_image.jpg"):
        # Preprocess content and style images
        content_image = tf.expand_dims(tf.image.convert_image_dtype(self.content_image, tf.float32), axis=0)
        style_image = tf.expand_dims(tf.image.convert_image_dtype(self.style_image, tf.float32), axis=0)

        # Get the outputs of the layers from the model
        vgg_model_outputs = self.get_layer_outputs(self.style_layers + self.content_layer)

        # Extract the style and content features from VGG
        content_target = vgg_model_outputs(content_image)  # Content encoder
        style_targets = vgg_model_outputs(style_image)    # Style encoder

        a_C = content_target  # Content features
        a_S = style_targets   # Style features

        # Initialize generated image with noise added to content image
        generated_image = tf.Variable(content_image)

        # Training loop
        for epoch in range(30):
            J = self.train_step(generated_image, a_C, a_S)
            print(f"Epoch {epoch}, Total Cost: {J.numpy()}")
            if epoch % 10 == 0:
                image = self.tensor_to_image(generated_image)
                image.save(output_path)
                plt.imshow(image)
                plt.show()

        return generated_image


2025-03-06 16:55:18.047501: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-06 16:55:18.050571: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-06 16:55:18.061144: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741290918.079067   10921 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741290918.084086   10921 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-06 16:55:18.100930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
# Paths to the content and style images
content_image_path = "../media/content.jpeg"
style_image_path = "../media/style.jpeg"

# Initialize the style transfer object
style_transfer = StyleTransfer(content_image_path, style_image_path, img_size=400, alpha=10, beta=40)

# Run the style transfer
generated_image = style_transfer.generate_image(output_path="generated_output.jpg")

# Display the generated image
generated_image.show()


TypeError: StyleTransfer.__init__() missing 1 required positional argument: 'vgg_model'