Loading the dataset of choice.

In [None]:
import tensorflow as tf
import keras

base_image_path = "../images/san.png"
style_reference_image_paths = ["../images/starry_night.png"]


In [11]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [12]:
original_width, original_height = keras.utils.load_img(base_image_path).size
img_height = 400
img_width = round(original_width * img_height / original_height) 

In [13]:
import numpy as np
from keras.applications import vgg19

In [14]:
import numpy as np
def preprocess_image(image_path):
 img = keras.utils.load_img(
 image_path, target_size=(img_height, img_width))
 img = keras.utils.img_to_array(img)
 img = np.expand_dims(img, axis=0)
 img = keras.applications.vgg19.preprocess_input(img)
 return tf.convert_to_tensor(img, dtype=tf.float32)


In [17]:
def deprocess_image(img):
 img = img.reshape((img_height, img_width, 3))
 img[:, :, 0] += 103.939
 img[:, :, 1] += 116.779
 img[:, :, 2] += 123.68
 img = img[:, :, ::-1]
 img = np.clip(img, 0, 255).astype("uint8")
 return img

In [18]:
model = keras.applications.vgg19.VGG19(weights="imagenet", include_top=False)
model.trainable = False
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
feature_extractor = keras.Model(inputs=model.inputs, outputs=outputs_dict)

In [19]:
def content_loss(base_img, combination_img):
 return tf.reduce_sum(tf.square(combination_img - base_img))

In [20]:
def gram_matrix(x):
 x = tf.transpose(x, (2, 0, 1))
 features = tf.reshape(x, (tf.shape(x)[0], -1))
 gram = tf.matmul(features, tf.transpose(features))
 return gram

In [22]:
def style_loss(style_img, combination_img):
 S = gram_matrix(style_img)
 C = gram_matrix(combination_img)
 channels = 3
 size = img_height * img_width
 return tf.reduce_sum(tf.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))

In [23]:
def total_variation_loss(x):
    
    with tf.device("/GPU:0"):
        a = tf.square(
        x[:, : img_height - 1, : img_width - 1, :] - x[:, 1:, : img_width - 1, :]
        )
        b = tf.square(
        x[:, : img_height - 1, : img_width - 1, :] - x[:, : img_height - 1, 1:, :]
        )
        return tf.reduce_sum(tf.pow(a + b, 1.25))

In [None]:
style_layer_names = [
 "block1_conv1",
 "block2_conv1",
 "block3_conv1",
 "block4_conv1",
 "block5_conv1",
]
content_layer_names = ["block5_conv2"]
total_variation_weight = 1e-6

style_weight = 1e-6
content_weight = 2.5e-8

style_weights = {'block1_conv1': 1.,
                 'block2_conv1': 0.8,
                 'block3_conv1': 0.5,
                 'block4_conv1': 0.3,
                 'block5_conv1': 0.1}

In [None]:
def compute_loss(combination_image, base_image, style_reference_image):
 input_tensor = tf.concat(
 [base_image, style_reference_image, combination_image], axis=0)
 features = feature_extractor(input_tensor)
 loss = tf.zeros(shape=())
 layer_features = features[content_layer_names[0]]
 base_image_features = layer_features[0, :, :, :]
 combination_features = layer_features[2, :, :, :]
 loss = loss + content_weight * content_loss(
 base_image_features, combination_features
 )
 for layer_name in style_layer_names:
    layer_features = features[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    style_loss_value = style_loss(
    style_reference_features, combination_features)
    loss += (style_weight / len(style_layer_names)) * style_loss_value
    
 loss += total_variation_weight * total_variation_loss(combination_image)
 return loss

Set the policy.

In [26]:
from tensorflow.keras.mixed_precision import set_global_policy
#set_global_policy('mixed_float16')

In [None]:
import tensorflow as tf

@tf.function
def compute_loss_and_grads(combination_image, base_image, style_reference_images):
    with tf.device('/GPU:0'):  
        with tf.GradientTape() as tape:
            loss = tf.zeros(shape=())
            num = len(style_reference_images)
            style_cal = style_weight / num
            # iterate through the style images
            for i, style_reference_image in enumerate(style_reference_images):
                style_loss_value = compute_loss(
                    combination_image, base_image, style_reference_image
                )
                loss += style_cal * style_loss_value
            
        grads = tape.gradient(loss, combination_image)
        return loss, grads

In [28]:
optimizer = keras.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

In [None]:
def preprocess_style_image(style_reference_image_paths):
    images = []
    for path in style_reference_image_paths:
        img = preprocess_image(path)
        images.append(img)
    return tf.concat(images, axis=0)
  

In [None]:
with tf.device('/GPU:0'):
    base_image = preprocess_image(base_image_path)
    style_reference_images = preprocess_style_image(style_reference_image_paths)
    combination_image = tf.Variable(preprocess_image(base_image_path))

In [30]:
import math
import os
from tabnanny import check
import time

In [None]:

def training_loop():
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, combination_image=combination_image)
    generated_images = []
    iterations = 2000
    folder_path = "images"
    best_cost = math.inf
    best_image = None
    
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    checkpoint_dir = "./checkpoints"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_dir = "./checkpoints"
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    for i in range(1, iterations + 1):
        start_time_cpu = time.process_time()
        start_time_wall = time.time()
        
        with tf.device('/GPU:0'):  # Place operations explicitly on GPU
            loss, grads = compute_loss_and_grads(
                combination_image, base_image, style_reference_images
            )
        optimizer.apply_gradients([(grads, combination_image)])
        
        if i % 100 == 0:
            print(f"Iteration {i}: loss={loss:.2f}")
            img = deprocess_image(combination_image.numpy())
            fname = f"images/combination_image_at_iteration_{i}.png"
            
            end_time_cpu = time.process_time()  
            end_time_wall = time.time()  
            cpu_time = end_time_cpu - start_time_cpu  
            wall_time = end_time_wall - start_time_wall  
            if loss < best_cost:
                best_cost = loss
                best_image = img

            print("CPU times: user {} µs, sys: {} ns, total: {} µs".format(
            int(cpu_time * 1e6),
            int(( end_time_cpu - start_time_cpu) * 1e9),
            int((end_time_cpu - start_time_cpu + 1e-6) * 1e6))
                )
            checkpoint.save(file_prefix=checkpoint_prefix)
            
            print("Wall time: {:.2f} µs".format(wall_time * 1e6))
            print("Iteration :{}".format(i))
            print('Total Loss {:e}.'.format(loss))
            generated_images.append(img)
            keras.utils.save_img(fname, img) 
    return generated_images, best_image, best_cost

In [32]:
generated_images, best_image, best_cost = training_loop()

Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(3, 400, 535, 3))


TypeError: in user code:

    File "C:\Users\Layo\AppData\Local\Temp\ipykernel_19984\189228123.py", line 7, in compute_loss_and_grads  *
        loss = compute_loss(
    File "C:\Users\Layo\AppData\Local\Temp\ipykernel_19984\2033682352.py", line 6, in compute_loss  *
        layer_features = features[content_layer_name]

    TypeError: unhashable type: 'list'


In [None]:
import matplotlib.pyplot as plt

def display_image(img):
    plt.imshow(img)
    plt.axis("off")

In [None]:
plt.figure(figsize=(12, 12))
start_index = 0
num = len(generated_images)
for i in range(num):
    plt.subplot(4, 3, i + 1)
    display_image(generated_images[i + start_index])  # Adjust indices based on your data
plt.show()


plt.figure(figsize=(8, 8))
display_image(best_image)
plt.title("Best Image")
plt.show()

Doing this with video.

In [None]:
def process_frame_or_batch(frame_tensor, base_image, style_reference_image, optimizer):

    frame_tensor = tf.Variable(frame_tensor)  # Ensure the tensor is trainable

    loss, grads = compute_loss_and_grads(frame_tensor, base_image, style_reference_image)
    optimizer.apply_gradients([(grads, frame_tensor)])

    return loss, frame_tensor


In [None]:
from typing import Union
import numpy as np
import tensorflow as tf

ImageType = Union[np.ndarray, tf.Tensor]


def frame_image_read(image : ImageType) -> tf.Tensor:
  max_dim=512
  image= tf.convert_to_tensor(image, dtype = tf.float32)
  image= image/255.0
  shape = tf.cast(tf.shape(image)[:-1], tf.float32)
  long_dim = max(shape)
  scale = max_dim/long_dim
  new_shape = tf.cast(shape*scale, tf.int32)
  new_image = tf.image.resize(image, new_shape)
  new_image = new_image[tf.newaxis, :]
  
  return new_image


In [None]:
import cv2
import tensorflow as tf
import numpy as np
def neural_video_transfer(base_image_path, style_reference_image_paths,video_path : str = "videos/content/coast.mp4", output_video_path : str  = "videos/output/output_video.mp4", img_height : int = 400, img_width : int = 400):
    # Load the base and style reference images
    base_image = preprocess_image(base_image_path)
    style_images = [preprocess_image(path) for path in style_reference_image_paths]
    style_reference_image = tf.concat(style_images, axis=0)  

    # Initialize the video capture and writer
    video = cv2.VideoCapture(video_path)
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # Process frames one by one
    while True:
        ret, frame = video.read()
        if not ret:
            break  # End of video

        # Preprocess the frame (convert BGR to RGB)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_tensor = frame_image_read(frame_rgb)  # Use your preprocessing function

        # Resize the frame_tensor to match the dimensions of base_image
        frame_tensor_resized = tf.image.resize(frame_tensor, (img_height, img_width))

        # Apply the style transfer process
        loss, processed_frame = process_frame_or_batch(frame_tensor_resized, base_image, style_reference_image, optimizer)
        
        # Post-process the frame
        frame_output = deprocess_image(processed_frame.numpy())  # Use your deprocessing function
        frame_color_output = cv2.cvtColor(frame_output, cv2.COLOR_RGB2BGR)

        # Write the processed frame to the output video
        output_video.write(frame_color_output)

    # Release resources
    video.release()
    output_video.release()


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(3, 400, 535, 3))


ValueError: Unknown variable: <tf.Variable 'Variable:0' shape=(1, 400, 535, 3) dtype=float32, numpy=
array([[[[0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         ...,
         [0.80784315, 0.8392157 , 0.8745098 ],
         [0.80784315, 0.8392157 , 0.8745098 ],
         [0.80784315, 0.8392157 , 0.8745098 ]],

        [[0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         ...,
         [0.8067059 , 0.83807844, 0.87337255],
         [0.8067059 , 0.83807844, 0.87337255],
         [0.8067059 , 0.83807844, 0.87337255]],

        [[0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         ...,
         [0.8041176 , 0.83549017, 0.87078434],
         [0.8041176 , 0.83549017, 0.87078434],
         [0.8041176 , 0.83549017, 0.87078434]],

        ...,

        [[0.3721815 , 0.3721815 , 0.3094364 ],
         [0.38401297, 0.38401297, 0.32126787],
         [0.38384408, 0.38384408, 0.32109898],
         ...,
         [0.72009826, 0.6926473 , 0.5397061 ],
         [0.70749855, 0.6800476 , 0.5271064 ],
         [0.6989465 , 0.67149544, 0.51855433]],

        [[0.37590197, 0.37590197, 0.31315687],
         [0.39076027, 0.39076027, 0.32801518],
         [0.3840141 , 0.3840141 , 0.321269  ],
         ...,
         [0.7309894 , 0.70353836, 0.55059725],
         [0.71922123, 0.69177026, 0.5388291 ],
         [0.7145346 , 0.68708354, 0.53414243]],

        [[0.3764706 , 0.3764706 , 0.3137255 ],
         [0.39664835, 0.39664835, 0.33390325],
         [0.38316384, 0.38316384, 0.32041875],
         ...,
         [0.73158336, 0.7041323 , 0.5511912 ],
         [0.7327167 , 0.7052657 , 0.55232453],
         [0.72377455, 0.6963236 , 0.5433824 ]]]], dtype=float32)>. This optimizer can only be called for the variables it was originally built with. When working with a new set of variables, you should recreate a new optimizer instance.

Setup the video parameters

In [None]:
# Video file path
video_path = "videos/coast.mp4"
output_video_path = "output_video.mp4"

img_height = 400
img_width = 400

Perform neural video transfer

In [None]:

neural_video_transfer(base_image_path, style_reference_image_path, video_path, output_video_path, img_height, img_width)

In [None]:
import cv2
import tensorflow as tf
import numpy as np
def apply_camera(output_camera_video_path : str = "output_video.mp4"):
    # Access the camera using OpenCV
    camera = cv2.VideoCapture(0)  # "0" usually refers to the default webcam
    frame_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(camera.get(cv2.CAP_PROP_FPS))

    # Initialize Video Writer for output
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_camera_video = cv2.VideoWriter(output_camera_video_path, fourcc, fps, (frame_width, frame_height))


    while True:
        ret, frame = camera.read()
        if not ret:
            break  


        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_tensor = frame_image_read(frame_rgb) 

        frame_tensor_resized = tf.image.resize(frame_tensor, (img_height, img_width))

        
        loss, processed_frame = process_frame_or_batch(frame_tensor_resized, base_image, style_reference_image, optimizer)
        

        frame_output = deprocess_image(processed_frame.numpy())  
        frame_color_output = cv2.cvtColor(frame_output, cv2.COLOR_RGB2BGR)

        output_camera_video.write(frame_color_output)

        cv2.imshow('Processed Frame', frame_color_output)
        

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources
    camera.release()
    output_camera_video.release()
    cv2.destroyAllWindows()
