Loading the dataset of choice.

In [None]:
import os
# Expose only CUDA device 0 to this process. The variable is assigned before
# TensorFlow is imported so that it is already in place when TensorFlow loads.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
import keras

# List all physical GPU devices TensorFlow can see (an empty list when there are none).
gpus = tf.config.list_physical_devices('GPU')

In [None]:
print("TensorFlow version:", tf.__version__)

In [None]:
# Query the GPU list once, then report the devices and how many there are.
visible_gpus = tf.config.list_physical_devices('GPU')
print("CUDA Available:", visible_gpus)
print("Num GPUs Available: ", len(visible_gpus))

In [None]:
cpu_devices = tf.config.list_physical_devices('CPU')
print("Available CPUs:", cpu_devices)

Num GPUs Available:  0


In [None]:
# Index of the GPU to use: it selects an entry of the GPU device list and is
# passed to get_device() by the functions below.
GPU_in_use: int = 0
# Passed to get_device() together with GPU_in_use.
# NOTE(review): presumably the index of the fallback CPU device - confirm in device_helper.
CPU_in_use: int = 0

In [None]:
from device_helper import get_device

In [None]:
import tensorflow as tf

# Print the name of the selected GPU, or a notice when TensorFlow sees none.
gpus = tf.config.list_physical_devices('GPU')
print(gpus[GPU_in_use].name if gpus else "No GPU found")

In [None]:
import tensorflow as tf
import keras

# Content image whose structure is kept.
base_image_path = "../images/san.png"
# One or more style images; kept as a list so several styles can be supplied.
style_reference_image_paths = ["../images/starry_night.png"]
# Convenience alias for the first style image in the list.
style_reference_path = style_reference_image_paths[0]

In [None]:
# Fix the working height and derive the width that preserves the content
# image's aspect ratio.
img_height = 400
source_image = keras.utils.load_img(base_image_path)
original_width, original_height = source_image.size
img_width = round(original_width * img_height / original_height)

In [None]:
import numpy as np
from keras.applications import vgg19

In [None]:
from helper import  preprocess_image, deprocess_image
from loss_functions import style_loss, content_loss, total_variation_loss_l1

In [None]:
# Weight of the total-variation term added at the end of compute_loss().
total_variation_weight = 1e-6

# Weight of the style term (compute_loss() divides it evenly across the style layers).
style_weight = 1e-6
# Weight of the content term in compute_loss().
content_weight = 2.5e-8

In [None]:
from shared_utils.gatys_network import get_content_layer_names,get_style_layer_names,get_style_weights

In [None]:
chosen_loss_network : str = "mobilenet"

In [None]:

# Default layer selection. The per-layer weights double as the ordered list of
# style layer names.
style_weights = {
    'block1_conv1': 1.,
    'block2_conv1': 0.8,
    'block3_conv1': 0.5,
    'block4_conv1': 0.3,
    'block5_conv1': 0.1,
}
style_layer_names = list(style_weights)
content_layer_names = ["block5_conv2"]

# When enabled, the defaults above are replaced by the layers and weights that
# shared_utils.gatys_network provides for the chosen loss network.
use_custom : bool = True
if use_custom:
    style_layer_names = get_style_layer_names(chosen_loss_network)
    content_layer_names = get_content_layer_names(chosen_loss_network)
    style_weights = get_style_weights(chosen_loss_network)



In [None]:
from shared_utils.network import get_layer_names_for_loss_net,get_model_for_loss_net

In [None]:
def get_model(model_name : str,img_width : int,img_height : int):
  """Build a frozen feature extractor exposing the style and content layers.

  The backbone comes from ``get_model_for_loss_net(model_name, ...)`` and is
  marked non-trainable. The layers to expose are read from the module-level
  ``style_layer_names`` and ``content_layer_names``.

  Args:
    model_name: identifier of the loss network, forwarded to
      ``get_model_for_loss_net``.
    img_width: forwarded as the first element of ``image_size``.
    img_height: forwarded as the second element of ``image_size``.

  Returns:
    A ``keras.Model`` that maps the backbone input to a list of layer
    outputs: every style layer first (in ``style_layer_names`` order),
    followed by the content layers.
  """
  # NOTE(review): image_size is passed as (width, height); confirm this is the
  # order get_model_for_loss_net expects.
  backbone = get_model_for_loss_net(model_name, image_size=(img_width, img_height))
  backbone.trainable = False

  # Collect the requested intermediate activations, style layers before content.
  selected_outputs = []
  for layer_name in list(style_layer_names) + list(content_layer_names):
    selected_outputs.append(backbone.get_layer(layer_name).output)

  return keras.Model(backbone.input, selected_outputs)

In [None]:
feature_extractor = get_model(chosen_loss_network,img_width,img_height)

In [None]:
def get_feature_representations():
    """Run the feature extractor on the content image and on every style image.

    Reads the module-level ``base_image_path``, ``style_reference_image_paths``,
    ``img_height``, ``img_width`` and ``feature_extractor``.

    Returns:
        A tuple ``(base_image_features, style_reference_features)``: the
        extractor output for the content image, and a list holding the
        extractor output for each style image.
    """
    # Preprocess all inputs first, content image before the style images.
    content_input = preprocess_image(base_image_path, img_height, img_width)
    style_inputs = []
    for style_path in style_reference_image_paths:
        style_inputs.append(preprocess_image(style_path, img_height, img_width))

    # Then extract features in the same order.
    base_image_features = feature_extractor(content_input)
    style_reference_features = list(map(feature_extractor, style_inputs))

    return base_image_features, style_reference_features

In [None]:
def compute_loss(combination_image, base_image, style_reference_image):
    """Compute the weighted content + style + total-variation loss.

    The three images are concatenated along axis 0 and pushed through
    ``feature_extractor`` in one pass. In every layer output, index 0 is then
    read as the base image, index 1 as the style image and index 2 as the
    combination image.

    Args:
        combination_image: image being optimised.
        base_image: content image.
        style_reference_image: a single style image.
        All three are assumed to be batches of exactly one image with matching
        spatial size - TODO confirm against preprocess_image.

    Returns:
        Scalar tensor: ``content_weight`` * content loss
        + ``style_weight / len(style_layer_names)`` * each style-layer loss
        + ``total_variation_weight`` * total-variation loss.
    """
    input_tensor = tf.concat(
        [base_image, style_reference_image, combination_image], axis=0
    )
    outputs = feature_extractor(input_tensor)

    # A keras.Model built from a *list* of outputs returns a list, which cannot
    # be indexed by layer name. Build the name -> activation mapping explicitly;
    # the order mirrors get_model(): style layers first, then content layers.
    if isinstance(outputs, dict):
        features = outputs
    else:
        if not isinstance(outputs, (list, tuple)):
            outputs = [outputs]
        layer_order = list(style_layer_names) + list(content_layer_names)
        features = dict(zip(layer_order, outputs))

    loss = tf.zeros(shape=())

    # Content term: only the first content layer is used.
    layer_features = features[content_layer_names[0]]
    base_image_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss = loss + content_weight * content_loss(
        base_image_features, combination_features
    )

    # Style term: every style layer contributes with the same weight.
    # NOTE(review): the per-layer `style_weights` mapping defined earlier in the
    # notebook is not used here - confirm whether that is intended.
    for layer_name in style_layer_names:
        layer_features = features[layer_name]
        style_reference_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        style_loss_value = style_loss(
            style_reference_features, combination_features, img_height, img_width
        )
        loss += (style_weight / len(style_layer_names)) * style_loss_value

    # Total-variation term on the combination image itself.
    loss += total_variation_weight * total_variation_loss_l1(
        combination_image, img_height, img_width
    )
    return loss

In [None]:
from tensorflow.keras import layers

Set the mixed-precision policy (the call below is currently commented out, so the default policy stays in effect).

In [None]:
from tensorflow.keras.mixed_precision import set_global_policy
#set_global_policy('mixed_float16')

In [None]:
def normalization_grads(grads):
    """Scale gradients to unit global norm.

    Args:
        grads: either a single gradient tensor (what ``tape.gradient`` returns
            for a single variable) or a list/tuple of gradient tensors.

    Returns:
        The gradients divided by ``global_norm + 1e-8``, in the same form they
        were passed in (a single tensor, or a list of tensors).
    """
    # tf.linalg.global_norm expects a sequence of tensors. Without this branch
    # a lone tensor would be iterated slice by slice along its first axis.
    if isinstance(grads, (list, tuple)):
        norm = tf.linalg.global_norm(grads)
        return [g / (norm + 1e-8) for g in grads]

    norm = tf.linalg.global_norm([grads])
    return grads / (norm + 1e-8)

In [None]:
import tensorflow as tf

@tf.function
def compute_loss_and_grads(combination_image, base_image, style_reference_images,apply_normalization=False):
    """Average the total loss over all style images and differentiate it.

    Args:
        combination_image: the ``tf.Variable`` being optimised.
        base_image: content image batch.
        style_reference_images: either a list/tuple of style image batches, or
            one tensor stacking the style images along axis 0.
        apply_normalization: when True, the gradient is rescaled to unit
            global norm before it is returned.

    Returns:
        ``(loss, grads)``: the averaged scalar loss and its gradient with
        respect to ``combination_image``.

    Raises:
        ValueError: if no style image is supplied, or if a stacked tensor has
            an unknown leading dimension.
    """
    # Normalise the input to a list of batches. Slicing with i:i + 1 keeps the
    # batch axis, which compute_loss() needs for its tf.concat; plain iteration
    # over a tensor would drop it.
    if isinstance(style_reference_images, (list, tuple)):
        style_batches = list(style_reference_images)
    else:
        num_stacked = style_reference_images.shape[0]
        if num_stacked is None:
            raise ValueError("style_reference_images must have a static leading dimension.")
        style_batches = [style_reference_images[i:i + 1] for i in range(num_stacked)]
    if not style_batches:
        raise ValueError("At least one style reference image is required.")
    num = len(style_batches)

    with get_device(GPU_in_use, CPU_in_use):
        with tf.GradientTape() as tape:
            loss = tf.zeros(shape=())
            for style_batch in style_batches:
                # compute_loss() already applies content_weight, style_weight
                # and total_variation_weight, so the per-style results are only
                # averaged here; multiplying by style_weight again would scale
                # the whole loss a second time.
                loss += compute_loss(combination_image, base_image, style_batch) / num

        grads = tape.gradient(loss, combination_image)
        if apply_normalization:
            # Wrap/unwrap so the helper always receives a list of tensors.
            grads = normalization_grads([grads])[0]
        return loss, grads

In [None]:
def preprocess_style_image(style_reference_image_paths):
    """Preprocess every style image and join the results along axis 0.

    Args:
        style_reference_image_paths: iterable of image paths.

    Returns:
        The ``tf.concat`` (axis 0) of ``preprocess_image(path, img_height,
        img_width)`` for each path, in input order.
    """
    processed = [
        preprocess_image(style_path, img_height, img_width)
        for style_path in style_reference_image_paths
    ]
    return tf.concat(processed, axis=0)
  

In [None]:
def add_noise_to_image(image,noise_strength : float =0.1):
    """Add zero-mean Gaussian noise to ``image`` and clip the result to [0, 255].

    Args:
        image: tensor to perturb; the noise takes its shape and dtype.
        noise_strength: standard deviation of the noise.

    Returns:
        ``image + noise`` clipped to the range 0.0-255.0.
    """
    gaussian = tf.random.normal(
        shape=tf.shape(image),
        mean=0.0,
        stddev=noise_strength,
        dtype=image.dtype,
    )
    return tf.clip_by_value(image + gaussian, 0.0, 255.0)

In [None]:
def preprocess_NST_images(base_image_path : str, style_reference_image_path : str):
    """Load the content and style images and create the image to optimise.

    Args:
        base_image_path: path of the content image.
        style_reference_image_path: path of the style image.

    Returns:
        ``(base_image, style_reference_images, combination_image)``: the two
        preprocessed images, and a ``tf.Variable`` initialised from a noisy
        copy of the content image.
    """
    with get_device(GPU_in_use, CPU_in_use):
        base_image = preprocess_image(base_image_path, img_height, img_width)
        style_reference_images = preprocess_image(
            style_reference_image_path, img_height, img_width
        )
        # Optimisation starts from the content image plus a little noise.
        combination_image = tf.Variable(add_noise_to_image(base_image))
    return base_image, style_reference_images, combination_image


In [None]:
import math
import os
import time
import numpy as np

In [None]:
import pyJoules
import GPUtil
import psutil
from datetime import datetime
from tracker_helper import get_gpu_usage

In [None]:
from optimizer import get_optimizer

In [None]:
class checkPointManager:
    """Bundle a ``tf.train.Checkpoint`` with a ``tf.train.CheckpointManager``.

    The checkpoint tracks the optimizer and the combination image; the manager
    writes into ``checkpoint_dir`` and keeps the five most recent checkpoints.
    """

    def __init__(self, combination_image, optimizer, checkpoint_dir : str, folder_path : str):
        """Create the checkpoint objects.

        Args:
            combination_image: variable tracked by the checkpoint.
            optimizer: optimizer object tracked by the checkpoint (it is handed
                to ``tf.train.Checkpoint``, so it is an object, not a name).
            checkpoint_dir: directory the manager writes checkpoints to.
            folder_path: output folder that ``setup()`` creates.
        """
        self.optimizer = optimizer
        self.combination_image = combination_image
        self.checkpoint_dir = checkpoint_dir

        self.folder_path =  folder_path
        self.checkpoint = tf.train.Checkpoint(optimizer=optimizer, combination_image=combination_image)
        self.manager = tf.train.CheckpointManager(self.checkpoint, directory=checkpoint_dir, max_to_keep=5)

    def save(self):
        """Save through the manager and return what ``manager.save()`` returns."""
        return self.manager.save()

    def save_checkpoint(self, step,checkpoint_prefix):
        """Save the checkpoint directly under ``checkpoint_prefix``.

        ``step`` is accepted for call compatibility but is not used.
        """
        self.checkpoint.save(file_prefix=checkpoint_prefix)

    def setup(self):
        """Create the output folder and the configured checkpoint directory.

        Safe to call repeatedly: existing directories are left untouched.
        """
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(self.folder_path, exist_ok=True)
        # Use the directory this instance was configured with rather than a
        # hard-coded "./checkpoints".
        os.makedirs(self.checkpoint_dir, exist_ok=True)

In [None]:
def result_save(content_name : str,style_name: str,iterations : int, img: np.ndarray,verbose: int = 0):
    """Save an intermediate result image under ``images/``.

    The file name combines the content name, the style name, a timestamp
    (``YYYYmmdd_HHMMSS``) and the iteration number.

    Args:
        content_name: label of the content image, used in the file name.
        style_name: label of the style image, used in the file name.
        iterations: iteration number, used in the file name.
        img: image array handed to ``keras.utils.save_img``.
        verbose: when greater than 0, print a confirmation message.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fname = f"images/{content_name}_{style_name}_{timestamp}_combination_image_at_iteration_{iterations}.png"
    # Make sure the target folder exists so the save does not depend on some
    # other code path having created it.
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    keras.utils.save_img(fname, img)
    if verbose > 0:
        print("Image saved at iteration {}".format(iterations))

In [None]:
def apply_style_transfer_step(combination_image, base_image, style_reference_image, optimizer):
    """Perform one optimisation step on ``combination_image``.

    Args:
        combination_image: variable updated in place by the optimizer.
        base_image: content image, forwarded to ``compute_loss_and_grads``.
        style_reference_image: style image(s), forwarded to
            ``compute_loss_and_grads``.
        optimizer: object whose ``apply_gradients`` applies the update.

    Returns:
        ``(loss, grads)`` as computed before the update was applied.
    """
    # Loss and gradient are evaluated on the configured device...
    with get_device(GPU_in_use, CPU_in_use):
        step_result = compute_loss_and_grads(
            combination_image, base_image, style_reference_image
        )
    loss, grads = step_result
    # ...and the update itself is applied outside that device scope.
    optimizer.apply_gradients([(grads, combination_image)])
    return loss, grads

In [None]:

from psutil import disk_usage


def training_loop(base_image, style_reference_image, combination_image,content_name : str,style_name: str,verbose : int = 0,include_checkpoints : bool = False, chosen_optimizer : str = "adam", learning_rate : float = 0.01, improvement_threshold : float = 0.5):
    """Optimise ``combination_image`` for a fixed number of steps and log progress.

    Runs 1000 iterations. Every 100 iterations it records timing and resource
    usage, snapshots the current image with ``deprocess_image``, saves it with
    ``result_save`` and, when enabled, writes a checkpoint.

    Args:
        base_image: content image, forwarded to ``apply_style_transfer_step``.
        style_reference_image: style image, forwarded likewise.
        combination_image: ``tf.Variable`` updated in place.
        content_name: label used by ``result_save`` for file names.
        style_name: label used by ``result_save`` for file names.
        verbose: when greater than 0, print loss and timing information.
        include_checkpoints: when True, restore the latest checkpoint from
            ``./checkpoints`` before training and save one at every check step.
        chosen_optimizer: name passed to ``get_optimizer``.
        learning_rate: learning rate passed to ``get_optimizer``.
        improvement_threshold: currently unused.

    Returns:
        ``(generated_images, best_image, best_cost, ram_usage_list,
        gpu_usage_list)``. The usage lists hold ``(iteration, value)`` pairs.
    """
    optimizer = get_optimizer(chosen_optimizer, learning_rate=learning_rate)
    checkpoint = None
    generated_images = []
    start_step : int = 1
    iterations = 1000
    check_step: int = 100
    folder_path = "images"
    best_cost = math.inf
    best_image = None
    checkpoint_dir = "./checkpoints"
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

    # result_save() writes into this folder at every check step, so it has to
    # exist whether or not checkpoints are enabled.
    os.makedirs(folder_path, exist_ok=True)
    if include_checkpoints:
        checkpoint = tf.train.Checkpoint(optimizer=optimizer, combination_image=combination_image)
        os.makedirs(checkpoint_dir, exist_ok=True)
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

    total_wall_time = time.time()
    total_time_cpu = time.process_time()
    start_time_cpu = time.process_time()
    start_time_wall = time.time()

    gpu_usage_list = []
    ram_usage_list = []
    disk_usage_list = []
    cpu_usage_list = []
    cpu_duration_logs = []
    wall_duration_logs = []
    if start_step > iterations:
        print(f"Start step ({start_step}) is greater than the specified iterations ({iterations}). No training will be performed.")
        return generated_images, best_image, best_cost, ram_usage_list,gpu_usage_list

    for i in range(start_step, iterations + 1):
        loss, _ = apply_style_transfer_step(combination_image, base_image, style_reference_image, optimizer)

        if i % check_step != 0:
            continue

        # Close the timing window first, so the bookkeeping below (including
        # the blocking 1-second cpu_percent sample) is not counted as training.
        cpu_time = time.process_time() - start_time_cpu
        wall_time = time.time() - start_time_wall
        cpu_duration_logs.append((i, cpu_time))
        wall_duration_logs.append((i, wall_time))

        # Resource usage: one entry per check step. A GPU reading is recorded
        # only when one is available.
        gpu = get_gpu_usage()
        if gpu is not None:
            gpu_usage_list.append((i, gpu))
        ram_usage_list.append((i, psutil.virtual_memory().percent))
        cpu_usage_list.append((i, psutil.cpu_percent(interval=1)))
        disk_usage_list.append((i, disk_usage('/').percent))

        img = deprocess_image(combination_image.numpy(), img_height, img_width)
        if loss < best_cost:
            best_cost = loss
            best_image = img

        if include_checkpoints and checkpoint is not None:
            checkpoint.save(file_prefix=checkpoint_prefix)

        if verbose > 0:
            loss_value = float(loss)
            print(f"Iteration {i}: loss={loss_value:.2f}")
            print("CPU times in seconds: {:.2f}".format(cpu_time))
            print("Wall time in seconds: {:.2f}".format(wall_time))
            print("Iteration :{}".format(i))
            print('Total Loss {:e}.'.format(loss_value))

        generated_images.append(img)
        result_save(content_name, style_name, i, img)

        # Start the next timing window once the bookkeeping is done.
        start_time_cpu = time.process_time()
        start_time_wall = time.time()

    end_total_wall_time = time.time() - total_wall_time
    end_total_time_cpu = time.process_time() - total_time_cpu
    if verbose > 0:
        print("Total wall time: {:.2f} seconds".format(end_total_wall_time))
        print("Total CPU time: {:.2f} seconds".format(end_total_time_cpu))
    return generated_images, best_image, best_cost,ram_usage_list,gpu_usage_list

In [None]:
content_folder = "content"
style_folder = "style"

In [None]:
image_file_types = ('.png', '.jpg', '.jpeg')

In [None]:
# Pick the first content image and the first style image. The directory
# listing is sorted because os.listdir() returns entries in arbitrary order,
# which would make the selection differ between machines and runs. Extensions
# are matched case-insensitively so that e.g. ".PNG" or ".JPG" files are found.
content_images = [os.path.join(content_folder, f) for f in sorted(os.listdir(content_folder)) if f.lower().endswith(image_file_types)][0:1]
style_images = [os.path.join(style_folder, f) for f in sorted(os.listdir(style_folder)) if f.lower().endswith(image_file_types)][0:1]

In [None]:

# Run style transfer for every (content, style) pair and collect the results.
image_set, best_image_set, best_cost_set = [], [], []
for content_path in content_images:
    content_name = os.path.basename(content_path)
    for style_path in style_images:
        style_name = os.path.basename(style_path)
        base_image, style_reference_image, combination_image = preprocess_NST_images(
            content_path, style_path
        )
        run_outputs = training_loop(
            base_image, style_reference_image, combination_image, content_name, style_name
        )
        # The names below are overwritten on each pair; later cells see the last run.
        generated_images, best_image, best_cost, ram_usage_list, gpu_usage_list = run_outputs
        image_set.append(generated_images)
        best_image_set.append(best_image)
        best_cost_set.append(best_cost)
        
        
    

Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(3, 400, 535, 3))


TypeError: in user code:

    File "C:\Users\Layo\AppData\Local\Temp\ipykernel_19984\189228123.py", line 7, in compute_loss_and_grads  *
        loss = compute_loss(
    File "C:\Users\Layo\AppData\Local\Temp\ipykernel_19984\2033682352.py", line 6, in compute_loss  *
        layer_features = features[content_layer_name]

    TypeError: unhashable type: 'list'


In [None]:
import matplotlib.pyplot as plt

def display_image(img):
    """Draw ``img`` on the current matplotlib axes and hide the axis decorations."""
    plt.imshow(img)
    plt.axis("off")

In [None]:
# Show the intermediate snapshots in a 3-column grid. The row count grows with
# the number of snapshots (never fewer than 4 rows), so more than 12 images do
# not overflow the subplot grid.
start_index = 0
num = len(generated_images) - start_index
if num > 0:
    n_cols = 3
    n_rows = max(4, math.ceil(num / n_cols))
    plt.figure(figsize=(12, 12))
    for i in range(num):
        plt.subplot(n_rows, n_cols, i + 1)
        display_image(generated_images[i + start_index])
    plt.show()
else:
    print("No intermediate images to display.")

# best_image stays None when no check step was reached, so guard the plot.
if best_image is not None:
    plt.figure(figsize=(8, 8))
    display_image(best_image)
    plt.title("Best Image")
    plt.show()
else:
    print("No best image to display.")

Applying the same style transfer to video, frame by frame.

In [None]:
def process_frame_or_batch(frame_tensor, base_image, style_reference_image, optimizer):
    """Apply one style-transfer gradient step to a single frame (or batch).

    Args:
        frame_tensor: frame data; it is wrapped in a new ``tf.Variable``.
        base_image: content image, forwarded to ``compute_loss_and_grads``.
        style_reference_image: style image(s), forwarded likewise.
        optimizer: optimizer used as a configuration template (see below).

    Returns:
        ``(loss, frame_variable)``: the loss before the update, and the
        updated variable.
    """
    frame_variable = tf.Variable(frame_tensor)  # Ensure the tensor is trainable

    # An optimizer that has already been applied to one variable rejects any
    # other ("Unknown variable ... recreate a new optimizer instance"). Every
    # call here creates a new variable, so build a fresh optimizer with the
    # same configuration instead of reusing the caller's instance.
    frame_optimizer = type(optimizer).from_config(optimizer.get_config())

    loss, grads = compute_loss_and_grads(frame_variable, base_image, style_reference_image)
    frame_optimizer.apply_gradients([(grads, frame_variable)])

    return loss, frame_variable


In [None]:
from typing import Union
import numpy as np
import tensorflow as tf

ImageType = Union[np.ndarray, tf.Tensor]

def frame_image_read(image : ImageType, max_dim : int = 512) -> tf.Tensor:
  """Convert a frame to a float32 batch scaled so its longest side is ``max_dim``.

  Args:
    image: frame as a NumPy array or tensor whose last axis is the channel
      axis (every axis before it is treated as a spatial dimension).
    max_dim: target length of the longest spatial side after resizing.

  Returns:
    A float32 tensor with a new leading batch axis, holding the input divided
    by 255 and resized with the aspect ratio preserved.
  """
  # tf.cast accepts any numeric dtype, whereas convert_to_tensor(dtype=...)
  # rejects an existing tensor of a different dtype (e.g. uint8).
  image = tf.cast(tf.convert_to_tensor(image), tf.float32)
  image = image / 255.0

  shape = tf.cast(tf.shape(image)[:-1], tf.float32)
  # reduce_max is a tensor op, so it also works when traced in graph mode,
  # unlike Python's max(), which iterates over the tensor.
  long_dim = tf.reduce_max(shape)
  scale = max_dim / long_dim

  new_shape = tf.cast(shape * scale, tf.int32)
  new_image = tf.image.resize(image, new_shape)
  return new_image[tf.newaxis, :]

In [None]:
import cv2
import tensorflow as tf
import numpy as np
def neural_video_transfer(base_image_path : str, style_reference_image_path : list[str],video_path : str = "videos/content/coast.mp4", output_video_path : str  = "videos/output/output_video.mp4", img_height : int = 400, img_width : int = 400, chosen_optimizer : str = "adam", learning_rate : float = 0.01):
    """Stylise a video frame by frame and write the result to a new file.

    Each frame is resized to ``(img_height, img_width)``, receives one
    style-transfer step through ``process_frame_or_batch``, is converted back
    with ``deprocess_image``, resized to the source resolution and written out.

    Args:
        base_image_path: path of the content image.
        style_reference_image_path: one style image path, or a list of paths.
        video_path: input video.
        output_video_path: output file (written with the ``mp4v`` codec).
        img_height: working height used for the loss computation.
        img_width: working width used for the loss computation.
        chosen_optimizer: optimizer name passed to ``get_optimizer``.
        learning_rate: learning rate passed to ``get_optimizer``.

    Raises:
        ValueError: if no style image path is given.
        IOError: if the input video cannot be opened.

    NOTE(review): compute_loss() reads the module-level img_height/img_width,
    and feature_extractor was built for the size in effect at that time -
    confirm that the sizes passed here agree with both.
    """
    # Use the argument (not a notebook global) and accept a bare string too.
    if isinstance(style_reference_image_path, str):
        style_paths = [style_reference_image_path]
    else:
        style_paths = list(style_reference_image_path)
    if not style_paths:
        raise ValueError("At least one style reference image path is required.")

    with tf.device('/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'):
        base_image = preprocess_image(base_image_path, img_height, img_width)
        # Kept as a list of per-image batches; compute_loss_and_grads iterates over it.
        style_images = [preprocess_image(path, img_height, img_width) for path in style_paths]

    # Initialize the video capture and writer
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        video.release()
        raise IOError(f"Could not open video: {video_path}")

    output_video = None
    try:
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep fractional frame rates (e.g. 29.97); fall back when unknown.
        fps = video.get(cv2.CAP_PROP_FPS) or 30.0

        output_dir = os.path.dirname(output_video_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

        # Process frames one by one
        while True:
            ret, frame = video.read()
            if not ret:
                break  # End of video

            # Preprocess the frame (convert BGR to RGB)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): frame_image_read scales pixels by 1/255, while the
            # base/style images come from preprocess_image - confirm both use
            # the same value range.
            frame_tensor = frame_image_read(frame_rgb)

            # Resize the frame to the working resolution of the base image
            frame_tensor_resized = tf.image.resize(frame_tensor, (img_height, img_width))

            # Each frame becomes a new variable, so it gets its own optimizer;
            # an optimizer cannot be reused across different variables.
            optimizer = get_optimizer(chosen_optimizer, learning_rate=learning_rate)
            loss, processed_frame = process_frame_or_batch(frame_tensor_resized, base_image, style_images, optimizer)

            # Post-process the frame
            frame_output = deprocess_image(processed_frame.numpy(), img_height, img_width)
            frame_color_output = cv2.cvtColor(frame_output, cv2.COLOR_RGB2BGR)
            # Written frames must match the size the writer was opened with.
            frame_color_output = cv2.resize(frame_color_output, (frame_width, frame_height))

            # Write the processed frame to the output video
            output_video.write(frame_color_output)
    finally:
        # Release resources even if processing fails part-way through
        video.release()
        if output_video is not None:
            output_video.release()


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(3, 400, 535, 3))


ValueError: Unknown variable: <tf.Variable 'Variable:0' shape=(1, 400, 535, 3) dtype=float32, numpy=
array([[[[0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         ...,
         [0.80784315, 0.8392157 , 0.8745098 ],
         [0.80784315, 0.8392157 , 0.8745098 ],
         [0.80784315, 0.8392157 , 0.8745098 ]],

        [[0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         ...,
         [0.8067059 , 0.83807844, 0.87337255],
         [0.8067059 , 0.83807844, 0.87337255],
         [0.8067059 , 0.83807844, 0.87337255]],

        [[0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         [0.85882354, 0.8745098 , 0.88235295],
         ...,
         [0.8041176 , 0.83549017, 0.87078434],
         [0.8041176 , 0.83549017, 0.87078434],
         [0.8041176 , 0.83549017, 0.87078434]],

        ...,

        [[0.3721815 , 0.3721815 , 0.3094364 ],
         [0.38401297, 0.38401297, 0.32126787],
         [0.38384408, 0.38384408, 0.32109898],
         ...,
         [0.72009826, 0.6926473 , 0.5397061 ],
         [0.70749855, 0.6800476 , 0.5271064 ],
         [0.6989465 , 0.67149544, 0.51855433]],

        [[0.37590197, 0.37590197, 0.31315687],
         [0.39076027, 0.39076027, 0.32801518],
         [0.3840141 , 0.3840141 , 0.321269  ],
         ...,
         [0.7309894 , 0.70353836, 0.55059725],
         [0.71922123, 0.69177026, 0.5388291 ],
         [0.7145346 , 0.68708354, 0.53414243]],

        [[0.3764706 , 0.3764706 , 0.3137255 ],
         [0.39664835, 0.39664835, 0.33390325],
         [0.38316384, 0.38316384, 0.32041875],
         ...,
         [0.73158336, 0.7041323 , 0.5511912 ],
         [0.7327167 , 0.7052657 , 0.55232453],
         [0.72377455, 0.6963236 , 0.5433824 ]]]], dtype=float32)>. This optimizer can only be called for the variables it was originally built with. When working with a new set of variables, you should recreate a new optimizer instance.

Set up the video parameters

In [None]:
# Video file path
video_path = "videos/coast.mp4"
output_video_path = "output_video.mp4"

img_height = 400
img_width = 400

Perform neural video transfer

In [None]:

# Pass the list of style image paths defined earlier in the notebook; the name
# `style_reference_image_path` is never defined at module level.
neural_video_transfer(base_image_path, style_reference_image_paths, video_path, output_video_path, img_height, img_width)