Setting the paths of the content (base) image and the style reference image.

In [1]:
import tensorflow as tf
import keras

# Paths of the two input images, hard-coded to a Kaggle dataset mount
# ("/kaggle/input/..."); adjust them when running elsewhere.
# base_image_path: image loaded as the content target and as the starting
# point of the optimisation.
base_image_path = "/kaggle/input/nst-images/images/san.png"
# style_reference_image_path: image whose features feed the style loss.
style_reference_image_path = "/kaggle/input/nst-images/images/starry_night.png"


2025-04-19 14:21:47.932084: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745072508.168444      18 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745072508.235041      18 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import tensorflow as tf
# Report how many GPUs TensorFlow can see; later cells place ops on "/GPU:0".
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [3]:
# Work at a fixed height and derive the width that keeps the base image's
# aspect ratio. `.size` of the loaded image is unpacked as (width, height).
img_height = 400
original_width, original_height = keras.utils.load_img(base_image_path).size
img_width = round(img_height * original_width / original_height)

In [4]:
import numpy as np
from keras.applications import vgg19

In [5]:
import numpy as np
def preprocess_image(image_path):
    """Load an image file and convert it into a float32 tensor for VGG19.

    The image is resized to the module-level (img_height, img_width), given a
    leading batch axis of size 1 and passed through
    keras.applications.vgg19.preprocess_input.

    Args:
        image_path: path of the image file to load.

    Returns:
        A tf.float32 tensor with a leading batch dimension of 1.
    """
    target_size = (img_height, img_width)
    pil_image = keras.utils.load_img(image_path, target_size=target_size)
    pixels = keras.utils.img_to_array(pil_image)
    # Add the batch axis the network expects.
    batch = pixels[np.newaxis, ...]
    vgg_input = keras.applications.vgg19.preprocess_input(batch)
    return tf.convert_to_tensor(vgg_input, dtype=tf.float32)
def deprocess_image(img):
    """Turn a preprocessed image array back into a displayable uint8 image.

    Adds the per-channel offsets (103.939, 116.779, 123.68) back, reverses the
    channel axis and clips to [0, 255]. These look like the ImageNet BGR means
    removed by vgg19.preprocess_input, with the reversal going BGR -> RGB
    (NOTE(review): confirm against the Keras preprocessing in use).

    The input is never modified: the function works on a copy. The spatial
    size is taken from the array itself, so any frame size is accepted; only
    arrays that are neither 3-D nor 4-D fall back to the module-level
    (img_height, img_width, 3) reshape.

    Args:
        img: array-like of shape (1, H, W, 3) or (H, W, 3), or any array with
            img_height * img_width * 3 elements.

    Returns:
        A uint8 NumPy array of shape (H, W, 3).

    Raises:
        ValueError: if a 4-D input holds more than one image, or the last
            axis does not have exactly 3 channels.
    """
    # Copy first so that the in-place arithmetic of the original version can
    # no longer leak into the caller's array.
    img = np.array(img, copy=True)
    if not np.issubdtype(img.dtype, np.floating):
        img = img.astype("float32")

    if img.ndim == 4:
        if img.shape[0] != 1:
            raise ValueError(
                f"expected a single image, got a batch of {img.shape[0]}")
        img = img[0]
    elif img.ndim != 3:
        # Flat (or otherwise shaped) input: fall back to the configured size.
        img = img.reshape((img_height, img_width, 3))

    if img.shape[-1] != 3:
        raise ValueError(f"expected 3 channels, got shape {img.shape}")

    # Undo the per-channel mean-centering.
    img = img + np.array([103.939, 116.779, 123.68], dtype=img.dtype)
    # Reverse the channel order.
    img = img[:, :, ::-1]
    return np.clip(img, 0, 255).astype("uint8")

In [6]:
# Convolutional part of VGG19 (no classifier head) with ImageNet weights.
model = keras.applications.vgg19.VGG19(weights="imagenet", include_top=False)
# Map every layer name to its symbolic output so one forward pass exposes the
# activations of all layers by name.
outputs_dict = {layer.name: layer.output for layer in model.layers}
feature_extractor = keras.Model(inputs=model.inputs, outputs=outputs_dict)

I0000 00:00:1745072522.759573      18 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [7]:
def content_loss(base_img, combination_img):
    """Return the sum of squared differences between two feature tensors.

    Args:
        base_img: features of the base (content) image.
        combination_img: features of the generated image.

    Returns:
        A scalar tensor.
    """
    difference = combination_img - base_img
    return tf.reduce_sum(tf.square(difference))

In [8]:
def gram_matrix(x):
    """Compute the Gram matrix of a 3-D feature tensor.

    The last axis is moved to the front, the two remaining axes are flattened
    and the result is multiplied by its own transpose, giving a square matrix
    whose side equals the size of the original last axis.

    Args:
        x: 3-D tensor; assumed to be (height, width, channels) — TODO confirm.

    Returns:
        A 2-D tensor of shape (x.shape[2], x.shape[2]).
    """
    channels_first = tf.transpose(x, (2, 0, 1))
    flattened = tf.reshape(channels_first, (tf.shape(channels_first)[0], -1))
    return tf.matmul(flattened, tf.transpose(flattened))
def style_loss(style_img, combination_img):
    """Return the normalised squared distance between two Gram matrices.

    The sum of squared Gram-matrix differences is divided by
    4 * channels**2 * size**2, with channels fixed at 3 and size taken from
    the module-level img_height * img_width.

    Args:
        style_img: feature tensor of the style reference image.
        combination_img: feature tensor of the generated image.

    Returns:
        A scalar tensor.
    """
    channels = 3
    size = img_height * img_width
    normaliser = 4.0 * (channels ** 2) * (size ** 2)
    gram_gap = gram_matrix(style_img) - gram_matrix(combination_img)
    return tf.reduce_sum(tf.square(gram_gap)) / normaliser

In [9]:
def total_variation_loss(x):
    """Return a total-variation penalty for a 4-D image tensor.

    Squared differences between each element and its neighbour one step along
    axis 1 and one step along axis 2 are summed, raised to the power 1.25 and
    reduced to a scalar.

    The slices are taken relative to the tensor's own extent instead of the
    module-level img_height / img_width, so the result is unchanged for
    tensors of the configured size and is also correct for any other size.

    Args:
        x: 4-D tensor; assumed to be (batch, height, width, channels) —
            TODO confirm.

    Returns:
        A scalar tensor.
    """
    with tf.device("/GPU:0"):
        # Everything except the last row and last column.
        core = x[:, :-1, :-1, :]
        a = tf.square(core - x[:, 1:, :-1, :])
        b = tf.square(core - x[:, :-1, 1:, :])
        return tf.reduce_sum(tf.pow(a + b, 1.25))

In [10]:
# Layers whose activations feed the style loss; compute_loss divides
# style_weight evenly between them.
style_layer_names = [
 "block1_conv1",
 "block2_conv1",
 "block3_conv1",
 "block4_conv1",
 "block5_conv1",
]
# Layer whose activations feed the content loss.
content_layer_name = "block5_conv2"
# Weight of the total-variation term in compute_loss.
total_variation_weight = 1e-6

# Weights of the style and content terms in compute_loss.
style_weight = 1e-6
content_weight = 2.5e-8
def compute_loss(combination_image, base_image, style_reference_image):
    """Compute the total style-transfer loss for the generated image.

    The three images are stacked into one batch and run through
    feature_extractor once. The loss is the weighted sum of the content loss
    (at content_layer_name), the style loss averaged over style_layer_names,
    and the total-variation loss of the generated image.

    Args:
        combination_image: the image being optimised.
        base_image: the content target.
        style_reference_image: the style target.

    Returns:
        A scalar tensor.
    """
    # Batch order fixes the indices used below:
    # 0 = base, 1 = style reference, 2 = combination.
    stacked = tf.concat(
        [base_image, style_reference_image, combination_image], axis=0)
    activations = feature_extractor(stacked)

    # Content term.
    content_activations = activations[content_layer_name]
    loss = tf.zeros(shape=())
    loss = loss + content_weight * content_loss(
        content_activations[0, :, :, :], content_activations[2, :, :, :])

    # Style terms, one per configured layer, equally weighted.
    per_layer_weight = style_weight / len(style_layer_names)
    for layer_name in style_layer_names:
        style_activations = activations[layer_name]
        layer_style_loss = style_loss(
            style_activations[1, :, :, :], style_activations[2, :, :, :])
        loss += per_layer_weight * layer_style_loss

    # Smoothness term on the generated image itself.
    loss += total_variation_weight * total_variation_loss(combination_image)
    return loss

Set the mixed-precision policy (currently disabled: the `set_global_policy` call below is commented out).

In [11]:
from tensorflow.keras.mixed_precision import set_global_policy
#set_global_policy('mixed_float16')

In [12]:
import tensorflow as tf
@tf.function
def compute_loss_and_grads(
        combination_image, base_image, style_reference_image):
    """Return the total loss and its gradient w.r.t. the generated image.

    Args:
        combination_image: image being optimised; expected to be a
            tf.Variable (or otherwise watched by the tape) so a gradient
            exists.
        base_image: the content target.
        style_reference_image: the style target.

    Returns:
        A (loss, grads) tuple.
    """
    with tf.device('/GPU:0'):
        with tf.GradientTape() as recorder:
            total_loss = compute_loss(
                combination_image, base_image, style_reference_image)
        gradients = recorder.gradient(total_loss, combination_image)
    return total_loss, gradients

In [13]:
# Plain SGD whose learning rate starts at 100.0 and follows an exponential
# decay schedule (decay_rate 0.96 with decay_steps 100).
optimizer = keras.optimizers.SGD(
 keras.optimizers.schedules.ExponentialDecay(
 initial_learning_rate=100.0, decay_steps=100, decay_rate=0.96
 )
)

In [14]:
with tf.device('/GPU:0'):
    # Fixed targets for the content and style losses.
    base_image = preprocess_image(base_image_path)
    style_reference_image = preprocess_image(style_reference_image_path)
    # The image being optimised starts as a copy of the base image. It is
    # initialised from the tensor already loaded above rather than decoding
    # and preprocessing the same file a second time.
    combination_image = tf.Variable(base_image)

In [15]:

# Gradient-descent loop on the generated image; every 100th iteration the
# loss is printed and the current image is written to disk.
iterations = 4000
for i in range(1, iterations + 1):
    with tf.device('/GPU:0'):  # Place operations explicitly on GPU
        loss, grads = compute_loss_and_grads(
            combination_image, base_image, style_reference_image
        )
    optimizer.apply_gradients([(grads, combination_image)])

    if i % 100 != 0:
        continue

    print(f"Iteration {i}: loss={loss:.2f}")
    fname = f"combination_image_at_iteration_{i}.png"
    img = deprocess_image(combination_image.numpy())
    keras.utils.save_img(fname, img)

I0000 00:00:1745072529.213807      56 cuda_dnn.cc:529] Loaded cuDNN version 90300


Iteration 100: loss=6510.10
Iteration 200: loss=5334.66
Iteration 300: loss=4852.72
Iteration 400: loss=4588.87
Iteration 500: loss=4420.04
Iteration 600: loss=4301.45
Iteration 700: loss=4213.08
Iteration 800: loss=4144.49
Iteration 900: loss=4089.65
Iteration 1000: loss=4044.38
Iteration 1100: loss=4006.41
Iteration 1200: loss=3974.07
Iteration 1300: loss=3946.29
Iteration 1400: loss=3922.13
Iteration 1500: loss=3900.85
Iteration 1600: loss=3882.04
Iteration 1700: loss=3865.26
Iteration 1800: loss=3850.22
Iteration 1900: loss=3836.66
Iteration 2000: loss=3824.38
Iteration 2100: loss=3813.20
Iteration 2200: loss=3802.97
Iteration 2300: loss=3793.60
Iteration 2400: loss=3784.97
Iteration 2500: loss=3776.98
Iteration 2600: loss=3769.59
Iteration 2700: loss=3762.77
Iteration 2800: loss=3756.43
Iteration 2900: loss=3750.55
Iteration 3000: loss=3745.05
Iteration 3100: loss=3739.93
Iteration 3200: loss=3735.15
Iteration 3300: loss=3730.68
Iteration 3400: loss=3726.46
Iteration 3500: loss=37

Applying the same style transfer to video, one frame at a time.

In [16]:
def process_frame_or_batch(frame_tensor, base_image, style_reference_image, optimizer):
    """Apply one optimisation step to a frame and return the updated frame.

    Args:
        frame_tensor: initial value of the image to optimise.
        base_image: content target passed to compute_loss_and_grads.
        style_reference_image: style target passed to compute_loss_and_grads.
        optimizer: object exposing apply_gradients([(grad, var)]).

    Returns:
        A (loss, variable) tuple: the loss measured before the step, and the
        tf.Variable holding the frame after the step.
    """
    # NOTE(review): a fresh tf.Variable is created on every call while the
    # optimizer is supplied by the caller; some optimizer implementations only
    # accept variables they were built with — confirm with the Keras version
    # in use.
    trainable_frame = tf.Variable(frame_tensor)  # wrap so a gradient can be applied

    step_loss, step_grads = compute_loss_and_grads(
        trainable_frame, base_image, style_reference_image)
    optimizer.apply_gradients([(step_grads, trainable_frame)])

    return step_loss, trainable_frame


In [17]:
from typing import Union
import numpy as np
import tensorflow as tf

ImageType = Union[np.ndarray, tf.Tensor]


def frame_image_read(image : ImageType, max_dim: int = 512) -> tf.Tensor:
    """Scale an image to [0, 1], resize it and add a batch axis.

    The image is converted to float32, divided by 255, and resized so that its
    longest leading dimension equals max_dim while the ratio between the
    dimensions is preserved.

    Args:
        image: array or tensor; every axis except the last is treated as a
            spatial axis (assumes a channel-last (H, W, C) frame with values
            in 0-255 — TODO confirm).
        max_dim: target length of the longest spatial side. Defaults to 512,
            the value that was previously hard-coded.

    Returns:
        A float32 tensor with a leading batch axis of size 1.
    """
    image = tf.convert_to_tensor(image, dtype=tf.float32)
    image = image / 255.0

    spatial_shape = tf.cast(tf.shape(image)[:-1], tf.float32)
    # tf.reduce_max keeps the computation inside TensorFlow instead of
    # iterating over the tensor element by element with Python's max().
    long_dim = tf.reduce_max(spatial_shape)
    scale = max_dim / long_dim

    new_shape = tf.cast(spatial_shape * scale, tf.int32)
    new_image = tf.image.resize(image, new_shape)
    return new_image[tf.newaxis, :]


In [18]:
import cv2
import tensorflow as tf
import numpy as np
def neural_video_transfer(base_image_path, style_reference_image_path,video_path : str = "videos/coast.mp4", output_video_path : str  = "output_video.mp4", img_height : int = 400, img_width : int = 400):
    """Apply one style-transfer step to every frame of a video and save the result.

    Each frame is resized to (img_height, img_width), converted to the same
    VGG19 input space as the base/style images, optimised for one step with
    the module-level `optimizer`, converted back to an 8-bit image and written
    to `output_video_path` at the source video's resolution.

    Args:
        base_image_path: path of the content-target image.
        style_reference_image_path: path of the style-target image.
        video_path: input video file.
        output_video_path: file the stylised video is written to (mp4v).
        img_height: processing height; must equal the height produced by
            preprocess_image.
        img_width: processing width; must equal the width produced by
            preprocess_image.

    Raises:
        ValueError: if (img_height, img_width) differs from the size of the
            preprocessed base image.
        IOError: if the input video cannot be opened.
    """
    # Load the base and style reference images
    base_image = preprocess_image(base_image_path)
    style_reference_image = preprocess_image(style_reference_image_path)

    # preprocess_image and deprocess_image size their data from module-level
    # settings, not from this function's arguments. The three images are
    # concatenated into one batch, so a mismatch can only fail later with a
    # much less readable error.
    expected_hw = tuple(int(dim) for dim in base_image.shape[1:3])
    if (img_height, img_width) != expected_hw:
        raise ValueError(
            f"img_height/img_width {(img_height, img_width)} must match the "
            f"preprocessed base image size {expected_hw}")

    # NOTE(review): the content target is the static base image, not the
    # current frame — confirm this is intended for video.

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        video.release()
        raise IOError(f"Could not open video: {video_path}")

    output_video = None
    try:
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional frame rate (e.g. 29.97); truncating it changes
        # the playback speed. Fall back to 30 if the container reports none.
        fps = video.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

        # Process frames one by one
        while True:
            ret, frame = video.read()
            if not ret:
                break  # End of video

            # OpenCV decodes to BGR; the helpers below expect RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_tensor = frame_image_read(frame_rgb)
            frame_tensor_resized = tf.image.resize(frame_tensor, (img_height, img_width))

            # frame_image_read returns RGB in [0, 1], whereas the base/style
            # images went through vgg19.preprocess_input and deprocess_image
            # undoes exactly that transform. Bring the frame into the same
            # space before optimising it.
            frame_vgg = keras.applications.vgg19.preprocess_input(
                frame_tensor_resized.numpy() * 255.0)

            # Apply the style transfer step
            _, processed_frame = process_frame_or_batch(frame_vgg, base_image, style_reference_image, optimizer)

            # Back to an 8-bit RGB image, then to OpenCV's BGR order.
            frame_output = deprocess_image(processed_frame.numpy())
            frame_color_output = cv2.cvtColor(frame_output, cv2.COLOR_RGB2BGR)

            # The writer was opened with the source resolution and only
            # accepts frames of exactly that size.
            frame_color_output = cv2.resize(frame_color_output, (frame_width, frame_height))
            output_video.write(frame_color_output)
    finally:
        # Release resources even if a frame fails part-way through.
        video.release()
        if output_video is not None:
            output_video.release()
# Video file path
video_path = "videos/coast.mp4"
output_video_path = "output_video.mp4"

# Run the transfer through neural_video_transfer (defined above) instead of
# repeating its body inline. The module-level img_height / img_width are
# passed explicitly so frames are resized to the same size that
# preprocess_image and deprocess_image work with.
neural_video_transfer(
    base_image_path,
    style_reference_image_path,
    video_path=video_path,
    output_video_path=output_video_path,
    img_height=img_height,
    img_width=img_width,
)


In [19]:
import cv2
import tensorflow as tf
import numpy as np
def apply_camera(output_camera_video_path : str = "output_video.mp4"):
    """Stylise the default webcam stream live and record it to a file.

    Every captured frame is resized to the module-level
    (img_height, img_width), converted to the VGG19 input space, optimised for
    one step against the module-level base_image / style_reference_image with
    the module-level optimizer, shown in a window and written to
    `output_camera_video_path`. Press "q" in the window to stop.

    Args:
        output_camera_video_path: file the stylised stream is written to (mp4v).

    Raises:
        IOError: if the default camera cannot be opened.
    """
    # Access the camera using OpenCV
    camera = cv2.VideoCapture(0)  # "0" usually refers to the default webcam
    if not camera.isOpened():
        camera.release()
        raise IOError("Could not open the default camera (index 0)")

    output_camera_video = None
    try:
        frame_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Cameras frequently report 0 here, which yields an unusable file;
        # fall back to a conventional 30 fps in that case.
        fps = camera.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            fps = 30.0

        # Initialize Video Writer for output
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_camera_video = cv2.VideoWriter(output_camera_video_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = camera.read()
            if not ret:
                break

            # OpenCV captures BGR; the helpers below expect RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_tensor = frame_image_read(frame_rgb)
            frame_tensor_resized = tf.image.resize(frame_tensor, (img_height, img_width))

            # frame_image_read returns RGB in [0, 1], whereas base_image /
            # style_reference_image went through vgg19.preprocess_input and
            # deprocess_image undoes exactly that transform. Bring the frame
            # into the same space before optimising it.
            frame_vgg = keras.applications.vgg19.preprocess_input(
                frame_tensor_resized.numpy() * 255.0)

            _, processed_frame = process_frame_or_batch(frame_vgg, base_image, style_reference_image, optimizer)

            frame_output = deprocess_image(processed_frame.numpy())
            frame_color_output = cv2.cvtColor(frame_output, cv2.COLOR_RGB2BGR)

            # The writer was opened with the camera resolution and only
            # accepts frames of exactly that size.
            frame_for_writer = cv2.resize(frame_color_output, (frame_width, frame_height))
            output_camera_video.write(frame_for_writer)

            cv2.imshow('Processed Frame', frame_color_output)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if a frame fails part-way through.
        camera.release()
        if output_camera_video is not None:
            output_camera_video.release()
        cv2.destroyAllWindows()
