All the effort invested in developing and testing the individual pipelines—ranging from the scratch-built VGG19 and MobileNet optimizers to the integration of the AdaIN approach—was ultimately consolidated into a unified Gradio interface. This 3-in-1 application provides a streamlined, user-friendly platform where each model can be selected, styles blended, and features such as colour preservation, foreground masking, and sharpness control applied. Bringing these pipelines together in a single interface not only highlights the robustness of each approach but also demonstrates how they complement one another within a cohesive prototype.

In [6]:

# Unified NST Gradio App with Metrics
# AdaIN (TFHub) | VGG Optimizer | MobileNet Optimizer in one UI

# Install all necessary packages
!pip install -q tensorflow tensorflow_hub gradio opencv-python torch torchvision lpips scikit-image

# Imports
import gradio as gr
import numpy as np
import tensorflow as tf
# Pretrained models from TFHub
import tensorflow_hub as hub
# OpenCV for image processing
import cv2
import torch
# Perceptual similarity metric
import lpips
# Pretrained models
from tensorflow.keras.applications import vgg19, mobilenet_v2
from tensorflow.keras.models import Model
# Import image utilities
from PIL import Image, ImageFilter
from skimage.metrics import structural_similarity as ssim

# Common helper functions

# Convert any numpy image to float32 [0,1]
def to_float01(np_img):
    """Return ``np_img`` as a float32 array clipped to [0, 1].

    uint8 input is divided by 255 first; any other dtype is only cast to
    float32 and clipped, so it is expected to already be on a 0-1 scale.
    """
    as_float = np_img.astype(np.float32)
    is_8bit = np_img.dtype == np.uint8
    # Only 8-bit images are rescaled; everything else is taken as-is.
    scaled = as_float / 255.0 if is_8bit else as_float
    return np.clip(scaled, 0, 1)


# Build a mask that marks near-black content pixels as the foreground (the region to stylise)
def make_black_mask_from_np(content_np_resized, threshold=0.05):
    """Return a float32 mask of shape (..., 1) flagging near-black pixels.

    A pixel is flagged (1.0) only when every channel along the last axis is
    strictly below ``threshold``; all other pixels are 0.0.
    """
    # Per-channel comparison, then require it to hold for all channels.
    is_dark = np.less(content_np_resized, threshold).all(axis=-1)
    # Trailing singleton axis lets the mask broadcast against H x W x C images.
    return np.expand_dims(is_dark.astype(np.float32), axis=-1)


# Preserve content colours (stylize luminance, keep chroma)
def apply_colour_preservation(stylized_f01, content_resized_f01):
    """Combine the stylised luminance with the content image's chroma.

    Both inputs are RGB float images on a 0-1 scale with matching shapes.
    Returns an RGB float32 image on a 0-1 scale.
    """
    def _rgb01_to_yuv_u8(img_f01):
        # The colour conversion is done on 8-bit data, so quantise first.
        as_u8 = (np.clip(img_f01, 0, 1) * 255).astype(np.uint8)
        return cv2.cvtColor(as_u8, cv2.COLOR_RGB2YUV)

    # Start from the content's YUV (keeps its U/V chroma planes) ...
    merged_yuv = _rgb01_to_yuv_u8(content_resized_f01)
    # ... and overwrite only the Y (luminance) plane with the stylised one.
    merged_yuv[..., 0] = _rgb01_to_yuv_u8(stylized_f01)[..., 0]

    merged_rgb = cv2.cvtColor(merged_yuv, cv2.COLOR_YUV2RGB)
    return merged_rgb.astype(np.float32) / 255.0


# Apply sharpness enhancement using UnsharpMask filter
def apply_sharpness(stylized_f01, sharpness_value):
    """Sharpen a 0-1 float RGB image with PIL's UnsharpMask filter.

    ``sharpness_value`` is mapped to the filter's ``percent`` argument as
    ``int(sharpness_value * 200)``; the radius is fixed at 1.5.
    Returns a float32 image on a 0-1 scale.
    """
    # PIL filters work on 8-bit images, so quantise before filtering.
    as_u8 = (np.clip(stylized_f01, 0, 1) * 255).astype(np.uint8)
    source = Image.fromarray(as_u8)
    unsharp = ImageFilter.UnsharpMask(radius=1.5, percent=int(sharpness_value*200))
    sharpened = source.filter(unsharp)
    return np.asarray(sharpened).astype(np.float32) / 255.0


# Collect style images with their weights and normalize weights
def collect_styles_and_weights(styles, weights):
    """Drop empty style slots and normalise the remaining weights to sum to 1.

    Args:
        styles: style images, with ``None`` for unused slots.
        weights: one blend weight per entry in ``styles``.

    Returns:
        ``(images, weights)`` as two lists of equal length.

    Raises:
        ValueError: if every style slot is ``None``.
    """
    kept_imgs = []
    kept_ws = []
    for img, w in zip(styles, weights):
        if img is None:
            continue
        kept_imgs.append(img)
        kept_ws.append(w)

    if not kept_imgs:
        raise ValueError("Need ≥1 style image")

    total = float(sum(kept_ws))
    if total <= 1e-8:
        # All weights are (effectively) zero: give everything to the first style.
        normalised = [1.0] + [0.0] * (len(kept_imgs) - 1)
    else:
        normalised = [float(w) / total for w in kept_ws]
    return kept_imgs, normalised

# Shared LPIPS perceptual-similarity model used by compute_metrics below.
# Loaded once at module level (AlexNet backbone) and switched to eval mode.
lpips_model = lpips.LPIPS(net='alex').eval()

# Compute SSIM and LPIPS metrics
def compute_metrics(stylized_np, content_pil, style_imgs, blend_weights):
    """Compute SSIM and LPIPS scores for a stylised result.

    All images are brought to 256x256 RGB uint8 before comparison.

    Args:
        stylized_np: stylised RGB image as a float array on a 0-1 scale.
        content_pil: content image as a PIL image.
        style_imgs: style PIL images (``None`` entries are ignored).
        blend_weights: one weight per entry in ``style_imgs``; the style
            reference is the pixel-wise weighted sum of the style images.

    Returns:
        ``(ssim_vs_content, lpips_vs_content, lpips_vs_blended_style)`` as
        plain Python floats.
    """
    def prep(im):
        # PIL image -> 256x256 RGB uint8 array.
        return cv2.resize(np.array(im.convert("RGB")), (256, 256))

    def to_torch(np_img):
        # HxWxC uint8 -> 1xCxHxW float tensor scaled to [-1, 1] for LPIPS.
        scaled = torch.from_numpy(np_img.astype(np.float32) / 255.0)
        return scaled.permute(2, 0, 1).unsqueeze(0) * 2 - 1

    # Clip before the uint8 cast so out-of-range values cannot wrap around.
    stylized_u8 = (np.clip(stylized_np, 0, 1) * 255).astype(np.uint8)
    stylized_256 = prep(Image.fromarray(stylized_u8))
    content_256 = prep(content_pil)

    # Pixel-space weighted average of the style images: the LPIPS style reference.
    blended_style = np.zeros_like(stylized_256, dtype=np.float32)
    for img, w in zip(style_imgs, blend_weights):
        if img is not None:
            blended_style += prep(img).astype(np.float32) * w
    blended_style = np.clip(blended_style, 0, 255).astype(np.uint8)

    # SSIM (Structural Similarity Index Measure) against the content image.
    ssim_val = ssim(content_256, stylized_256, channel_axis=-1)

    styl_t = to_torch(stylized_256)
    cont_t = to_torch(content_256)
    style_t = to_torch(blended_style)

    # Evaluation only: no need to record an autograd graph for these passes.
    with torch.no_grad():
        lpips_content = lpips_model(styl_t, cont_t).item()
        lpips_style = lpips_model(styl_t, style_t).item()

    return float(ssim_val), lpips_content, lpips_style


Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /usr/local/lib/python3.12/dist-packages/lpips/weights/v0.1/alex.pth


In [7]:
# AdaIN (TFHub Pretrained)
# Load the pretrained arbitrary-style-transfer model from TFHub once at module level.
# NOTE(review): the hub handle is Magenta's "arbitrary-image-stylization-v1-256";
# this file refers to it as "AdaIN" throughout — confirm the naming is intended.
adain_model = hub.load("https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2")

# Convert PIL image to TensorFlow tensor [0,1]
def load_image_from_upload(image, target_size=(256,256)):
    """Turn a PIL image into a batched float32 TensorFlow tensor on a 0-1 scale.

    The image is converted to RGB and resized to ``target_size`` first; the
    result has a leading batch axis of size 1.
    """
    rgb_resized = image.convert("RGB").resize(target_size)
    pixels = np.array(rgb_resized).astype(np.float32) / 255.0
    # Prepend the batch axis expected by the model.
    batched = pixels[None, ...]
    return tf.convert_to_tensor(batched, dtype=tf.float32)


# Blend multiple styles with AdaIN model
def blend_styles_with_adain(content_img, style_imgs, weights):
    """Stylise ``content_img`` with a weighted pixel-space blend of the styles.

    Weights are re-normalised to sum to 1, the style images are blended into a
    single style tensor, and the TFHub model is run once on the result.
    Returns the stylised output as a uint8 PIL image.
    """
    weights = np.array(weights)
    weights = weights / weights.sum()

    content_tensor = load_image_from_upload(content_img)
    style_tensors = [load_image_from_upload(style) for style in style_imgs]

    # Accumulate the weighted sum of the style tensors.
    style_mix = tf.zeros_like(style_tensors[0])
    for idx, style_tensor in enumerate(style_tensors):
        style_mix += style_tensor * weights[idx]

    # The model returns a sequence; its first element is the stylised batch.
    stylized_batch = adain_model(content_tensor, style_mix)[0]
    pixels = tf.squeeze(stylized_batch).numpy()
    pixels_u8 = np.clip(pixels * 255, 0, 255).astype("uint8")
    return Image.fromarray(pixels_u8)


# Gradio callback for AdaIN stylization
def stylize_ui_adain(content,s1,s2,s3,s4,s5,
                     w1,w2,w3,w4,w5,
                     alpha,beta,preserve_colour,foreground_only,sharpness):
    """Gradio callback: run the TFHub stylisation model plus optional extras.

    ``alpha`` and ``beta`` are accepted so the signature matches the optimiser
    callbacks; they are not used here. Returns a 512x512 float RGB image
    clipped to [0, 1].
    """
    styles, weights = collect_styles_and_weights(
        [s1, s2, s3, s4, s5], [w1, w2, w3, w4, w5])

    # Model output (PIL, uint8) -> float 0-1.
    model_out = blend_styles_with_adain(content, styles, weights)
    out_f01 = np.array(model_out).astype(np.float32) / 255.0

    # Bring both the stylised result and the content to 512x512.
    out_u8 = (out_f01 * 255).astype(np.uint8)
    result = cv2.resize(out_u8, (512, 512),
                        interpolation=cv2.INTER_CUBIC).astype(np.float32) / 255.0
    content_f01 = np.array(content.resize((512, 512))).astype(np.float32) / 255.0

    # Optional post-processing, applied in a fixed order.
    if preserve_colour:
        result = apply_colour_preservation(result, content_f01)
    if sharpness > 0:
        result = apply_sharpness(result, sharpness)
    if foreground_only:
        # Stylised pixels where the mask is 1, original content elsewhere.
        fg_mask = make_black_mask_from_np(content_f01, threshold=0.05)
        result = fg_mask * result + (1.0 - fg_mask) * content_f01

    return np.clip(result, 0, 1)


In [8]:
# Build VGG Optimizer

# Extract selected VGG layers
def vgg_layers(layer_names):
    """Build a frozen VGG19 model that outputs the named intermediate layers.

    The returned Keras model maps the VGG19 input to a list of activations,
    one per entry in ``layer_names`` and in the same order.
    """
    backbone = vgg19.VGG19(include_top=False, weights='imagenet')
    # Feature extractor only: the backbone weights are never trained.
    backbone.trainable = False
    tapped = [backbone.get_layer(layer).output for layer in layer_names]
    return Model([backbone.input], tapped)


# Compute Gram matrix (for style loss)
def gram_matrix(x):
    """Return the Gram matrix of a 4-D feature tensor, used for style loss.

    Channel-by-channel inner products are summed over axes 1 and 2 and then
    divided by the product of those two axis sizes.
    """
    dims = tf.shape(x)
    # Number of positions summed over, used as the normalisation factor.
    positions = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum('bijc,bijd->bcd', x, x)
    return channel_products / positions


# Style and Content feature extractor using VGG
class VGGStyleContentModel(tf.keras.models.Model):
    """VGG19-based extractor returning style Gram matrices and content features."""

    def __init__(self, style_layers, content_layers):
        super().__init__()
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style = len(style_layers)
        # One backbone call yields the style activations first, then content.
        self.vgg = vgg_layers(style_layers + content_layers)

    def call(self, inputs):
        """Map a 0-1 image batch to ``{'style': {...}, 'content': {...}}``."""
        # VGG preprocessing expects a 0-255 range.
        scaled = inputs * 255.0
        features = self.vgg(vgg19.preprocess_input(scaled))

        style_feats = features[:self.num_style]
        content_feats = features[self.num_style:]
        # Style is represented by Gram matrices rather than raw activations.
        grams = [gram_matrix(feat) for feat in style_feats]

        return {
            'style': dict(zip(self.style_layers, grams)),
            'content': dict(zip(self.content_layers, content_feats)),
        }


# Preprocess PIL image for VGG [0,1] → batch tensor
def preprocess_img_vgg(pil_img):
    """Convert a PIL image to a batched 512x512 float tensor on a 0-1 scale.

    The image is forced to RGB first (as ``load_image_from_upload`` does) so
    that RGBA, palette or greyscale uploads still yield the three channels the
    VGG extractor is fed.
    """
    img = np.array(pil_img.convert("RGB")).astype(np.float32)/255.0
    img = tf.image.resize(img,(512,512))
    # Add the leading batch axis.
    return img[tf.newaxis,:]


# Blend multiple style targets
def blend_style_targets(style_images, extractor, blend_weights):
    """Return the weighted sum of the style targets of several style images.

    Args:
        style_images: PIL style images.
        extractor: callable returning a dict with a ``'style'`` entry that maps
            layer names to style features.
        blend_weights: one weight per style image.

    Returns:
        Dict mapping each style layer name to its blended target, or ``None``
        when ``style_images`` is empty.
    """
    blended = None
    for img, weight in zip(style_images, blend_weights):
        target = extractor(preprocess_img_vgg(img))['style']
        if blended is None:
            # First style: start the accumulator with its weighted targets.
            blended = {k: v * weight for k, v in target.items()}
        else:
            # Later styles: add their weighted targets layer by layer.
            for k in blended:
                blended[k] = blended[k] + target[k] * weight
    return blended


# Optimization loop for VGG-based NST
def run_style_transfer_vgg(content, style_imgs, blend_weights, alpha, beta):
    """Optimise an image against VGG19 content and blended style targets.

    The image starts as the (512x512) content image and is updated for 50
    Adam steps (learning rate 0.02) on ``alpha * content_loss + beta *
    style_loss``, being clipped to [0, 1] after every step.
    Returns the result as a numpy array with the batch axis removed.
    """
    content_in = preprocess_img_vgg(content)
    extractor = VGGStyleContentModel(
        ['block1_conv1','block2_conv1','block3_conv1','block4_conv1','block5_conv1'],
        ['block5_conv2'])

    style_target = blend_style_targets(style_imgs, extractor, blend_weights)
    content_target = extractor(content_in)['content']

    def _summed_mse(current, wanted):
        # Sum over layers of the mean squared difference to the target.
        return tf.add_n([tf.reduce_mean((current[name] - wanted[name]) ** 2)
                         for name in wanted])

    # The content image itself is the optimisation variable.
    canvas = tf.Variable(content_in)
    optimizer = tf.optimizers.Adam(0.02)

    for _step in range(50):
        with tf.GradientTape() as tape:
            feats = extractor(canvas)
            style_term = _summed_mse(feats['style'], style_target)
            content_term = _summed_mse(feats['content'], content_target)
            total = alpha * content_term + beta * style_term
        gradient = tape.gradient(total, canvas)
        optimizer.apply_gradients([(gradient, canvas)])
        # Keep pixel values in the valid range after each update.
        canvas.assign(tf.clip_by_value(canvas, 0, 1))

    return tf.squeeze(canvas).numpy()


# Gradio callback for VGG optimizer stylization
def stylize_ui_vgg(content,s1,s2,s3,s4,s5,
                   w1,w2,w3,w4,w5,
                   alpha,beta,preserve_colour,foreground_only,sharpness):
    """Gradio callback: run the VGG19 optimiser plus optional extras.

    Returns a float RGB image clipped to [0, 1].
    """
    styles, weights = collect_styles_and_weights(
        [s1, s2, s3, s4, s5], [w1, w2, w3, w4, w5])
    result = run_style_transfer_vgg(content, styles, weights, alpha, beta)

    # Content at 512x512 (0-1 floats) is the reference for the extras below.
    content_f01 = np.array(content.resize((512, 512))).astype(np.float32) / 255.0

    if preserve_colour:
        result = apply_colour_preservation(result, content_f01)
    if sharpness > 0:
        result = apply_sharpness(result, sharpness)
    if foreground_only:
        # Stylised pixels where the mask is 1, original content elsewhere.
        fg_mask = make_black_mask_from_np(content_f01, threshold=0.05)
        result = fg_mask * result + (1.0 - fg_mask) * content_f01

    return np.clip(result, 0, 1)


In [9]:
# MobileNet Optimizer (Multi-stage)

# Define the style layers we want to extract from MobileNetV2
# Per-layer style weights for MobileNetV2, listed from the earliest layer to
# the deepest; the weight decreases with depth (1.0 down to 0.4).
STYLE_LAYER_WEIGHTS = {
    'block_1_expand_relu': 1.0,
    'block_3_expand_relu': 0.8,
    'block_6_expand_relu': 0.6,
    'block_10_expand_relu': 0.5,
    'block_13_expand_relu': 0.4,
}

# Style layers to extract, in the same order as the weight table above
# (dicts preserve insertion order).
STYLE_LAYERS = list(STYLE_LAYER_WEIGHTS)

# Content layers (deep layers, used to preserve high-level structure).
CONTENT_LAYERS = ['block_13_expand_relu', 'block_16_project']


# MobileNet feature extractor class
class MNetStyleContentModel(tf.keras.models.Model):
    """MobileNetV2-based extractor returning style Gram matrices and content features."""

    def __init__(self, style_layers, content_layers, img_size):
        super().__init__()
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style = len(style_layers)

        # Frozen ImageNet backbone built for square inputs of side img_size.
        backbone = mobilenet_v2.MobileNetV2(include_top=False,
                                            weights='imagenet',
                                            input_shape=(img_size, img_size, 3))
        backbone.trainable = False

        # Tap style layers first, then content layers, in a single encoder.
        tapped = [backbone.get_layer(name).output
                  for name in style_layers + content_layers]
        self.encoder = Model([backbone.input], tapped)

    def call(self, x):
        """Map an image batch to ``{'style': {...}, 'content': {...}}``."""
        features = self.encoder(x)
        style_feats = features[:self.num_style]
        content_feats = features[self.num_style:]

        # Style is represented by Gram matrices rather than raw activations.
        grams = [gram_matrix(feat) for feat in style_feats]

        return {
            'style': dict(zip(self.style_layers, grams)),
            'content': dict(zip(self.content_layers, content_feats)),
        }


# Preprocess input PIL image for MobileNetV2
def preprocess_for_model(pil_img, img_size):
    """Convert a PIL image to a batched MobileNetV2 input of side ``img_size``.

    The image is forced to RGB first (as ``load_image_from_upload`` does) so
    that RGBA, palette or greyscale uploads still yield the three channels the
    encoder is built for. It is then resized and passed through
    ``mobilenet_v2.preprocess_input``.
    """
    # PIL -> numpy on a 0-1 scale
    x = np.array(pil_img.convert("RGB")).astype(np.float32)/255.0
    # Resize to the target resolution
    x = tf.image.resize(x, (img_size,img_size), antialias=True)
    # MobileNet-specific preprocessing is applied to 0-255 values
    x = mobilenet_v2.preprocess_input(x*255.0)
    # Add the leading batch axis
    return x[None,...]


# Convert MobileNet output ([-1,1]) back to [0,1] float image
def deprocess_from_model(x_minus1_to1):
    """Map a batched [-1, 1] image tensor to a float32 numpy image in [0, 1].

    Axis 0 (the batch axis) is squeezed away, so it must have size 1.
    """
    unbatched = tf.squeeze(x_minus1_to1, 0)
    # Shift/scale from [-1, 1] to [0, 1], then clamp.
    rescaled = (unbatched + 1.0) / 2.0
    clamped = tf.clip_by_value(rescaled, 0.0, 1.0)
    return clamped.numpy().astype(np.float32)


# Blend multiple style targets for MobileNet
def blended_style_targets(extractor, style_pils, blend_weights, img_size):
    """Return the weighted sum of the style targets of several style images.

    Args:
        extractor: callable returning a dict with a ``'style'`` entry that maps
            layer names to style features.
        style_pils: PIL style images.
        blend_weights: one weight per style image.
        img_size: side length the images are preprocessed to.

    Returns:
        Dict mapping each style layer name to its blended target, or ``None``
        when ``style_pils`` is empty.
    """
    accum = None
    for img, w in zip(style_pils, blend_weights):
        st = extractor(preprocess_for_model(img, img_size))['style']
        if accum is None:
            # First style: start the accumulator with its weighted targets.
            accum = {k: v * w for k, v in st.items()}
        else:
            # Later styles: add their weighted targets layer by layer.
            for k in accum:
                accum[k] = accum[k] + st[k] * w
    return accum


# Laplacian operator for edge loss

def laplacian(img_minus1_to1):
    """Apply a 3x3 Laplacian edge filter to a [-1, 1] RGB image batch.

    The input is first rescaled to [0, 1]. The same 3x3 stencil is used for
    each of the three input channels and the kernel has a single output
    channel, so the per-channel responses are summed into one edge map.
    """
    unit_range = (img_minus1_to1 + 1.0) / 2.0

    # 4-neighbour Laplacian stencil.
    stencil = tf.constant([[0., -1., 0.],
                           [-1., 4., -1.],
                           [0., -1., 0.]], tf.float32)

    # conv2d kernels are [H, W, in_channels, out_channels]: 3x3x1x1 -> 3x3x3x1.
    kernel = tf.repeat(tf.reshape(stencil, [3, 3, 1, 1]), 3, axis=2)

    # SAME padding keeps the spatial size unchanged.
    return tf.nn.conv2d(unit_range, kernel, strides=1, padding='SAME')


# Multi-stage optimization loop (224 → 512 → 768 )

def run_stage(content_pil, style_pils, blend_weights, img_size,
              steps, lr_start, lr_end, alpha_content, beta_style,
              tv_weight=1e-6, edge_weight=5e-3, init_from=None):
    """Run one resolution stage of the MobileNetV2 style-transfer optimisation.

    Args:
        content_pil: content image (PIL).
        style_pils: list of style images (PIL); the first one also seeds the
            initial image when ``init_from`` is None.
        blend_weights: one blend weight per style image.
        img_size: side length (pixels) this stage works at.
        steps: number of optimiser steps.
        lr_start: initial Adam learning rate.
        lr_end: final learning rate; passed to CosineDecay as the fraction
            ``lr_end / lr_start``.
        alpha_content: content-loss weight.
        beta_style: style-loss weight.
        tv_weight: total-variation loss weight.
        edge_weight: Laplacian edge-loss weight.
        init_from: optional image tensor from a previous stage, upscaled and
            used as the starting point.

    Returns:
        The optimised image as a ``tf.Variable`` in the model's [-1, 1] range.
    """

    # Preprocess the content image to MobileNet format at target resolution
    content = preprocess_for_model(content_pil, img_size)

    # Initialize MobileNet-based extractor with style/content layers
    # (a new extractor is built on every call because input_shape depends on img_size)
    extractor = MNetStyleContentModel(STYLE_LAYERS, CONTENT_LAYERS, img_size)

    # Get blended style features from all style images
    style_tgt = blended_style_targets(extractor, style_pils, blend_weights, img_size)

    # Get content target features
    content_tgt = extractor(content)['content']

    # If no initialization image is passed
    if init_from is None:
        # Use weighted mix (60% content, 40% first style) in [0,1] as seed,
        # then map it back to the model's input range
        style_seed = preprocess_for_model(style_pils[0], img_size)
        seed = tf.clip_by_value(((content + 1) / 2.0) * 0.6 + ((style_seed + 1) / 2.0) * 0.4, 0, 1)
        image = tf.Variable(mobilenet_v2.preprocess_input(seed * 255.0))
    else:
        # Upscale previous stage output to current size and use it as init
        # NOTE(review): bicubic resampling can overshoot [-1, 1]; the first clamp
        # only happens after the first optimiser step — confirm this is acceptable.
        up = tf.image.resize(init_from, (img_size, img_size), method='bicubic', antialias=True)
        image = tf.Variable(up)
    # Cosine learning-rate decay over `steps`, with floor fraction lr_end / lr_start
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(lr_start, steps, alpha=lr_end / lr_start)
    opt = tf.optimizers.Adam(lr_schedule)

    # Normalize style loss by total style weights
    denom = sum(STYLE_LAYER_WEIGHTS.values())

    # Define one training step (closes over extractor, targets, optimizer and weights)
    @tf.function
    def train_step(img):
        with tf.GradientTape() as tape:
            # Extract features from current image
            outs = extractor(img)
            s, c = outs['style'], outs['content']

            # Compute style loss (weighted MSE across layers)
            s_loss = tf.add_n([
                STYLE_LAYER_WEIGHTS[k] * tf.reduce_mean((s[k] - style_tgt[k]) ** 2)
                for k in s
            ]) * (beta_style / denom)

            # Compute content loss (MSE across layers, averaged over the content layers)
            c_loss = tf.add_n([
                tf.reduce_mean((c[k] - content_tgt[k]) ** 2)
                for k in c
            ]) * (alpha_content / len(CONTENT_LAYERS))

            # Add total variation loss (encourages smoothness)
            tv = tf.reduce_mean(tf.image.total_variation(img)) * tv_weight

            # Add edge loss (match Laplacian edges of content and stylized)
            edge = tf.reduce_mean((laplacian(img) - laplacian(content)) ** 2) * edge_weight

            # Final total loss
            loss = s_loss + c_loss + tv + edge

        # Compute gradients of loss wrt. image
        grad = tape.gradient(loss, img)

        # Clip gradients to avoid exploding updates
        grad = tf.clip_by_norm(grad, 10.0)

        # Apply optimizer step
        opt.apply_gradients([(grad, img)])

        # Clamp image values back into valid range [-1, 1]
        img.assign(tf.clip_by_value(img, -1, 1))
        return loss

    # Run training loop for the specified number of steps (the returned loss is discarded)
    for _ in range(steps):
        _ = train_step(image)

    # Return optimized image tensor
    return image

# Gradio callback for MobileNet style transfer run

def stylize_ui_mnet(content, s1, s2, s3, s4, s5,
                    w1, w2, w3, w4, w5,
                    alpha, beta, preserve_colour, foreground_only, sharpness):
    """Gradio callback: run the three-stage MobileNetV2 optimiser plus extras.

    The image is optimised at 224, 512 and 768 pixels in turn, each stage
    starting from the previous stage's output, then resized to 512x512.
    Returns a float RGB image clipped to [0, 1].
    """
    style_imgs, blend_weights = collect_styles_and_weights(
        [s1, s2, s3, s4, s5],
        [w1, w2, w3, w4, w5]
    )

    # (size, steps, lr_start, lr_end, content weight) for each stage;
    # the content weight is scaled up as the resolution grows.
    stage_plan = (
        (224, 400, 0.05, 0.01, alpha),
        (512, 350, 0.03, 0.006, alpha * 2),
        (768, 300, 0.02, 0.004, alpha * 3),
    )
    canvas = None
    for size, steps, lr_hi, lr_lo, content_weight in stage_plan:
        canvas = run_stage(content, style_imgs, blend_weights, size, steps,
                           lr_hi, lr_lo, content_weight, beta, init_from=canvas)

    # Final tensor -> 0-1 float image, resized to 512x512 for UI consistency.
    result = deprocess_from_model(canvas)
    result = tf.image.resize(result, (512, 512), antialias=True, method='bicubic').numpy()

    # Content at 512x512 (0-1 floats) is the reference for the extras below.
    content_f01 = to_float01(np.array(content))
    content_u8 = (content_f01 * 255).astype(np.uint8)
    content_512 = Image.fromarray(content_u8).resize((512, 512), Image.BILINEAR)
    content_resized = np.array(content_512).astype(np.float32) / 255.0

    if preserve_colour:
        # Content colours combined with the stylised image's luminance.
        result = apply_colour_preservation(result, content_resized)
    if sharpness > 0:
        result = apply_sharpness(result, sharpness)
    if foreground_only:
        # Stylised pixels where the mask is 1, original content elsewhere.
        fg_mask = make_black_mask_from_np(content_resized, threshold=0.05)
        result = fg_mask * result + (1.0 - fg_mask) * content_resized

    return np.clip(result, 0, 1)



In [10]:
#Implement final 3-in-1 Gradio NST App
# Dispatch routes request to chosen NST model
def unified_stylize(model_choice, content,
                    s1, s2, s3, s4, s5,
                    w1, w2, w3, w4, w5,
                    alpha, beta, preserve_colour, foreground_only, sharpness):
    """Route a stylisation request to the chosen model and score the result.

    Args:
        model_choice: one of ``"AdaIN"``, ``"VGG Optimizer"`` or
            ``"MobileNet Optimizer"``.
        content: content image (PIL).
        s1..s5: optional style images (PIL or ``None``).
        w1..w5: blend weight for each style slot.
        alpha, beta: content / style loss weights for the optimiser models.
        preserve_colour, foreground_only, sharpness: post-processing options.

    Returns:
        ``(stylised_image, ssim, lpips_vs_content, lpips_vs_style)``.

    Raises:
        ValueError: if no content image is given, if ``model_choice`` is not
            recognised, or if no style image is supplied.
    """
    # Fail early with a clear message instead of an AttributeError deep inside
    # one of the pipelines.
    if content is None:
        raise ValueError("Need a content image")

    pipeline_args = (content, s1, s2, s3, s4, s5,
                     w1, w2, w3, w4, w5,
                     alpha, beta, preserve_colour, foreground_only, sharpness)

    if model_choice == "AdaIN":
        out = stylize_ui_adain(*pipeline_args)
    elif model_choice == "VGG Optimizer":
        out = stylize_ui_vgg(*pipeline_args)
    elif model_choice == "MobileNet Optimizer":
        out = stylize_ui_mnet(*pipeline_args)
    else:
        # Previously this fell through and hit an unbound `out` below.
        raise ValueError(f"Unknown model choice: {model_choice!r}")

    # Compute SSIM and LPIPS metrics for evaluation
    style_imgs, blend_weights = collect_styles_and_weights([s1, s2, s3, s4, s5],
                                                           [w1, w2, w3, w4, w5])
    ssim_val, lpips_c, lpips_s = compute_metrics(out, content, style_imgs, blend_weights)

    # Return stylised image and metrics
    return out, ssim_val, lpips_c, lpips_s

# Gradio User Interface
# Gradio User Interface: one page wiring every control to unified_stylize.
with gr.Blocks() as demo:

    # Title and description (the description now lists the metrics and how to
    # read them; previously it ended at "metrics are computed:" with no list).
    gr.Markdown("##  3-in-1 NST App : AdaIN | VGG Optimizer | MobileNet Optimizer")
    gr.Markdown(
        "After you click \"Stylise\" — your chosen model makes the stylised image, "
        "then metrics are computed:\n\n"
        "- **SSIM vs Content** — higher is better\n"
        "- **LPIPS vs Content** — lower is better\n"
        "- **LPIPS vs Blended Style** — lower is better\n"
    )

    # Dropdown menu for model selection; values must match unified_stylize's branches
    model_choice = gr.Dropdown(
        ["AdaIN", "VGG Optimizer", "MobileNet Optimizer"],
        value="AdaIN",
        label="Select Model"
    )

    # Upload option for the content image (delivered to the callback as RGB PIL)
    content_input = gr.Image(
        label="Content Image",
        type="pil",
        image_mode="RGB",
        height=256,
        width=256
    )

    # Row of 5 style image upload slots (unused slots arrive as None)
    with gr.Row():
        style_input1 = gr.Image(label="Style 1", type="pil")
        style_input2 = gr.Image(label="Style 2", type="pil")
        style_input3 = gr.Image(label="Style 3", type="pil")
        style_input4 = gr.Image(label="Style 4", type="pil")
        style_input5 = gr.Image(label="Style 5", type="pil")

    # Sliders for the blend weight of each style image (only style 1 is on by default)
    with gr.Row():
        blend1 = gr.Slider(0, 1, 1, step=0.01, label="Style 1 Strength")
        blend2 = gr.Slider(0, 1, 0, step=0.01, label="Style 2 Strength")
        blend3 = gr.Slider(0, 1, 0, step=0.01, label="Style 3 Strength")
        blend4 = gr.Slider(0, 1, 0, step=0.01, label="Style 4 Strength")
        blend5 = gr.Slider(0, 1, 0, step=0.01, label="Style 5 Strength")

    # Controls for content/style weights, colour preservation, foreground styling and sharpness
    with gr.Row():
        alpha_slider = gr.Slider(1, 5000, 1000, step=10, label="α (Content Weight)")
        beta_slider = gr.Slider(1e-3, 1.0, 1e-2, step=1e-3, label="β (Style Weight)")
        preserve_colour = gr.Checkbox(label="Apply Colour Preservation", value=False)
        foreground_only = gr.Checkbox(label="Apply Foreground-Aware Styling", value=False)
        sharpness = gr.Slider(0, 1, 0.5, step=0.05, label="Style Sharpness")

    # Run button
    run_btn = gr.Button("Stylise")

    # Output row to display the final image and the three metrics
    with gr.Row():
        output_img = gr.Image(label="Stylised Output", image_mode="RGB", height=512, width=512)
        with gr.Column():
            ssim_metric = gr.Number(label="SSIM vs Content (↑ better)")
            lpips_content_metric = gr.Number(label="LPIPS vs Content (↓ better)")
            lpips_style_metric = gr.Number(label="LPIPS vs Blended Style (↓ better)")

    # Connect the button to unified_stylize; the input order must match its signature
    run_btn.click(
        fn=unified_stylize,
        inputs=[model_choice, content_input,
                style_input1, style_input2, style_input3, style_input4, style_input5,
                blend1, blend2, blend3, blend4, blend5,
                alpha_slider, beta_slider, preserve_colour, foreground_only, sharpness],
        outputs=[output_img, ssim_metric, lpips_content_metric, lpips_style_metric]
    )

# Launch app
demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bcf51045289b1184fd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [1]:
#FULL CODE COMPILED TO ONE CELL FOR EASY RUN
# Unified NST Gradio App with Metrics
# AdaIN (TFHub) | VGG Optimizer | MobileNet Optimizer in one UI

# Install all necessary packages
!pip install -q tensorflow tensorflow_hub gradio opencv-python torch torchvision lpips scikit-image

# Imports
import gradio as gr
import numpy as np
import tensorflow as tf
# Pretrained models from TFHub
import tensorflow_hub as hub
# OpenCV for image processing
import cv2
import torch
# Perceptual similarity metric
import lpips
# Pretrained models
from tensorflow.keras.applications import vgg19, mobilenet_v2
from tensorflow.keras.models import Model
# Import image utilities
from PIL import Image, ImageFilter
from skimage.metrics import structural_similarity as ssim

# Common helper functions

# Convert any numpy image to float32 [0,1]
def to_float01(np_img):
    """Return ``np_img`` as a float32 array clipped to [0, 1].

    uint8 input is divided by 255 first; any other dtype is only cast to
    float32 and clipped, so it is expected to already be on a 0-1 scale.
    """
    as_float = np_img.astype(np.float32)
    is_8bit = np_img.dtype == np.uint8
    # Only 8-bit images are rescaled; everything else is taken as-is.
    scaled = as_float / 255.0 if is_8bit else as_float
    return np.clip(scaled, 0, 1)


# Build a mask that marks near-black content pixels as the foreground (the region to stylise)
def make_black_mask_from_np(content_np_resized, threshold=0.05):
    """Return a float32 mask of shape (..., 1) flagging near-black pixels.

    A pixel is flagged (1.0) only when every channel along the last axis is
    strictly below ``threshold``; all other pixels are 0.0.
    """
    # Per-channel comparison, then require it to hold for all channels.
    is_dark = np.less(content_np_resized, threshold).all(axis=-1)
    # Trailing singleton axis lets the mask broadcast against H x W x C images.
    return np.expand_dims(is_dark.astype(np.float32), axis=-1)


# Preserve content colours (stylize luminance, keep chroma)
def apply_colour_preservation(stylized_f01, content_resized_f01):
    """Combine the stylised luminance with the content image's chroma.

    Both inputs are RGB float images on a 0-1 scale with matching shapes.
    Returns an RGB float32 image on a 0-1 scale.
    """
    def _rgb01_to_yuv_u8(img_f01):
        # The colour conversion is done on 8-bit data, so quantise first.
        as_u8 = (np.clip(img_f01, 0, 1) * 255).astype(np.uint8)
        return cv2.cvtColor(as_u8, cv2.COLOR_RGB2YUV)

    # Start from the content's YUV (keeps its U/V chroma planes) ...
    merged_yuv = _rgb01_to_yuv_u8(content_resized_f01)
    # ... and overwrite only the Y (luminance) plane with the stylised one.
    merged_yuv[..., 0] = _rgb01_to_yuv_u8(stylized_f01)[..., 0]

    merged_rgb = cv2.cvtColor(merged_yuv, cv2.COLOR_YUV2RGB)
    return merged_rgb.astype(np.float32) / 255.0


# Apply sharpness enhancement using UnsharpMask filter
def apply_sharpness(stylized_f01, sharpness_value):
    """Sharpen a 0-1 float RGB image with PIL's UnsharpMask filter.

    ``sharpness_value`` is mapped to the filter's ``percent`` argument as
    ``int(sharpness_value * 200)``; the radius is fixed at 1.5.
    Returns a float32 image on a 0-1 scale.
    """
    # PIL filters work on 8-bit images, so quantise before filtering.
    as_u8 = (np.clip(stylized_f01, 0, 1) * 255).astype(np.uint8)
    source = Image.fromarray(as_u8)
    unsharp = ImageFilter.UnsharpMask(radius=1.5, percent=int(sharpness_value*200))
    sharpened = source.filter(unsharp)
    return np.asarray(sharpened).astype(np.float32) / 255.0


# Collect style images with their weights and normalize weights
def collect_styles_and_weights(styles, weights):
    """Drop empty style slots and normalise the remaining weights to sum to 1.

    Args:
        styles: style images, with ``None`` for unused slots.
        weights: one blend weight per entry in ``styles``.

    Returns:
        ``(images, weights)`` as two lists of equal length.

    Raises:
        ValueError: if every style slot is ``None``.
    """
    kept_imgs = []
    kept_ws = []
    for img, w in zip(styles, weights):
        if img is None:
            continue
        kept_imgs.append(img)
        kept_ws.append(w)

    if not kept_imgs:
        raise ValueError("Need ≥1 style image")

    total = float(sum(kept_ws))
    if total <= 1e-8:
        # All weights are (effectively) zero: give everything to the first style.
        normalised = [1.0] + [0.0] * (len(kept_imgs) - 1)
    else:
        normalised = [float(w) / total for w in kept_ws]
    return kept_imgs, normalised

# Shared LPIPS perceptual-similarity model used by compute_metrics below.
# Loaded once at module level (AlexNet backbone) and switched to eval mode.
lpips_model = lpips.LPIPS(net='alex').eval()

# Compute SSIM and LPIPS metrics
def compute_metrics(stylized_np, content_pil, style_imgs, blend_weights):
    """Score a stylised image against its content and style references.

    All images are brought to 256x256 RGB before scoring.

    Args:
        stylized_np: stylised RGB float image, values expected in [0, 1].
        content_pil: content image (PIL).
        style_imgs: style images (PIL); None entries are skipped.
        blend_weights: one weight per entry of ``style_imgs``.

    Returns:
        Tuple ``(ssim_val, lpips_content, lpips_style)`` of Python floats:
        SSIM between content and stylised image, LPIPS between stylised and
        content image, and LPIPS between the stylised image and the weighted
        average of the style images.
    """
    # Helper function to resize image to 256x256 RGB
    def prep(im):
        return cv2.resize(np.array(im.convert("RGB")), (256, 256))

    # Helper function to convert a uint8 HWC image to a Torch NCHW tensor in [-1,1]
    def to_torch(np_img):
        scaled = torch.from_numpy(np_img.astype(np.float32) / 255.0)
        return scaled.permute(2, 0, 1).unsqueeze(0) * 2 - 1

    # Clip before the uint8 cast (values outside [0,1] would wrap around)
    # and round to the nearest level instead of truncating
    stylized_f01 = np.clip(np.asarray(stylized_np, dtype=np.float32), 0.0, 1.0)
    stylized_u8 = np.rint(stylized_f01 * 255.0).astype(np.uint8)

    # Prepare stylised and content images at the same 256x256 size
    stylized_256 = prep(Image.fromarray(stylized_u8))
    content_256 = prep(content_pil)

    # Weighted average of all provided style images: the LPIPS style reference
    blended_style = np.zeros(stylized_256.shape, dtype=np.float32)
    for img, w in zip(style_imgs, blend_weights):
        if img is not None:
            blended_style += prep(img).astype(np.float32) * w
    blended_style_u8 = np.rint(np.clip(blended_style, 0, 255)).astype(np.uint8)

    # SSIM (Structural Similarity Index Measure) against the content image
    ssim_val = ssim(content_256, stylized_256, channel_axis=-1)

    styl_t_img = to_torch(stylized_256)
    cont_t_img = to_torch(content_256)
    style_t = to_torch(blended_style_u8)

    # Metrics are inference only: skip building an autograd graph
    with torch.no_grad():
        # LPIPS vs content
        lpips_content = lpips_model(styl_t_img, cont_t_img).item()
        # LPIPS vs style
        lpips_style = lpips_model(styl_t_img, style_t).item()

    return float(ssim_val), lpips_content, lpips_style


# AdaIN (TFHub Pretrained)
# Load the pretrained stylisation module from TFHub once, when this cell runs;
# blend_styles_with_adain() calls this module-level instance for every request.
# NOTE(review): the hub handle names Magenta's "arbitrary-image-stylization-v1-256"
# model, which this file refers to as "AdaIN" throughout; confirm the naming is intended.
adain_model = hub.load("https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2")

# Turn an uploaded PIL image into a batched float32 TensorFlow tensor in [0,1]
def load_image_from_upload(image, target_size=(256,256)):
    """Convert ``image`` to RGB, resize it to ``target_size`` and add a batch axis.

    Returns a float32 tensor whose leading axis has size 1 and whose pixel
    values are scaled from [0, 255] to [0, 1].
    """
    rgb = image.convert("RGB")
    resized = rgb.resize(target_size)
    # Scale 8-bit pixel values down to [0,1]
    pixels = np.array(resized).astype(np.float32) / 255.0
    # Leading axis of size 1 acts as the batch dimension
    batched = pixels[None, ...]
    return tf.convert_to_tensor(batched, dtype=tf.float32)


# Blend multiple styles with AdaIN model
def blend_styles_with_adain(content_img, style_imgs, weights):
    """Stylise ``content_img`` with a weighted pixel-space blend of styles.

    The style images are averaged (weighted) into a single style image,
    which is passed to the TFHub model together with the content image.

    Args:
        content_img: content image (PIL).
        style_imgs: non-empty list of style images (PIL).
        weights: one blend weight per style image; they are re-normalised
            to sum to 1. If they sum to (almost) zero, the first style
            receives all the weight.

    Returns:
        The stylised result as a PIL image.

    Raises:
        ValueError: if ``style_imgs`` is empty.
    """
    if not style_imgs:
        raise ValueError("Need ≥1 style image")

    # Normalize weights; guard the division so zero weights cannot yield NaNs
    weights = np.asarray(weights, dtype=np.float64)
    total = float(weights.sum())
    if total <= 1e-8:
        weights = np.zeros_like(weights)
        weights[0] = 1.0
    else:
        weights = weights / total

    # Content and style tensors (load_image_from_upload resizes to its default size)
    content_tensor = load_image_from_upload(content_img)
    style_tensors = [load_image_from_upload(img) for img in style_imgs]

    # Weighted blend of styles
    blended = tf.zeros_like(style_tensors[0])
    for i, st in enumerate(style_tensors):
        blended += st * float(weights[i])

    # Run the stylisation model; its first output is the stylised batch
    stylized = adain_model(content_tensor, blended)[0]
    # Remove batch dimension
    out = tf.squeeze(stylized).numpy()
    # Round to the nearest 8-bit level rather than truncating
    out = np.rint(np.clip(out * 255, 0, 255)).astype("uint8")
    return Image.fromarray(out)


# Gradio callback: stylise with the AdaIN (TFHub) model
def stylize_ui_adain(content,s1,s2,s3,s4,s5,
                     w1,w2,w3,w4,w5,
                     alpha,beta,preserve_colour,foreground_only,sharpness):
    """Run the TFHub model and the optional post-processing steps.

    ``alpha`` and ``beta`` are accepted so that all three callbacks share one
    signature; this callback does not use them.

    Returns a float RGB image of size 512x512 clipped to [0, 1].
    """
    target_hw = (512, 512)

    # Drop empty style slots and normalise the remaining weights
    candidate_styles = [s1, s2, s3, s4, s5]
    candidate_weights = [w1, w2, w3, w4, w5]
    style_imgs, blend_weights = collect_styles_and_weights(candidate_styles, candidate_weights)

    # Stylise with the blended style image
    adain_pil = blend_styles_with_adain(content, style_imgs, blend_weights)
    adain_f01 = np.array(adain_pil).astype(np.float32) / 255.0

    # Bring the model output and the content image to the common 512x512 size
    adain_u8 = (adain_f01 * 255).astype(np.uint8)
    upscaled_u8 = cv2.resize(adain_u8, target_hw, interpolation=cv2.INTER_CUBIC)
    result = upscaled_u8.astype(np.float32) / 255.0
    content_f01 = np.array(content.resize(target_hw)).astype(np.float32) / 255.0

    # Optional extras, applied in this fixed order
    if preserve_colour:
        # Keep the content image's colours
        result = apply_colour_preservation(result, content_f01)
    if sharpness > 0:
        result = apply_sharpness(result, sharpness)
    if foreground_only:
        # Stylised pixels where the mask is 1, original content elsewhere
        fg_mask = make_black_mask_from_np(content_f01, threshold=0.05)
        result = fg_mask * result + (1.0 - fg_mask) * content_f01

    return np.clip(result, 0, 1)


# Build VGG Optimizer

# Build a frozen VGG19 that exposes the requested intermediate layers
def vgg_layers(layer_names):
    """Return a Keras model mapping VGG19's input to the outputs of ``layer_names``.

    The backbone is loaded with ImageNet weights, without the classifier
    head, and marked non-trainable. Outputs follow the order of ``layer_names``.
    """
    backbone = vgg19.VGG19(include_top=False, weights='imagenet')
    backbone.trainable = False
    # One output tensor per requested layer, in the order given
    tapped_outputs = []
    for layer_name in layer_names:
        tapped_outputs.append(backbone.get_layer(layer_name).output)
    return Model([backbone.input], tapped_outputs)


# Gram matrix of a feature map (used by the style losses)
def gram_matrix(x):
    """Return the channel-by-channel Gram matrix of ``x`` per batch element.

    The einsum contracts the two middle axes of a 4-D input, giving one
    (channels x channels) matrix per batch entry, which is then divided by
    the product of those two axis sizes.
    """
    dims = tf.shape(x)
    # Number of positions summed over by the contraction
    num_positions = tf.cast(dims[1] * dims[2], tf.float32)
    channel_products = tf.linalg.einsum('bijc,bijd->bcd', x, x)
    return channel_products / num_positions


# VGG19-based extractor returning style (Gram) and content features
class VGGStyleContentModel(tf.keras.models.Model):
    """Map an image batch in [0, 1] to named style and content features.

    ``call`` returns ``{'style': {layer: gram}, 'content': {layer: activation}}``
    keyed by the layer names passed to the constructor.
    """

    def __init__(self, style_layers, content_layers):
        super().__init__()
        # One backbone serving both groups: style outputs first, then content
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style = len(style_layers)

    def call(self, inputs):
        # Scale [0,1] pixels to [0,255] before the VGG19 preprocessing
        scaled = inputs * 255.0
        features = self.vgg(vgg19.preprocess_input(scaled))
        # Split the flat output list back into its style / content parts
        style_feats = features[:self.num_style]
        content_feats = features[self.num_style:]
        # Style is represented by Gram matrices, content by raw activations
        grams = [gram_matrix(feat) for feat in style_feats]
        return {
            'style': dict(zip(self.style_layers, grams)),
            'content': dict(zip(self.content_layers, content_feats)),
        }


# Preprocess PIL image for VGG [0,1] → batch tensor
def preprocess_img_vgg(pil_img):
    """Convert a PIL image to a 512x512 float32 RGB batch of one, in [0, 1]."""
    # Force 3-channel RGB (as load_image_from_upload does) so RGBA,
    # greyscale or palette images do not reach VGG with the wrong channel count
    img = np.array(pil_img.convert("RGB")).astype(np.float32) / 255.0
    img = tf.image.resize(img, (512, 512))
    # Add the batch dimension
    return img[tf.newaxis, :]


# Blend multiple style targets
def blend_style_targets(style_images, extractor, blend_weights):
    """Return the weighted sum of the style features of several images.

    Args:
        style_images: style images (PIL).
        extractor: callable returning a dict with a ``'style'`` entry that
            maps layer names to features.
        blend_weights: one weight per style image.

    Returns:
        Dict mapping each style layer name to its blended feature, or None
        when ``style_images`` is empty.
    """
    blended = None
    for img, weight in zip(style_images, blend_weights):
        target = extractor(preprocess_img_vgg(img))['style']
        if blended is None:
            # Initialize with the weighted first style
            blended = {name: feat * weight for name, feat in target.items()}
        else:
            # Accumulate into a fresh dict instead of mutating while iterating
            blended = {name: blended[name] + target[name] * weight
                       for name in blended}
    return blended


# Optimization loop for VGG-based NST
def run_style_transfer_vgg(content, style_imgs, blend_weights, alpha, beta,
                           steps=50, learning_rate=0.02):
    """Optimise an image so its VGG features match the content and style targets.

    Args:
        content: content image (PIL).
        style_imgs: style images (PIL).
        blend_weights: one blend weight per style image.
        alpha: multiplier of the content loss.
        beta: multiplier of the style loss.
        steps: number of optimisation iterations (default 50).
        learning_rate: Adam learning rate (default 0.02).

    Returns:
        The optimised image as a float NumPy array in [0, 1] with the batch
        axis removed.
    """
    style_layer_names = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                         'block4_conv1', 'block5_conv1']
    content_layer_names = ['block5_conv2']

    content_in = preprocess_img_vgg(content)
    extractor = VGGStyleContentModel(style_layer_names, content_layer_names)
    targets = {
        'style': blend_style_targets(style_imgs, extractor, blend_weights),
        'content': extractor(content_in)['content']
    }

    # Start with content image as variable
    img = tf.Variable(content_in)
    # Adam optimizer
    opt = tf.optimizers.Adam(learning_rate)

    for _ in range(steps):
        with tf.GradientTape() as tape:
            out = extractor(img)
            # Per-layer mean squared error, summed over layers
            s_loss = tf.add_n([tf.reduce_mean((out['style'][k] - targets['style'][k]) ** 2)
                               for k in targets['style']])
            c_loss = tf.add_n([tf.reduce_mean((out['content'][k] - targets['content'][k]) ** 2)
                               for k in targets['content']])
            loss = alpha * c_loss + beta * s_loss
        # Compute gradients with respect to the image pixels
        grad = tape.gradient(loss, img)
        # Apply update
        opt.apply_gradients([(grad, img)])
        # Keep the image in the valid [0,1] range after every step
        img.assign(tf.clip_by_value(img, 0, 1))
    return tf.squeeze(img).numpy()


# Gradio callback: stylise with the VGG optimiser
def stylize_ui_vgg(content,s1,s2,s3,s4,s5,
                   w1,w2,w3,w4,w5,
                   alpha,beta,preserve_colour,foreground_only,sharpness):
    """Run the VGG optimisation and the optional post-processing steps.

    Returns a float RGB image clipped to [0, 1].
    """
    # Drop empty style slots and normalise the remaining weights
    candidate_styles = [s1, s2, s3, s4, s5]
    candidate_weights = [w1, w2, w3, w4, w5]
    style_imgs, blend_weights = collect_styles_and_weights(candidate_styles, candidate_weights)

    # Optimise the image
    result = run_style_transfer_vgg(content, style_imgs, blend_weights, alpha, beta)

    # Content at 512x512, needed by the optional extras below
    content_f01 = np.array(content.resize((512, 512))).astype(np.float32) / 255.0

    # Optional extras, applied in this fixed order
    if preserve_colour:
        result = apply_colour_preservation(result, content_f01)
    if sharpness > 0:
        result = apply_sharpness(result, sharpness)
    if foreground_only:
        # Stylised pixels where the mask is 1, original content elsewhere
        fg_mask = make_black_mask_from_np(content_f01, threshold=0.05)
        result = fg_mask * result + (1.0 - fg_mask) * content_f01

    return np.clip(result, 0, 1)

# MobileNet Optimizer (Multi-stage)

# MobileNetV2 layer names whose activations are turned into Gram matrices
# for the style loss (listed from early to deep blocks)
STYLE_LAYERS = [
    'block_1_expand_relu','block_3_expand_relu','block_6_expand_relu',
    'block_10_expand_relu','block_13_expand_relu'
]

# Per-layer multipliers for the style loss; run_stage() divides the weighted
# sum by the total of these values. Weights decrease with depth
# (lower layers capture fine textures, deeper layers capture structure)
STYLE_LAYER_WEIGHTS = {
    # Earliest style layer has the highest weight
    'block_1_expand_relu':1.0,
    'block_3_expand_relu':0.8,
    'block_6_expand_relu':0.6,
    'block_10_expand_relu':0.5,
    # Deepest style layer has the lowest weight
    'block_13_expand_relu':0.4
}

# MobileNetV2 layer names used for the content loss (deep layers preserve
# high-level structure). Note 'block_13_expand_relu' is also a style layer
CONTENT_LAYERS = ['block_13_expand_relu','block_16_project']


# MobileNetV2-based extractor returning style (Gram) and content features
class MNetStyleContentModel(tf.keras.models.Model):
    """Map a preprocessed image batch to named style and content features.

    ``call`` applies no preprocessing itself; it feeds its input straight to
    the MobileNetV2 encoder and returns
    ``{'style': {layer: gram}, 'content': {layer: activation}}``.
    """

    def __init__(self, style_layers, content_layers, img_size):
        super().__init__()
        # Remember the layer names and where the style outputs end
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style = len(style_layers)

        # Frozen ImageNet MobileNetV2 built for square img_size inputs
        backbone = mobilenet_v2.MobileNetV2(include_top=False,
                                            weights='imagenet',
                                            input_shape=(img_size, img_size, 3))
        backbone.trainable = False

        # Encoder outputs: style layers first, then content layers
        tapped = [backbone.get_layer(name).output
                  for name in style_layers + content_layers]
        self.encoder = Model([backbone.input], tapped)

    def call(self, x):
        activations = self.encoder(x)
        # Split the flat output list back into its style / content parts
        style_acts = activations[:self.num_style]
        content_acts = activations[self.num_style:]
        # Style is represented by Gram matrices, content by raw activations
        grams = [gram_matrix(act) for act in style_acts]
        return {
            'style': dict(zip(self.style_layers, grams)),
            'content': dict(zip(self.content_layers, content_acts)),
        }


# Preprocess input PIL image for MobileNetV2
def preprocess_for_model(pil_img, img_size):
    """Convert a PIL image to a MobileNetV2-preprocessed batch of one.

    The image is forced to RGB, resized to ``img_size`` x ``img_size`` and
    passed through ``mobilenet_v2.preprocess_input``.
    """
    # Force 3-channel RGB (as load_image_from_upload does) so RGBA,
    # greyscale or palette images do not break the 3-channel encoder
    x = np.array(pil_img.convert("RGB")).astype(np.float32) / 255.0
    # Resize to target size
    x = tf.image.resize(x, (img_size, img_size), antialias=True)
    # Apply MobileNet-specific preprocessing (expects [0,255] pixel values)
    x = mobilenet_v2.preprocess_input(x * 255.0)
    # Add the batch dimension
    return x[None, ...]


# Map a batched [-1,1] image back to an unbatched [0,1] float image
def deprocess_from_model(x_minus1_to1):
    """Return ``x_minus1_to1`` as a float32 NumPy image in [0, 1].

    The leading axis is squeezed away explicitly, so it must have size 1.
    """
    unbatched = tf.squeeze(x_minus1_to1, 0)
    # Linear map [-1,1] → [0,1], then clamp anything outside that range
    rescaled = (unbatched + 1.0) / 2.0
    bounded = tf.clip_by_value(rescaled, 0.0, 1.0)
    return bounded.numpy().astype(np.float32)


# Blend multiple style targets for MobileNet
def blended_style_targets(extractor, style_pils, blend_weights, img_size):
    """Return the weighted sum of the style features of several images.

    Args:
        extractor: callable returning a dict with a ``'style'`` entry that
            maps layer names to features.
        style_pils: style images (PIL).
        blend_weights: one weight per style image.
        img_size: side length the images are resized to before extraction.

    Returns:
        Dict mapping each style layer name to its blended feature, or None
        when ``style_pils`` is empty.
    """
    accum = None
    for img, w in zip(style_pils, blend_weights):
        # Extract style features for current image
        st = extractor(preprocess_for_model(img, img_size))['style']
        if accum is None:
            # First style image initialises the accumulator
            accum = {k: v * w for k, v in st.items()}
        else:
            # Accumulate into a fresh dict instead of mutating while iterating
            accum = {k: accum[k] + st[k] * w for k in accum}
    return accum


# Laplacian edge response (used by the edge loss)
def laplacian(img_minus1_to1):
    """Convolve a [-1, 1] image batch with a 3x3 Laplacian stencil.

    The input is first mapped to [0, 1]. The filter has three input channels
    and one output channel, so the per-channel responses are summed into a
    single response map. SAME padding keeps the spatial size.
    """
    # Map [-1,1] → [0,1]
    unit_range = (img_minus1_to1 + 1.0) / 2.0

    # 4-neighbour Laplacian stencil (edge detector)
    stencil = tf.constant([[0., -1., 0.],
                           [-1., 4., -1.],
                           [0., -1., 0.]], tf.float32)

    # Filter layout is [H, W, in_channels, out_channels]: start from
    # [3,3,1,1] and repeat along the input-channel axis for the 3 RGB channels
    single_channel = tf.reshape(stencil, [3, 3, 1, 1])
    rgb_kernel = tf.repeat(single_channel, 3, axis=2)

    return tf.nn.conv2d(unit_range, rgb_kernel, strides=1, padding='SAME')


# Multi-stage optimization loop (224 → 512 → 768 )
def run_stage(content_pil, style_pils, blend_weights, img_size,
              steps, lr_start, lr_end, alpha_content, beta_style,
              tv_weight=1e-6, edge_weight=5e-3, init_from=None):
    """Run one resolution stage of the MobileNet optimiser.

    Args:
        content_pil: content image (PIL).
        style_pils: style images (PIL); the first one also seeds the image
            when ``init_from`` is None.
        blend_weights: one blend weight per style image.
        img_size: side length of the square working resolution.
        steps: number of optimisation steps.
        lr_start: initial learning rate of the cosine-decay schedule.
        lr_end: final learning rate of the schedule.
        alpha_content: multiplier of the content loss.
        beta_style: multiplier of the style loss.
        tv_weight: multiplier of the total-variation term.
        edge_weight: multiplier of the Laplacian edge term.
        init_from: image from a previous stage (batched, [-1, 1]) to start
            from, or None to start from a content/style mix.

    Returns:
        The optimised image as a ``tf.Variable`` in [-1, 1] (batched).
    """
    # Preprocess the content image to MobileNet format at target resolution
    content = preprocess_for_model(content_pil, img_size)

    # Initialize MobileNet-based extractor with style/content layers
    extractor = MNetStyleContentModel(STYLE_LAYERS, CONTENT_LAYERS, img_size)

    # Get blended style features from all style images
    style_tgt = blended_style_targets(extractor, style_pils, blend_weights, img_size)

    # Get content target features
    content_tgt = extractor(content)['content']

    if init_from is None:
        # Seed with a 60/40 mix of content and first style image (in [0,1]),
        # then map the seed back to the model's input range
        style_seed = preprocess_for_model(style_pils[0], img_size)
        seed = tf.clip_by_value(((content + 1) / 2.0) * 0.6 + ((style_seed + 1) / 2.0) * 0.4, 0, 1)
        image = tf.Variable(mobilenet_v2.preprocess_input(seed * 255.0))
    else:
        # Upscale previous stage output to current size and use it as init
        up = tf.image.resize(init_from, (img_size, img_size), method='bicubic', antialias=True)
        # Bicubic interpolation can overshoot; clamp so the first forward
        # pass already sees pixels inside the valid [-1, 1] range
        image = tf.Variable(tf.clip_by_value(up, -1.0, 1.0))

    # Cosine decay from lr_start down to lr_end over the stage
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(lr_start, steps, alpha=lr_end / lr_start)
    opt = tf.optimizers.Adam(lr_schedule)

    # Normalize style loss by total style weights
    denom = sum(STYLE_LAYER_WEIGHTS.values())

    # Define one training step
    @tf.function
    def train_step(img):
        with tf.GradientTape() as tape:
            # Extract features from current image
            outs = extractor(img)
            s, c = outs['style'], outs['content']

            # Compute style loss (weighted MSE across layers)
            s_loss = tf.add_n([
                STYLE_LAYER_WEIGHTS[k] * tf.reduce_mean((s[k] - style_tgt[k]) ** 2)
                for k in s
            ]) * (beta_style / denom)

            # Compute content loss (MSE averaged across content layers)
            c_loss = tf.add_n([
                tf.reduce_mean((c[k] - content_tgt[k]) ** 2)
                for k in c
            ]) * (alpha_content / len(CONTENT_LAYERS))

            # Add total variation loss (encourages smoothness)
            tv = tf.reduce_mean(tf.image.total_variation(img)) * tv_weight

            # Add edge loss (match Laplacian edges of content and stylized)
            edge = tf.reduce_mean((laplacian(img) - laplacian(content)) ** 2) * edge_weight

            # Final total loss
            loss = s_loss + c_loss + tv + edge

        # Compute gradients of loss wrt. image
        grad = tape.gradient(loss, img)

        # Clip gradients to avoid exploding updates
        grad = tf.clip_by_norm(grad, 10.0)

        # Apply optimizer step
        opt.apply_gradients([(grad, img)])

        # Clamp image values back into valid range [-1, 1]
        img.assign(tf.clip_by_value(img, -1, 1))
        return loss

    # Run training loop for the specified number of steps
    for _ in range(steps):
        train_step(image)

    # Return optimized image variable
    return image

# Gradio callback: stylise with the multi-stage MobileNet optimiser
def stylize_ui_mnet(content, s1, s2, s3, s4, s5,
                    w1, w2, w3, w4, w5,
                    alpha, beta, preserve_colour, foreground_only, sharpness):
    """Run the three-stage MobileNet optimisation and the optional extras.

    Returns a float RGB image of size 512x512 clipped to [0, 1].
    """
    # Drop empty style slots and normalise the remaining weights
    candidate_styles = [s1, s2, s3, s4, s5]
    candidate_weights = [w1, w2, w3, w4, w5]
    style_imgs, blend_weights = collect_styles_and_weights(candidate_styles, candidate_weights)

    # Coarse-to-fine schedule; each stage starts from the previous result.
    # (resolution, steps, lr_start, lr_end, content-weight multiplier)
    stage_plan = (
        (224, 400, 0.05, 0.01, 1),
        (512, 350, 0.03, 0.006, 2),
        (768, 300, 0.02, 0.004, 3),
    )
    stage_out = None
    for size, n_steps, lr_hi, lr_lo, alpha_mult in stage_plan:
        stage_out = run_stage(content, style_imgs, blend_weights, size, n_steps,
                              lr_hi, lr_lo, alpha * alpha_mult, beta,
                              init_from=stage_out)

    # Final tensor → [0,1] float image, then down to 512x512 for the UI
    result = deprocess_from_model(stage_out)
    result = tf.image.resize(result, (512, 512), antialias=True, method='bicubic').numpy()

    # Content image as float [0,1], resized to 512x512 for the extras below
    content_np = to_float01(np.array(content))
    content_u8 = (content_np * 255).astype(np.uint8)
    content_512 = Image.fromarray(content_u8).resize((512, 512), Image.BILINEAR)
    content_f01 = np.array(content_512).astype(np.float32) / 255.0

    # Optional extras, applied in this fixed order
    if preserve_colour:
        # Keep the content image's colours, take the texture from the stylised image
        result = apply_colour_preservation(result, content_f01)
    if sharpness > 0:
        result = apply_sharpness(result, sharpness)
    if foreground_only:
        # Stylised pixels where the mask is 1, original content elsewhere
        fg_mask = make_black_mask_from_np(content_f01, threshold=0.05)
        result = fg_mask * result + (1.0 - fg_mask) * content_f01

    return np.clip(result, 0, 1)


# Dispatch routes request to chosen NST model
def unified_stylize(model_choice, content,
                    s1, s2, s3, s4, s5,
                    w1, w2, w3, w4, w5,
                    alpha, beta, preserve_colour, foreground_only, sharpness):
    """Stylise with the selected model and compute the evaluation metrics.

    Args:
        model_choice: one of "AdaIN", "VGG Optimizer", "MobileNet Optimizer".
        content: content image (PIL).
        s1..s5: style images (PIL) or None for unused slots.
        w1..w5: blend weight of the corresponding style image.
        alpha, beta, preserve_colour, foreground_only, sharpness: forwarded
            unchanged to the selected model's callback.

    Returns:
        Tuple ``(image, ssim, lpips_vs_content, lpips_vs_style)``.

    Raises:
        ValueError: if ``model_choice`` is not recognised, no content image
            was given, or no style image was given.
    """
    backends = {
        "AdaIN": stylize_ui_adain,
        "VGG Optimizer": stylize_ui_vgg,
        "MobileNet Optimizer": stylize_ui_mnet,
    }

    # Validate up front so bad input fails with a clear message instead of an
    # unbound-variable or attribute error deep inside a model pipeline
    if model_choice not in backends:
        raise ValueError(f"Unknown model choice: {model_choice!r}")
    if content is None:
        raise ValueError("Need a content image")

    # Filtered styles / normalised weights, reused for the metrics below
    style_imgs, blend_weights = collect_styles_and_weights([s1, s2, s3, s4, s5],
                                                           [w1, w2, w3, w4, w5])

    # Run the selected model
    out = backends[model_choice](content, s1, s2, s3, s4, s5,
                                 w1, w2, w3, w4, w5,
                                 alpha, beta, preserve_colour, foreground_only, sharpness)

    # Compute SSIM and LPIPS metrics for evaluation
    ssim_val, lpips_c, lpips_s = compute_metrics(out, content, style_imgs, blend_weights)

    # Return stylised image and metrics
    return out, ssim_val, lpips_c, lpips_s

# Gradio User Interface
# Components appear in the page in the order they are created below
with gr.Blocks() as demo:

    # Title and description
    # Reading the metrics: higher SSIM is better, lower LPIPS is better
    gr.Markdown("##  3-in-1 NST App : AdaIN | VGG Optimizer | MobileNet Optimizer")
    gr.Markdown(
        "After you click \"Stylise\" — your chosen model makes the stylised image, "
        "then metrics are computed:\n"
    )

    # Dropdown for model selection; the choice strings must match the ones
    # unified_stylize() dispatches on
    model_choice = gr.Dropdown(
        ["AdaIN", "VGG Optimizer", "MobileNet Optimizer"],
        value="AdaIN",
        label="Select Model"
    )

    # Upload widget for the content image (delivered to the callback as an RGB PIL image)
    content_input = gr.Image(
        label="Content Image",
        type="pil",
        image_mode="RGB",
        height=256,
        width=256
    )

    # Row of 5 style image upload widgets; slots left empty arrive as None
    with gr.Row():
        style_input1 = gr.Image(label="Style 1", type="pil")
        style_input2 = gr.Image(label="Style 2", type="pil")
        style_input3 = gr.Image(label="Style 3", type="pil")
        style_input4 = gr.Image(label="Style 4", type="pil")
        style_input5 = gr.Image(label="Style 5", type="pil")

    # Blend-weight sliders for the 5 style images (range 0..1);
    # by default only Style 1 contributes
    with gr.Row():
        blend1 = gr.Slider(0, 1, 1, step=0.01, label="Style 1 Strength")
        blend2 = gr.Slider(0, 1, 0, step=0.01, label="Style 2 Strength")
        blend3 = gr.Slider(0, 1, 0, step=0.01, label="Style 3 Strength")
        blend4 = gr.Slider(0, 1, 0, step=0.01, label="Style 4 Strength")
        blend5 = gr.Slider(0, 1, 0, step=0.01, label="Style 5 Strength")

    # Controls for content/style loss weights, colour preservation,
    # foreground-only styling and sharpness
    with gr.Row():
        alpha_slider = gr.Slider(1, 5000, 1000, step=10, label="α (Content Weight)")
        beta_slider = gr.Slider(1e-3, 1.0, 1e-2, step=1e-3, label="β (Style Weight)")
        preserve_colour = gr.Checkbox(label="Apply Colour Preservation", value=False)
        foreground_only = gr.Checkbox(label="Apply Foreground-Aware Styling", value=False)
        sharpness = gr.Slider(0, 1, 0.5, step=0.05, label="Style Sharpness")

    # Run button
    run_btn = gr.Button("Stylise")

    # Output row: final image on the left, the three metrics in a column beside it
    with gr.Row():
        output_img = gr.Image(label="Stylised Output", image_mode="RGB", height=512, width=512)
        with gr.Column():
            ssim_metric = gr.Number(label="SSIM vs Content (↑ better)")
            lpips_content_metric = gr.Number(label="LPIPS vs Content (↓ better)")
            lpips_style_metric = gr.Number(label="LPIPS vs Blended Style (↓ better)")

    # Connect button to unified stylisation function.
    # The order of `inputs` must match unified_stylize()'s parameter order,
    # and `outputs` the order of the tuple it returns
    run_btn.click(
        fn=unified_stylize,
        inputs=[model_choice, content_input,
                style_input1, style_input2, style_input3, style_input4, style_input5,
                blend1, blend2, blend3, blend4, blend5,
                alpha_slider, beta_slider, preserve_colour, foreground_only, sharpness],
        outputs=[output_img, ssim_metric, lpips_content_metric, lpips_style_metric]
    )

# Launch app with Gradio's default launch settings
# NOTE(review): the captured notebook output suggests passing debug=True to
# launch() to see errors when running in Colab
demo.launch()


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.8/53.8 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hSetting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


100%|██████████| 233M/233M [00:01<00:00, 173MB/s]


Loading model from: /usr/local/lib/python3.12/dist-packages/lpips/weights/v0.1/alex.pth
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6da973ff3997231637.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


