In [1]:
# import all required libraries
import lpips
from torchvision import transforms
import imageio
import gradio as gr
import torch
import subprocess
import clip
import tqdm
import tqdm.auto
import matplotlib.pyplot as plt
import time
import threading
import os
import numpy as np
from PIL import Image

In [2]:
# global variables for accessing video and frames
video_path = "output_video.mp4"
model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
output_frames_path = "output_frames.mp4"
denoise_steps = [0] * 50
load_time = 0
warm_up_time = 0
denoise_time = 0
compile_time = 0
is_LoRA_applied = False
is_operator_fusion_applied = False
is_quantization_applied = False
is_CPU_offload_applied = False
pipe = None
model = None
preprocess = None
model_loaded = False

In [3]:
# modify the tqdm library to note time at each inference step
original_tqdm = tqdm.auto.tqdm
class TqdmSpy(original_tqdm):
    def update(self, n=1):
        super().update(n)
        global denoise_steps
        denoise_steps[self.n-1] = time.time()

In [4]:
# patch modified tqdm to diffusers
import diffusers
diffusers.utils.tqdm = TqdmSpy
tqdm.tqdm = TqdmSpy
tqdm.auto.tqdm = TqdmSpy

In [5]:
# library for extracting frames
import decord
decord.bridge.set_bridge("torch")
from decord import VideoReader

In [6]:
# check device availability
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.cuda.is_available(), torch.cuda.get_device_name(0)

(True, 'NVIDIA L4')

In [7]:
# LPIPS loss function and transform definition
loss_fn = lpips.LPIPS(net='vgg')

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]




Loading model from: /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/lpips/weights/v0.1/vgg.pth


In [8]:
from diffusers import (
    DPMSolverMultistepScheduler,
    DDIMScheduler,
    LMSDiscreteScheduler
)

def set_diffusion_scheduler(pipe, scheduler_type):
    config = pipe.scheduler.config

    if scheduler_type == "dpm":
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(config)
        print("[INFO] Scheduler set to DPM Solver Multistep")
    else:
        print("[INFO] Using default scheduler (no change)")


In [9]:
# helper function to load model while recording time to load the model
def load_model(data_type, pipeline_quant_config, scheduler_type, to_offload=True):
    global load_time
    global pipe
    global model
    global preprocess
    pipe = None
    model = None
    preprocess = None

    start_time = time.time()

    map = None

    if to_offload:
        print("Applying CPU offloading...Choosing device-map: balanced")
        map = "balanced"
    
    pipe = diffusers.DiffusionPipeline.from_pretrained(
        model_id,
        device_map=map,
        torch_dtype=data_type,
        quantization_config=pipeline_quant_config
    )

    end_time = time.time()

    # load model for CLIP Score
    model, preprocess = clip.load("ViT-B/32", device=device)

    # Set scheduler
    set_diffusion_scheduler(pipe, scheduler_type=scheduler_type)

    patch_encode_prompt(pipe)

    load_time = end_time - start_time

In [10]:
# helper function for encoding prompt
def patch_encode_prompt(pipe):
    text_encoder_device = pipe.text_encoder.get_input_embeddings().weight.device
    orig_encode_prompt = pipe.encode_prompt

    def patched_encode_prompt(*args, **kwargs):
        if "device" not in kwargs or kwargs["device"] is None:
            kwargs["device"] = text_encoder_device
        return orig_encode_prompt(*args, **kwargs)

    pipe.encode_prompt = patched_encode_prompt

In [11]:
# helper function for extracting frames from video for CLIP score
def extract_frames(num_frames=8):
    vr = VideoReader(video_path)
    total_frames = len(vr)
    indices = torch.linspace(0, total_frames - 1, steps=num_frames).long()
    batch = vr.get_batch(indices).asnumpy()  # shape: (T, H, W, C)

    return [Image.fromarray(frame) for frame in batch]

In [12]:
# helper function for extracting frames from video for LPIPS score
def extract_frames_imageio(video_path):
    reader = imageio.get_reader(video_path)
    frames = []
    for frame in reader:
        frames.append(Image.fromarray(frame))
    reader.close()
    return frames

In [13]:
#helper function to plot graph of denoising
def denoise_graph():
    global denoise_steps
    global warm_up_time
    global denoise_time
    dns2 = [denoise_steps[i] - denoise_steps[i-1] for i in range(1, len(denoise_steps))]
    denoise_time = sum(dns2)
    plt.plot(list(range(len(dns2))), dns2)
    plt.xlabel('Denoising step')
    plt.ylabel('Time taken')
    plt.title('Denoising')
    graph = plt.gcf()
    return graph

In [14]:
# helper function for computing CLIP score
def compute_clip_score(frames, text):
    text_token = clip.tokenize([text]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_token).float()
        frame_features = []
        for frame in frames:
            image_input = preprocess(frame).unsqueeze(0).to(device)
            image_feature = model.encode_image(image_input).float()
            frame_features.append(image_feature)
        frame_features = torch.stack(frame_features).squeeze(1)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        frame_features = frame_features / frame_features.norm(dim=-1, keepdim=True)
        similarities = (frame_features @ text_features.T).squeeze()
        return similarities.mean().item()

In [15]:
# helper function for computing LPIPS score
def compute_temporal_lpips(frames):
    if not frames or len(frames) < 2:
        return float("nan")

    scores = []

    for i in range(len(frames) - 1):
        try:
            img1 = transform(frames[i]).unsqueeze(0)
            img2 = transform(frames[i + 1]).unsqueeze(0)

            with torch.no_grad():
                dist = loss_fn(img1, img2)
                score = dist.item()
                scores.append(score)

        except Exception as e:
            continue

    if not scores:
        return float("nan")

    avg_score = np.mean(scores)
    return avg_score

In [16]:
# helper functions to get GPU stats
def get_gpu_stats():
    try:
        result = subprocess.check_output([
            "nvidia-smi",
            "--query-gpu=memory.used,memory.free,utilization.gpu,temperature.gpu,power.draw",
            "--format=csv,nounits,noheader"
        ], encoding='utf-8')
        memory_used, memory_free, utilization, temp, power = result.strip().split(', ')
        return {
            "memory_used": f"{memory_used} MB",
            "memory_free": f"{memory_free} MB",
            "utilization": f"{utilization} %",
            "temperature": f"{temp} °C",
            "power": f"{power} W"
        }
    except Exception as e:
        return {
            "memory_used": "Error",
            "memory_free": "Error",
            "utilization": "Error",
            "temperature": "Error",
            "power": "Error"
        }
def get_gpu_info_only():
    stats = get_gpu_stats()
    return (
        stats["memory_used"],
        stats["memory_free"],
        stats["utilization"],
        stats["temperature"],
        stats["power"]
    )

In [17]:
# helper function to plot GPU VRAM usage
gpu_times=[]
gpu_vram_usage=[]

def only_vram():
  statss = get_gpu_stats()
  return(statss["memory_used"])

# Event to signal when to stop monitoring
stop_event = threading.Event()
stop_event.clear()

def monitor_vram():
    start = time.time()
    while not stop_event.is_set():
        mem = only_vram()
        t = time.time() - start
        if mem != "Error":
            v = int(mem.split()[0])
        else:
            v = 0
        gpu_times.append(t)
        gpu_vram_usage.append(v)
        time.sleep(0.25)
def gpu_usage_graph():
    global gpu_times
    global gpu_vram_usage

    plt.figure(figsize=(10, 5))
    plt.plot(gpu_times, gpu_vram_usage, marker='o', color='blue')
    plt.xlabel('Time Elapsed (seconds)')
    plt.ylabel('VRAM Used (MB)')
    plt.title('GPU VRAM Usage During Video Generation')
    plt.grid(True)
    graph = plt.gcf()
    return graph

In [17]:
# helper function to implement LoRA - loads weight from the git repo
def apply_LoRA():
    global is_LoRA_applied
    if (is_LoRA_applied == False):
      lora_path = "./Wan2.1-T2V-1.3B-crush-smol-v0/"
      print("Starting LoRA application...")

      # Check if the path exists
      if not os.path.exists(lora_path):
          print(f"LoRA path does not exist: {lora_path}")
          return

      try:
          print(f"Loading LoRA weights from: {lora_path}")
          pipe.load_lora_weights(lora_path)
          print("LoRA weights loaded successfully.")
      except Exception as e:
          print(f"Error loading LoRA weights: {e}")
          return

      try:
          print("Enabling LoRA...")
          pipe.enable_lora()
          print("LoRA enabled.")
      except Exception as e:
          print(f"Error enabling LoRA: {e}")
          return

      try:
          print("Attempting to fuse LoRA...")
          pipe.fuse_lora()
          is_LoRA_applied = True
          print("LoRA fused successfully.")
      except Exception as e:
          print(f"LoRA fuse not supported: {e}")
    else:
      print("LoRA already applied, so skipping...")

KeyboardInterrupt: 

In [19]:
# helper function to implement operator fusion - covers model transformer and VAE
def apply_operator_fusion():
    global is_operator_fusion_applied
    global compile_time
    if (is_operator_fusion_applied == False):
      startt = time.time()
      print("Starting operator fusion...")

      try:
          print("Compiling transformer...")
          pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead")
          print("Transformer compiled successfully.")
      except Exception as e:
          print(f"Error compiling transformer: {e}")

      try:
          print("Compiling VAE...")
          pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead")
          print("VAE compiled successfully.")
      except Exception as e:
          print(f"Error compiling VAE: {e}")
      endd = time.time()
      compile_time = endd - startt

      is_operator_fusion_applied = True
    else:
      print("Operator Fusion already applied, so skipping...")

In [20]:
# helper function to apply quantization - 4 bit
def apply_quantization():
    print("Applying quantization configuration...")

    dtype = torch.float16
    print(f"Chosen dtype: {dtype}")

    quant_config = diffusers.quantizers.PipelineQuantizationConfig(
        quant_backend="bitsandbytes_4bit",
        quant_kwargs={
            "load_in_4bit": True,
            "bnb_4bit_quant_type": "nf4",
            "bnb_4bit_compute_dtype": torch.float16
        },
        components_to_quantize=["transformer", "text_encoder", "vae"]
    )

    print("Quantization config created.")
    print(f"Quant backend: {quant_config.quant_backend}")
    print(f"Quant kwargs: {quant_config.quant_kwargs}")
    print(f"Components to quantize: {quant_config.components_to_quantize}")

    return dtype, quant_config

In [21]:
def generate_video(prompt, negative_prompt="Blurry, unrealistic, shaky", frames=60, fps=12, resolution=480, inference_steps=50, guidance_scale=6.0, seed=42):

    global denoise_steps
    global warm_up_time
    global denoise_time
    
    denoise_steps = [0] * inference_steps

    # adjust frame count as per WAN's requirement
    frames = 4 * frames + 1

    # set height and width based on resolution
    height = resolution
    width = 832  # default

    if height == 240:
        width = 416
    elif height == 720:
        width = 1248
    elif height == 1080:
        width = 1872

    # set seed for reproducibility
    torch.manual_seed(seed)
    generator = torch.Generator().manual_seed(seed)

    # measure generation time
    start_time = time.time()

    # generate video frames
    output = pipe(
        prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=frames,
        guidance_scale=guidance_scale,
        generator=generator,
        num_inference_steps=inference_steps
    )

    end_time = time.time()

    grph = denoise_graph()
    total_latency = end_time - start_time
    warm_up_time = total_latency - denoise_time
    frame_latency = denoise_time / frames
    Throughput = frames / total_latency

    video = output.frames

    if isinstance(video, np.ndarray):
        video = np.squeeze(video)
        video = (video * 255).clip(0, 255).astype("uint8")
    else:
        raise TypeError("Unexpected output format from pipeline")

    # convert frames to PIL Images
    frame_images = [Image.fromarray(frame) for frame in video]

    # export videos
    diffusers.utils.export_to_video(frame_images, video_path, fps=fps)
    diffusers.utils.export_to_video(frame_images, output_frames_path, fps=1)

    return video_path, f"{total_latency:.3f} s", f"{frame_latency:.3f} s", f"{Throughput:.3f} fps", grph

In [22]:
negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"

In [None]:
themeeeeee = gr.themes.Base(
    primary_hue="indigo",
    secondary_hue="gray",
    radius_size=gr.themes.Size(
        xxs="6px", xs="6px", sm="8px", md="10px", lg="12px", xl="14px", xxl="16px"
    ),
    spacing_size=gr.themes.Size(
        xxs="2px", xs="4px", sm="6px", md="10px", lg="16px", xl="24px", xxl="32px"
    )
).set(
    body_background_fill="linear-gradient(135deg, #0f2027, #203a43, #2c5364)",
    body_text_color="white",
    block_background_fill="rgba(255, 255, 255, 0.08)",
    block_border_color="rgba(255, 255, 255, 0.2)",
    block_shadow="0 12px 40px rgba(0, 0, 0, 0.4)",
    input_background_fill="rgba(255, 255, 255, 0.1)",
    input_border_color="rgba(255, 255, 255, 0.2)",
    button_primary_background_fill="rgba(99, 102, 241, 0.85)",
    button_primary_text_color="white",
    button_primary_background_fill_hover="rgba(99, 102, 241, 1)"
)

css_reset = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');

body, #root, .gradio-container {
    font-family: 'Inter', sans-serif !important;
    background: linear-gradient(135deg, #0f2027, #203a43, #2c5364);
    perspective: 1000px;
    overflow-x: hidden;
    animation: fadeIn 1s ease-in-out;
    color: #00FFFF !important;
    text-shadow: 0 0 5px #00FFFF, 0 0 10px #00FFFF !important;
}

#root {
    transform-style: preserve-3d;
}

.gradio-container > * {
   /* transform: rotateX(1deg) rotateY(-2deg); */
    transition: transform 0.4s ease, box-shadow 0.4s ease;
    backdrop-filter: blur(14px);
    -webkit-backdrop-filter: blur(14px);
    border-radius: 20px;
    background: rgba(255, 255, 255, 0.05);
    box-shadow: 0 12px 40px rgba(0, 0, 0, 0.3);
    border: 1px solid rgba(255, 255, 255, 0.15);
    margin: 30px auto;
    padding: 25px;
    width: 85% !important;
    box-sizing: border-box;
    color: white !important;
}

/* Flashy animated text */
@keyframes flashyColors {
    0%   { color: #00FFFF; text-shadow: 0 0 10px #00FFFF; }  /* Cyan */
    33%  { color: #66CCFF; text-shadow: 0 0 10px #66CCFF; }  /* Light Blue */
    66%  { color: #00FF00; text-shadow: 0 0 10px #00FF00; }  /* Green */
    100% { color: #00FFFF; text-shadow: 0 0 10px #00FFFF; }  /* Back to Cyan */
}


h1, h2, h3, h4, h5, p, span, label, button {
    animation: flashyColors 15s infinite alternate !important;
    text-shadow: 0 1px 1px #000, 0 2px 2px #00FFFF, 0 0 10px rgba(0,255,255,0.6);
    font-weight: 600;
}
h1, h2, h3 {
    text-align: center;
    font-size: 2.2rem;
}

/* Buttons */
.gr-button {
    position: relative;
    background: linear-gradient(to bottom, #00FFFF, #00BFFF) !important;
    color: #000 !important;
    font-weight: bold;
    border-radius: 14px !important;
    padding: 12px 20px;
    border: none;
    box-shadow: 0 4px 0 #009E9E, 0 6px 12px rgba(0, 255, 255, 0.4);
    transition: transform 0.2s ease, box-shadow 0.2s ease;
}
.gr-button:hover {
    transform: scale(1.05);
    box-shadow: 0 6px 0 #008B8B, 0 10px 15px rgba(0, 255, 255, 0.5);
}
.gr-button:active {
    transform: translateY(2px);
    box-shadow: 0 2px 0 #007777, 0 6px 10px rgba(0, 255, 255, 0.3);
}
.gr-button.clicked::after {
    content: '🔘';
    position: absolute;
    right: 15px;
    top: 50%;
    transform: translateY(-50%);
    font-size: 1.2rem;
    animation: fadeOut 0.2s forwards;
}
@keyframes fadeOut {
    0% { opacity: 1; }
    80% { opacity: 0.5; }
    100% { opacity: 0; content: ''; }
}

/* Form fields */
.gr-textbox.gr-box, textarea, input {
    background: rgba(40, 40, 40, 0.95) !important;
    color: #FFFFFF !important;
    border: 1px solid #00FFFF !important;
    border-radius: 12px;
    box-shadow: 0 0 10px rgba(0, 255, 255, 0.4);
    animation: none !important;
    text-shadow: none !important;
}

/* Slider */
.gr-slider input[type="range"] {
    accent-color: #00FFFF !important;
}

/* Accordion */
.gr-accordion {
    background-color: #00ffff !important;
    color: #ffffff !important;
    border-radius: 8px;
    border: 1px solid #444;
    padding: 4px;
}
.gr-accordion .prose {
    color: #1e1e2f !important;
    font-weight: bold;
}

/* Checkboxes */
input[type="checkbox"] {
    appearance: none;
    -webkit-appearance: none;
    background-color: rgba(20, 20, 20, 0.8);
    border: 2px solid #00FFFF;
    border-radius: 6px;
    width: 20px;
    height: 20px;
    cursor: pointer;
    position: relative;
    transition: all 0.2s ease-in-out;
    box-shadow: 0 0 5px rgba(0, 255, 255, 0.4);
    margin-right: 8px;
    vertical-align: middle;
}

input[type="checkbox"]:checked {
    background-color: #00FFFF;
    box-shadow: 0 0 10px rgba(0, 255, 255, 0.8);
}

input[type="checkbox"]::after {
    content: '';
    position: absolute;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    font-size: 14px;
    color: #000;
    font-weight: bold;
    opacity: 0;
    transition: opacity 0.2s ease-in-out;
    animation: none !important;
    text-shadow: none !important;
}

input[type="checkbox"]:checked::after {
    content: '🔘';
    color: #FF0000 !important;
    opacity: 1;
}

input[type="radio"] {
    appearance: none;
    -webkit-appearance: none;
    border: 2px solid #ccc;
    width: 20px;
    height: 20px;
    border-radius: 50%;
    position: relative;
    cursor: pointer;
    outline: none;
}

/* Labels */
label, .gr-radio-label span, .gr-check-label span {
    font-weight: 700;
}

@keyframes fadeIn {
    from { opacity: 0; transform: translateY(20px); }
    to { opacity: 1; transform: translateY(0); }
}

#main-header h1 {
    font-size: 3rem !important;
    font-weight: 800;
}

#custom-radio input[type="radio"] {
    appearance: none;
    -webkit-appearance: none;
    width: 1em;
    height: 1em;
    margin-right: 10px;
    border: 2px solid #ccc;
    border-radius: 50%;
    position: relative;
    cursor: pointer;
}

/* Show a custom emoji when selected */
#custom-radio input[type="radio"]:checked::before {
    content: '🔘';
    position: absolute;
    font-size: 1em;
    top: -2px;
    left: -2px;
    color: red;
}

</style>
"""

with gr.Blocks(theme=themeeeeee) as demo:
    gr.HTML(css_reset)
    gr.Markdown("# Perfect Chinmayee Shambhavi Wifey Ashika", elem_id="main-header")
    gr.Markdown("<hr>")

    with gr.Group():
        with gr.Row():
            gr.Markdown("## <div style='text-align:center; padding:15px;'>Metrics</div>")
        with gr.Row():
            with gr.Accordion("Hardware", open=True):
                with gr.Row():
                    mem_used = gr.Textbox(label="Memory used", interactive=False)
                    mem_free = gr.Textbox(label="Memory free", interactive=False)
                    gpu_util = gr.Textbox(label="GPU utilization", interactive=False)
                    temp = gr.Textbox(label="Temperature", interactive=False)
                    powe = gr.Textbox(label="Power draw", interactive=False)
                    ram_plot = gr.Plot(label="VRAM vs Time Graph")
            with gr.Accordion("Efficiency", open=True):
                with gr.Row():
                    warmup_time_box = gr.Textbox(label="Warm Up Time", interactive=False)
                    clip_latency_box = gr.Textbox(label="Clip-wise latency", interactive=False)
                    frame_latency_box = gr.Textbox(label="Frame-wise latency", interactive=False)
                    throughput_box = gr.Textbox(label="Throughput", interactive=False)
                    dngraphoutput = gr.Plot(label="Time vs Denoising-Steps Graph")
        with gr.Row():
            with gr.Accordion("Accuracy", open=True):
                with gr.Row():
                    lpips_score_box = gr.Textbox(label="LPIPS score", interactive=False)
                    clip_score_box = gr.Textbox(label="CLIP score", interactive=False)
            with gr.Accordion("Others", open=True):
                with gr.Row():
                    compile_time_box = gr.Textbox(label="Compile time", interactive=False)
                    lolu = gr.Textbox(label="Load time", interactive=False)
    with gr.Row():
        with gr.Group():
            gr.Markdown("## <div style='text-align:center; padding:15px;'>Parameters</div>")
            res = gr.Radio(choices=[240, 480, 720, 1080], value=480, label="Output resolution", interactive=True, elem_id="custom-radio")
            with gr.Row():
                prompt = gr.Textbox(placeholder="e.g. A cat walking on moon", label="Prompt")
                nprompt = gr.Textbox(value=negative_prompt, label="Negative prompt")
            with gr.Row(equal_height=True):
                 fps = gr.Slider(minimum=1, maximum=120, label="FPS", value=12)
                 frames = gr.Slider(minimum=1, maximum=360, label="Number of frames", value=60)
            with gr.Row(equal_height=True):
                 seed_slider = gr.Slider(0, 100, value=42, step=1, label="Seed", interactive=True)
            with gr.Row(equal_height=True):
                 guidance_slider = gr.Slider(4.0, 15.0, value=6.0, step=0.1, label="Guidance Scale", interactive=True)
                 steps_slider = gr.Slider(1, 100, value=50, step=1, label="Inference Steps", interactive=True)

            optimization_mode = gr.Radio(
                label="Choose Optimization Mode",
                choices=["None", "Best", "Individual Techniques"],
                value="Best",
                interactive=True,
                elem_id="custom-radio"
            )

            individual_opts_row = gr.Row(visible=False)
            Schedulers = gr.Radio(choices=['DPM (faster Speed)'], label="Diffusion Schedulers", elem_id="custom-radio")
            to_load_model = gr.Checkbox(value=False, label="Reload the model")
            with individual_opts_row:
                selected_techniques = gr.CheckboxGroup(
                    label="Select Optimization Techniques",
                    choices=["Quantization", "LoRA", "Operator Fusion", "CPU offloading"],
                )

            def toggle_checkboxes(opt_mode):
                return gr.update(visible=(opt_mode is "Individual Techniques"))

            optimization_mode.change(
                fn=toggle_checkboxes,
                inputs=optimization_mode,
                outputs=individual_opts_row
            )

        with gr.Group():
            gr.Markdown("## <div style='text-align:center; padding:15px;'>Output</div>")
            output = gr.Video(label="Generated video")

    generate = gr.Button("Generate")

    # timers for GPU and load time monitoring
    timer = gr.Timer()
    timer.tick(fn=get_gpu_info_only, inputs=[], outputs=[mem_used, mem_free, gpu_util, temp, powe])

    # wrapper with metrics and optimization techniques integration
    def wrapper(prompt, nprompt, frames, fps, res, opt_mode, opt_tech, Schedulers, modell, guidance_scale, inference_steps, seed, progress=gr.Progress()):
    
        global compile_time
        global model_loaded
        global pipe

        data_type = None
        pipe_quant_config = None

        # start measuring GPU VRAM usage
        monitor_thread = threading.Thread(target=monitor_vram)
        monitor_thread.start()

        opt = opt_mode

        # give choice corresponding to technique
        if "Individual Techniques" in opt_mode:
            opt = opt_tech

        # check if the model needs to be reloaded
        if ((modell) or (model_loaded == False)):

            print("Optimization mode:", opt)

            # apply quantization if it's in the selected list or "Best" mode
            if ('Quantization' in opt or 'Best' in opt):
                data_type, pipe_quant_config = apply_quantization()
            else:
                data_type = None
                pipe_quant_config = None

            # select the scheduler
            if Schedulers == 'DPM (faster Speed)':
                scheduler_ty = "dpm"
            else:
                scheduler_ty = "default"

            to_offload = False

            if ('CPU offloading' in opt or 'Best' in opt):
                to_offload = True

            # load model
            load_model(
                data_type,
                pipe_quant_config,
                scheduler_type=scheduler_ty,
                to_offload=to_offload
            )

            model_loaded = True

        # apply techniques
        if ('LoRA' in opt or 'Best' in opt):
            apply_LoRA()
        if ('Operator Fusion' in opt or 'Best' in opt):
            apply_operator_fusion()

        video_path, clip_latency, frame_latency, throughputt, dngraph = generate_video(
            prompt=prompt,
            negative_prompt=nprompt,
            frames=frames,
            fps=fps,
            resolution=res,
            seed=seed,
            guidance_scale=guidance_scale,
            inference_steps=inference_steps
        )

        extracted_frames = extract_frames_imageio(video_path)
        lpips_score = compute_temporal_lpips(extracted_frames)

        framess = extract_frames(num_frames=frames)
        clipp_score = compute_clip_score(framess, prompt)

        lpips_display = f"{lpips_score:.3f}" if not np.isnan(lpips_score) else "N/A"
        clip_score_display = f"{clipp_score:.3f}"
        warmup_time_display = f"{warm_up_time:.3f} s"
        lolu_display = f"{load_time:.3f} s"
        compile_time_display = f"{compile_time:.3f} s"

        stop_event.set()
        monitor_thread.join()

        return video_path, clip_latency, frame_latency, lpips_display, clip_score_display, warmup_time_display, lolu_display, throughputt, dngraph, compile_time_display, gpu_usage_graph()
    generate.click(
        fn=wrapper,
        inputs=[prompt, nprompt, frames, fps, res, optimization_mode, selected_techniques, Schedulers, to_load_model ,guidance_slider, steps_slider, seed_slider],
        outputs=[output, clip_latency_box, frame_latency_box, lpips_score_box, clip_score_box, warmup_time_box, lolu, throughput_box, dngraphoutput, compile_time_box, ram_plot]
    )
demo.launch(debug=True, share=True)

  return gr.update(visible=(opt_mode is "Individual Techniques"))


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://071ac225f92499bd3d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Optimization mode: Best
Applying quantization configuration...
Chosen dtype: torch.float16
Quantization config created.
Quant backend: bitsandbytes_4bit
Quant kwargs: {'load_in_4bit': True, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_compute_dtype': torch.float16}
Components to quantize: ['transformer', 'text_encoder', 'vae']
Applying CPU offloading...Choosing device-map: balanced


model_index.json:   0%|          | 0.00/400 [00:00<?, ?B/s]

Fetching 19 files:   0%|          | 0/19 [00:00<?, ?it/s]

scheduler_config.json:   0%|          | 0.00/751 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/854 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00004-of-00005.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/2.89G [00:00<?, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/4.55M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

(…)pytorch_model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

(…)pytorch_model-00002-of-00002.safetensors:   0%|          | 0.00/677M [00:00<?, ?B/s]

(…)ion_pytorch_model.safetensors.index.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/724 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/508M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

You are loading your model in 8bit or 4bit but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|███████████████████████████████████████| 338M/338M [00:07<00:00, 48.3MiB/s]


[INFO] Scheduler set to DPM Solver Multistep
Starting LoRA application...
LoRA path does not exist: ./Wan2.1-T2V-1.3B-crush-smol-v0/
Starting operator fusion...
Compiling transformer...
Transformer compiled successfully.
Compiling VAE...
VAE compiled successfully.


  0%|          | 0/50 [00:00<?, ?it/s]

W0728 21:18:47.564000 1968 /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/_inductor/utils.py:1250] [1/0_1] Not enough SMs to use max_autotune_gemm mode
skipping cudagraphs due to skipping cudagraphs due to cpu device (arg1_1). Found from : 
   File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/diffusers/models/transformers/transformer_wan.py", line 436, in forward
    rotary_emb = self.rope(hidden_states)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/diffusers/models/transformers/transformer_wan.py", line 207, in forward
    freqs = self.freqs.to(hidden_states.device)

W0728 21:19:04.556000 1968 /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:964] [0/8] torch._dynamo hit config.recompile_limit (8)
W0728 21:19:04.556000 1968 /system/conda/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:964] [0/8]    function: 'new_for

Starting LoRA application...
LoRA path does not exist: ./Wan2.1-T2V-1.3B-crush-smol-v0/
Operator Fusion already applied, so skipping...


  0%|          | 0/50 [00:00<?, ?it/s]