In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import sys
sys.path.append('..')

import cv2
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from omegaconf import OmegaConf
from diffusers import ControlNetModel, DDIMScheduler, StableDiffusionPipeline

from iattention import UNION_PIPELINES
from iattention.utils import correct_colors_hist, show_image, show_images



In [2]:
def get_video_info(cap):
    """Read basic stream metadata from a capture object.

    Args:
        cap: Object exposing ``get(prop_id)``; the notebook passes a
            ``cv2.VideoCapture``.

    Returns:
        Tuple ``(height, width, fps, frame_count)``. Note that height comes
        before width. Every value is passed through ``int()``, so a
        fractional frame rate is truncated toward zero.
    """
    # Properties are queried in this order: width, height, fps, frame count.
    property_ids = (
        cv2.CAP_PROP_FRAME_WIDTH,
        cv2.CAP_PROP_FRAME_HEIGHT,
        cv2.CAP_PROP_FPS,
        cv2.CAP_PROP_FRAME_COUNT,
    )
    frame_w, frame_h, frame_rate, n_frames = (
        int(cap.get(prop_id)) for prop_id in property_ids
    )
    return frame_h, frame_w, frame_rate, n_frames

In [3]:
# Load the default configuration, then copy the shared inference step count
# into every sub-config that carries its own `total_steps` entry.
config = OmegaConf.load('../configs/default.yaml')
num_inference_steps = config['common']['num_inference_steps']
for section_name in ('pipe_config', 'unet_config', 'controlnet_config'):
    config[section_name]['total_steps'] = num_inference_steps

# No custom UNet checkpoint is selected; a local alternative is kept for reference.
# NOTE(review): presumably None makes the pipeline use its default weights; confirm.
config['common']['unet_from'] = None  # '../models/deliberate_v2.safetensors'

In [4]:
# Pick the entry registered under the configured pipeline name and
# instantiate it with the full configuration.
pipeline_factory = UNION_PIPELINES[config['pipeline']]
pipe = pipeline_factory(config)

No model was supplied, defaulted to Intel/dpt-large and revision e93beec (https://huggingface.co/Intel/dpt-large).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of DPTForDepthEstimation were not initialized from the model checkpoint at Intel/dpt-large and are newly initialized: ['neck.fusion_stage.layers.0.residual_layer1.convolution1.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.weight', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'iattention.stablediffusion_controlnet_pipeline.IAttentionSDCPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [5]:
# Stylise a video frame by frame and write a side-by-side (input | result)
# comparison video. Raises IOError if the input cannot be opened or the output
# writer cannot be created.
config['common']['input_video_path'] = '../video/input/man.mp4'
config['common']['output_video_path'] = '../video/output/video-from-jupyter.mp4'

input_video_path = config['common']['input_video_path']
output_video_path = config['common']['output_video_path']

cap = cv2.VideoCapture(input_video_path)
writer = None
try:
    # A capture that failed to open reports zero frames and would otherwise
    # silently produce an empty output video.
    if not cap.isOpened():
        raise IOError(f'Cannot open input video: {input_video_path}')

    orig_height, orig_width, fps, frame_count = get_video_info(cap)

    print(f'\n\n[ VIDEO INFO | WxH: {orig_width}x{orig_height} | FPS: {fps} | FRAME COUNT: {frame_count} ]\n\n')

    # Frames are processed at the configured size; the output is written either
    # at that size or at the source resolution.
    img_h, img_w = config['common']['img_h'], config['common']['img_w']
    out_h, out_w = img_h, img_w
    if config['common']['original_output_size']:
        out_h, out_w = orig_height, orig_width

    # Create the output directory up front so the writer can open its file.
    output_dir = os.path.dirname(output_video_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    writer = cv2.VideoWriter(
        output_video_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (out_w * 2, out_h),  # two frames side by side
    )
    if not writer.isOpened():
        raise IOError(f'Cannot open output video for writing: {output_video_path}')

    first_image = None
    for _ in tqdm(range(frame_count)):
        c_ret, c_image = cap.read()
        # The reported frame count can exceed the decodable frames; stop early.
        if not c_ret or c_image is None:
            break

        # cv2.resize expects dsize as (width, height).
        image = cv2.resize(c_image, (img_w, img_h))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        result = pipe(image)

        if first_image is None:
            # The first stylised frame is the colour reference for later frames.
            first_image = result.copy()
        else:
            # Use the colour-corrected frame instead of discarding it.
            # NOTE(review): assumes correct_colors_hist returns an image array
            # compatible with cv2.resize; confirm against iattention.utils.
            result = correct_colors_hist(first_image, result, config['common']['hist_normalize'])

        out_images = [cv2.resize(x, (out_w, out_h)) for x in [image, result]]
        result = np.hstack(out_images)
        result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)

        writer.write(result)
finally:
    # Release handles even if the pipeline fails mid-video, so the file that
    # was written so far is finalised.
    cap.release()
    if writer is not None:
        writer.release()



[ VIDEO INFO | WxH: 750x720 | FPS: 30 | FRAME COUNT: 36 ]




100%|███████████████████████████████████████████| 36/36 [02:27<00:00,  4.09s/it]
