# 🧨 Diffusers meets Video

This Colab showcases Alibaba's new research text-to-video model and its integration with the diffusers library: https://huggingface.co/damo-vilab/text-to-video-ms-1.7b

In [1]:
#@title Check your GPU!
# IPython shell escape: runs the `nvidia-smi` command and prints its report below.
!nvidia-smi

Thu Apr 20 15:31:03 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.108.03   Driver Version: 510.108.03   CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:65:00.0 Off |                  Off |
|  0%   39C    P8    13W / 450W |     22MiB / 24564MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
#@title Setup pipeline
import os
import sys
from base64 import b64encode

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
from IPython.display import HTML

# Load the pretrained text-to-video checkpoint, requesting its "fp16" weight
# variant and half-precision (float16) tensors.
pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16")
# Replace the pipeline's scheduler with DPMSolverMultistepScheduler, built from
# the configuration of the scheduler that shipped with the checkpoint.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# Turn on the pipeline's model CPU offload and VAE slicing options
# (presumably to reduce GPU memory use — see the diffusers documentation).
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()

Downloading (…)ain/model_index.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

Downloading (…)_encoder/config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

Downloading (…)tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

Downloading (…)e7e/unet/config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/755 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

Downloading (…)tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading pytorch_model.fp16.bin:   0%|          | 0.00/681M [00:00<?, ?B/s]

Downloading (…)2e7e/vae/config.json:   0%|          | 0.00/657 [00:00<?, ?B/s]

Downloading (…)torch_model.fp16.bin:   0%|          | 0.00/2.82G [00:00<?, ?B/s]

Downloading (…)torch_model.fp16.bin:   0%|          | 0.00/167M [00:00<?, ?B/s]

## Generate new videos

In [5]:
def read_in_file(filename:str) -> list:
    """
    Read the text prompts for the T2V model from a text file.

    The file is expected to hold one prompt per line. Surrounding whitespace
    is stripped from every line and blank lines are skipped, so a trailing
    newline at the end of the file does not produce an empty prompt.

    Args:
        filename: Path of the UTF-8 encoded text file to read.

    Returns:
        The prompts as a list of strings, in file order.

    Raises:
        SystemExit: With exit code 1, after printing a message, when
            ``filename`` does not exist.
    """
    try:
        # The context manager closes the file even if reading fails part-way.
        with open(file=filename, mode="r", encoding="utf-8") as input_file:
            raw_lines = input_file.read().split("\n")
    except FileNotFoundError:
        print(f"The file {filename} doesn't exist.")
        sys.exit(1)

    # Empty prompts would each trigger a pointless generation run downstream.
    return [line.strip() for line in raw_lines if line.strip()]

def generate_videos(text_prompts,
                    output_video_path,
                    video_duration_seconds=3, 
                    num_inference_steps=25,
                    frames_per_second=10):
    """
    Generate one video per text prompt and save it under output_video_path.

    Each video is written to ``<output_video_path>/<prompt>.mp4``, where spaces
    and path separators in the prompt are replaced by underscores. Only the
    file name is rewritten; the directory part is used exactly as given.

    Args:
        text_prompts: Iterable of prompt strings. Blank prompts are skipped.
        output_video_path: Directory that receives the videos. It is created
            if it does not exist yet.
        video_duration_seconds: Used together with ``frames_per_second`` to
            compute how many frames are requested from the pipeline.
        num_inference_steps: Forwarded to the pipeline call.
        frames_per_second: Frames requested per second of duration. This only
            determines the frame count; no frame rate is passed on to
            ``export_to_video``.

    Returns:
        List with the value returned by ``export_to_video`` for each generated
        video (presumably the written file path — confirm against the
        installed diffusers version), in prompt order.
    """
    # The exporter is handed a full file path, so the directory must exist.
    os.makedirs(output_video_path, exist_ok=True)

    # A frame count has to be a whole number even for fractional durations.
    num_frames = int(video_duration_seconds * frames_per_second)

    video_paths = []
    for prompt in text_prompts:
        if not prompt.strip():
            continue

        video_frames = pipe(prompt,
                            num_inference_steps=num_inference_steps,
                            num_frames=num_frames).frames

        # Sanitize only the file name: a "/" or "\" inside a prompt must not
        # create sub-directories, and spaces in the output directory must
        # stay untouched.
        file_name = f"{prompt}.mp4".replace(" ", "_").replace("/", "_").replace("\\", "_")
        video_path = export_to_video(video_frames,
                                     output_video_path=f"{output_video_path}/{file_name}")
        video_paths.append(video_path)

    return video_paths

In [6]:
# Load the prompts (read_in_file splits the file on newlines) from a path
# relative to the notebook's working directory.
text_prompts = read_in_file("../generated_videos/prompts.txt")

In [14]:
# Directory that receives the generated videos.
# NOTE(review): this path goes two levels up ("../../"), while the prompts file
# is read from "../generated_videos/" — confirm both locations are intended.
output_video_path = "../../generated_videos/VideoFusion"
# Generate one video per prompt with the default duration and step count.
generate_videos(text_prompts, output_video_path)

## Display an example video

In [8]:
# Pick one generated clip to preview. The file name follows the scheme used by
# generate_videos: the prompt with spaces replaced by underscores, plus ".mp4".
video_path = f"{output_video_path}/A_dog_wearing_a_Superhero_outfit_with_red_cape_flying_through_the_sky.mp4"

In [9]:
#@title Display the video
import imageio
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML

def display_video(video, interval=100, repeat_delay=1000):
    """
    Build a matplotlib animation that plays the given frames in order.

    Args:
        video: Indexable sequence of frames; each frame is passed to
            ``imshow``.
        interval: Forwarded to ``ArtistAnimation`` as ``interval``.
        repeat_delay: Forwarded to ``ArtistAnimation`` as ``repeat_delay``.

    Returns:
        The ``matplotlib.animation.ArtistAnimation`` showing one frame at a
        time.
    """
    fig = plt.figure(figsize=(4.2, 4.2))  # display size specification
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    # Draw on an explicit axes instead of pyplot's implicit "current" one, and
    # hide the axis decorations once rather than once per frame.
    ax = fig.add_subplot(1, 1, 1)
    ax.axis('off')

    # ArtistAnimation expects one list of artists per animation step.
    frame_artists = [[ax.imshow(frame, animated=True)] for frame in video]

    anime = animation.ArtistAnimation(fig, frame_artists,
                                      interval=interval,
                                      repeat_delay=repeat_delay)

    # Close this specific figure so the notebook doesn't also render it as a
    # static image; the animation object keeps its own reference to it.
    plt.close(fig)
    return anime
# Read the chosen clip with imageio and render it inline as an HTML5 video
# built from the matplotlib animation.
video = imageio.mimread(video_path)  #Loading video
HTML(display_video(video).to_html5_video())  #Inline video display in HTML5