### Video prompt evaluation

In the second cell, choose the CLIP model, the prompt, and the video file to use. Then run the remaining cells and enjoy your new GIF.

In [27]:
import cv2
import gif
import matplotlib.pyplot as plt
import numpy as np
import open_clip
import torch
from PIL import Image
from tqdm.notebook import tqdm

In [9]:
# --- User configuration -----------------------------------------------------
# Text prompt every video frame is scored against, and the video to score.
PROMPT = "parachute guy in the sky"
FILENAME = "./test.mp4"

# open_clip architecture name and the pretrained-weights tag to load for it.
MODEL, PRETRAINED = "RN50", "yfcc15m"

In [3]:
# Build the CLIP model; of the returned values only the model and the third
# item (used below as the per-frame image preprocessing) are kept.
model, _, preprocess = open_clip.create_model_and_transforms(MODEL, pretrained=PRETRAINED)
# open_clip hands the model back in training mode. Switch to eval mode so
# layers such as BatchNorm (present in RN50) use their stored statistics
# instead of the statistics of each single-frame batch.
model = model.cuda().eval()
tokenizer = open_clip.get_tokenizer(MODEL)

In [5]:
def run_video_through_clip(filename, prompt, model, preprocess, tokenizer):
    """Score every frame of a video against a text prompt with CLIP.

    The prompt is encoded once; each frame is then encoded and compared to it.
    Both feature vectors are L2-normalised, so each score is the cosine
    similarity between the frame and the prompt.

    Args:
        filename: Path of the video file, passed to ``cv2.VideoCapture``.
        prompt: Text the frames are compared against.
        model: CLIP model exposing ``encode_text`` and ``encode_image``;
            inputs are moved to CUDA, so the model must live on the GPU.
        preprocess: Callable turning a PIL image into a model input tensor.
        tokenizer: Callable turning a list of strings into token tensors.

    Returns:
        A tuple ``(frames, clip_rewards)`` of NumPy arrays: the decoded frames
        exactly as OpenCV returned them (BGR channel order) and one
        similarity score per frame.

    Raises:
        IOError: If the video file cannot be opened.
    """
    # Inference only: no_grad avoids building an autograd graph for the text
    # features that would otherwise be kept alive for the whole video.
    with torch.no_grad():
        text_features = model.encode_text(tokenizer([prompt]).cuda())
        text_features /= text_features.norm(dim=-1, keepdim=True)

    clip_rewards = []
    frames = []
    vidcap = cv2.VideoCapture(filename)
    if not vidcap.isOpened():
        vidcap.release()
        raise IOError(f'Could not open video file: {filename}')

    try:
        success, image = vidcap.read()
        while success:
            frames.append(image)

            # OpenCV decodes frames as BGR, while PIL (and therefore the CLIP
            # preprocessing) interprets arrays as RGB; convert before encoding.
            rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pixels = preprocess(Image.fromarray(np.uint8(rgb))).unsqueeze(0)
            with torch.no_grad(), torch.cuda.amp.autocast():
                image_features = model.encode_image(pixels.cuda())
                image_features /= image_features.norm(dim=-1, keepdim=True)
                sim = image_features @ text_features.T
                # sim is 1x1 (one frame, one prompt); pull out the scalar.
                reward = sim.detach().cpu().numpy()[0][0]

            clip_rewards.append(reward)
            success, image = vidcap.read()
    finally:
        # Always free the decoder handle, even if encoding fails mid-video.
        vidcap.release()

    return np.array(frames), np.array(clip_rewards)

In [6]:
# Decode the configured video and score each of its frames against the prompt.
frames, clip_rewards = run_video_through_clip(
    filename=FILENAME,
    prompt=PROMPT,
    model=model,
    preprocess=preprocess,
    tokenizer=tokenizer,
)

In [26]:
@gif.frame
def plot_rewards_gif(i, frame, clip_rewards):
    """Draw one GIF frame: the reward curve so far next to the video frame.

    Args:
        i: Index of the current video frame.
        frame: The video frame as decoded by OpenCV (BGR channel order).
        clip_rewards: Per-frame CLIP rewards for the whole video.
    """
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    # Include the current frame's reward (slice up to i + 1); slicing to i
    # would leave the first GIF frame with an empty plot and lag one behind.
    shown = i + 1
    ax[0].plot(np.arange(0, shown), clip_rewards[:shown])
    ax[0].set_title('CLIP reward')
    # imshow expects RGB, but OpenCV frames are BGR; convert so colours match
    # the source video.
    ax[1].imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

In [23]:
from pathlib import Path

# Render one GIF frame per video frame (with a progress bar).
gif_frames = [
    plot_rewards_gif(i, frame, clip_rewards)
    for i, frame in enumerate(tqdm(frames))
]

# Name the output after the prompt and the video's base name only. Embedding
# the raw FILENAME (e.g. './test.mp4') would put a path separator in the
# middle of the name and point into a directory that does not exist.
output_path = f'{PROMPT}_{Path(FILENAME).stem}.gif'
# NOTE(review): the unit of `duration` depends on the installed gif version;
# 33 is presumably milliseconds per frame (~30 fps) — confirm.
gif.save(gif_frames, output_path, duration=33)

  0%|          | 0/1110 [00:00<?, ?it/s]