In [9]:
#@title Curve Optimizer {vertical-output: true}

%cd /content/diffvg/apps/

# Text prompts: the drawing target plus two optional negative prompts.
prompt = "a sketch of a dog."
neg_prompt = "A badly drawn sketch."
neg_prompt_2 = "Many ugly, messy drawings."

# Tokenize every prompt and move the tokens onto `device`.
text_input, text_input_neg1, text_input_neg2 = [
    clip.tokenize(text).to(device)
    for text in (prompt, neg_prompt, neg_prompt_2)
]

# Use negative prompts?
use_negative = False

# Thanks to Katherine Crowson for this.
# The CLIPDraw code used to generate the examples does not normalize images
# before passing them into CLIP, but really it should. Set this to True to
# enable that normalization.
use_normalized_clip = False

# Calculate the text features once; no gradients are needed for them.
with torch.no_grad():
    text_features, text_features_neg1, text_features_neg2 = [
        model.encode_text(tokens)
        for tokens in (text_input, text_input_neg1, text_input_neg2)
    ]

import pydiffvg
import torch
import skimage
import skimage.io
import random
import ttools.modules
import argparse
import math
import torchvision
import torchvision.transforms as transforms

pydiffvg.set_print_timing(False)

# Gamma value handed to pydiffvg.imwrite when frames are saved.
gamma = 1.0

# ARGUMENTS. Feel free to play around with these, especially num_paths.
# A plain namespace object: only attribute access is needed.
args = argparse.Namespace(
    num_paths=64,
    num_iter=1000,
    max_width=1,
)

# Use GPU if available, otherwise fall back to the CPU. The device must follow
# the same availability check as set_use_gpu; hard-coding 'cuda' here would
# contradict it on machines without a GPU.
use_gpu = torch.cuda.is_available()
pydiffvg.set_use_gpu(use_gpu)
device = torch.device('cuda' if use_gpu else 'cpu')
pydiffvg.set_device(device)

canvas_width, canvas_height = 224, 224
num_paths = args.num_paths
max_width = args.max_width

# Image Augmentation Transformation
# The perspective warp and random crop are always applied; the normalization
# step is appended only when use_normalized_clip is enabled.
augment_steps = [
    transforms.RandomPerspective(fill=1, p=1, distortion_scale=0.5),
    transforms.RandomResizedCrop(224, scale=(0.7, 0.9)),
]
if use_normalized_clip:
    # Per-channel mean / standard deviation used to normalize images for CLIP.
    augment_steps.append(
        transforms.Normalize(
            (0.48145466, 0.4578275, 0.40821073),
            (0.26862954, 0.26130258, 0.27577711),
        )
    )
augment_trans = transforms.Compose(augment_steps)


# Initialize Random Curves
# Every path is a chain of 1 or 2 segments with two control points each. It
# starts at a random canvas position and each following point is a small
# random offset from the previous one.
shapes = []
shape_groups = []
radius = 0.1
for i in range(num_paths):
    num_segments = random.randint(1, 2)
    num_control_points = torch.zeros(num_segments, dtype=torch.int32) + 2

    # Start point in normalized [0, 1) coordinates.
    anchor = (random.random(), random.random())
    points = [anchor]
    # Three points per segment, each jittered from its predecessor
    # (x offset is drawn before y offset).
    for _ in range(3 * num_segments):
        anchor = (
            anchor[0] + radius * (random.random() - 0.5),
            anchor[1] + radius * (random.random() - 0.5),
        )
        points.append(anchor)

    # Scale the normalized coordinates up to canvas pixels.
    points = torch.tensor(points)
    points[:, 0].mul_(canvas_width)
    points[:, 1].mul_(canvas_height)

    path = pydiffvg.Path(
        num_control_points=num_control_points,
        points=points,
        stroke_width=torch.tensor(1.0),
        is_closed=False,
    )
    shapes.append(path)

    # One group per path: no fill, and a random 4-component stroke colour.
    stroke_color = torch.tensor([random.random() for _ in range(4)])
    path_group = pydiffvg.ShapeGroup(
        shape_ids=torch.tensor([len(shapes) - 1]),
        fill_color=None,
        stroke_color=stroke_color,
    )
    shape_groups.append(path_group)

# Just some diffvg setup: serialize the scene and render it once.
render = pydiffvg.RenderFunction.apply
scene_args = pydiffvg.RenderFunction.serialize_scene(
    canvas_width, canvas_height, shapes, shape_groups)
img = render(canvas_width, canvas_height, 2, 2, 0, None, *scene_args)

# Turn on gradients for every tensor that will be optimized.
for path in shapes:
    path.points.requires_grad = True
    path.stroke_width.requires_grad = True
for group in shape_groups:
    group.stroke_color.requires_grad = True

# Collect the optimized tensors by kind so each kind gets its own optimizer.
points_vars = [path.points for path in shapes]
stroke_width_vars = [path.stroke_width for path in shapes]
color_vars = [group.stroke_color for group in shape_groups]

# Optimizers (separate learning rates for points, widths and colours)
points_optim = torch.optim.Adam(points_vars, lr=1.0)
width_optim = torch.optim.Adam(stroke_width_vars, lr=0.1)
color_optim = torch.optim.Adam(color_vars, lr=0.01)

# Iterations at which the learning rate of the point optimizer is lowered,
# mapped to the new learning rate.
lr_schedule = {
    int(args.num_iter * 0.5): 0.4,
    int(args.num_iter * 0.75): 0.1,
}
optimizers = (points_optim, width_optim, color_optim)

# Run the main optimization loop
for t in range(args.num_iter):

    # Anneal learning rate (makes videos look cleaner)
    if t in lr_schedule:
        for g in points_optim.param_groups:
            g['lr'] = lr_schedule[t]

    for optim in optimizers:
        optim.zero_grad()

    # Re-serialize and render the current curves; t is passed to the renderer
    # in the position where the initial render passed 0.
    scene_args = pydiffvg.RenderFunction.serialize_scene(
        canvas_width, canvas_height, shapes, shape_groups)
    img = render(canvas_width, canvas_height, 2, 2, t, None, *scene_args)

    # Blend the 4-channel render over an all-ones (white) background.
    alpha = img[:, :, 3:4]
    white = torch.ones(img.shape[0], img.shape[1], 3, device=pydiffvg.get_device())
    img = alpha * img[:, :, :3] + white * (1 - alpha)

    # Save every fifth frame.
    if t % 5 == 0:
        pydiffvg.imwrite(img.cpu(), '/content/res/iter_{}.png'.format(t // 5), gamma=gamma)

    # Add a batch axis and move channels first: HWC -> NHWC -> NCHW
    img = img[:, :, :3].unsqueeze(0).permute(0, 3, 1, 2)

    # Encode NUM_AUGS randomly augmented copies of the drawing in one batch.
    NUM_AUGS = 4
    img_augs = [augment_trans(img) for _ in range(NUM_AUGS)]
    im_batch = torch.cat(img_augs)
    image_features = model.encode_image(im_batch)

    # Loss: negative cosine similarity to the prompt for every augmentation,
    # plus (optionally) a 0.3-weighted similarity to each negative prompt.
    loss = 0
    for n in range(NUM_AUGS):
        aug_features = image_features[n:n + 1]
        loss -= torch.cosine_similarity(text_features, aug_features, dim=1)
        if use_negative:
            for neg_features in (text_features_neg1, text_features_neg2):
                loss += torch.cosine_similarity(neg_features, aug_features, dim=1) * 0.3

    # Backpropagate the gradients.
    loss.backward()

    # Take a gradient descent step.
    for optim in optimizers:
        optim.step()

    # Keep stroke widths and colours inside their valid ranges.
    for path in shapes:
        path.stroke_width.data.clamp_(1.0, max_width)
    for group in shape_groups:
        group.stroke_color.data.clamp_(0.0, 1.0)

    # Progress report every tenth iteration only.
    if t % 10 != 0:
        continue

    show_img(img.detach().cpu().numpy()[0])
    # show_img(torch.cat([img.detach(), img_aug.detach()], axis=3).cpu().numpy()[0])
    print('render loss:', loss.item())
    print('iteration:', t)
    with torch.no_grad():
        # NOTE(review): `nouns` / `nouns_features` are not defined in this
        # cell; presumably they come from an earlier cell - confirm.
        im_norm = image_features / image_features.norm(dim=-1, keepdim=True)
        noun_norm = nouns_features / nouns_features.norm(dim=-1, keepdim=True)
        similarity = (100.0 * im_norm @ noun_norm.T).softmax(dim=-1)
        # Top-5 nouns for the first augmented image.
        values, indices = similarity[0].topk(5)
        print("\nTop predictions:\n")
        for value, index in zip(values, indices):
            print(f"{nouns[index]:>16s}: {100 * value.item():.2f}%")

/content/diffvg/apps


<IPython.core.display.Image object>

render loss: -0.916015625
iteration: 0

Top predictions:

        particle: 2.01%
          sneeze: 0.89%
   precipitation: 0.81%
          helium: 0.76%
         rainbow: 0.75%


<IPython.core.display.Image object>

render loss: -1.20703125
iteration: 10

Top predictions:

            lion: 1.91%
             dog: 1.51%
            bear: 1.49%
             cat: 1.44%
          angora: 1.34%


<IPython.core.display.Image object>

render loss: -1.294921875
iteration: 20

Top predictions:

             dog: 13.31%
             fur: 2.17%
          angora: 2.07%
           puppy: 1.92%
             pet: 1.83%


<IPython.core.display.Image object>

render loss: -1.3125
iteration: 30

Top predictions:

             dog: 27.91%
           puppy: 4.21%
             pet: 3.60%
          animal: 1.37%
            bear: 1.33%


<IPython.core.display.Image object>

render loss: -1.2958984375
iteration: 40

Top predictions:

             dog: 21.81%
           puppy: 3.79%
             pet: 3.24%
             fur: 2.06%
            unit: 1.46%


<IPython.core.display.Image object>

render loss: -1.3291015625
iteration: 50

Top predictions:

             dog: 28.05%
           puppy: 4.44%
             pet: 3.00%
             fur: 2.53%
            wolf: 2.03%


<IPython.core.display.Image object>

render loss: -1.3525390625
iteration: 60

Top predictions:

             dog: 22.68%
             pig: 2.88%
           puppy: 2.71%
             pet: 2.51%
          animal: 1.95%


<IPython.core.display.Image object>

render loss: -1.34765625
iteration: 70

Top predictions:

             dog: 22.46%
             fur: 6.24%
          angora: 6.05%
           puppy: 3.67%
             pet: 2.81%


<IPython.core.display.Image object>

render loss: -1.37109375
iteration: 80

Top predictions:

             dog: 35.47%
             pet: 3.74%
           puppy: 3.30%
             fur: 1.46%
          animal: 1.10%


<IPython.core.display.Image object>

render loss: -1.3857421875
iteration: 90

Top predictions:

             dog: 45.24%
           puppy: 4.62%
             pet: 3.71%
             fur: 1.75%
    veterinarian: 1.06%


<IPython.core.display.Image object>

render loss: -1.3828125
iteration: 100

Top predictions:

             dog: 32.79%
           puppy: 4.95%
             pet: 3.15%
             fur: 3.10%
        siberian: 2.78%


<IPython.core.display.Image object>

render loss: -1.390625
iteration: 110

Top predictions:

             dog: 22.63%
             pig: 10.36%
           puppy: 4.60%
             pet: 4.46%
          animal: 1.80%


<IPython.core.display.Image object>

render loss: -1.4140625
iteration: 120

Top predictions:

             dog: 45.34%
           puppy: 6.53%
             pet: 4.35%
          animal: 1.35%
             fur: 0.78%


<IPython.core.display.Image object>

render loss: -1.3955078125
iteration: 130

Top predictions:

             dog: 32.57%
           puppy: 5.48%
             pet: 3.89%
             fur: 3.43%
          animal: 1.57%


<IPython.core.display.Image object>

render loss: -1.3857421875
iteration: 140

Top predictions:

             dog: 36.04%
           puppy: 6.88%
             pet: 3.30%
             fur: 1.09%
          animal: 1.09%


<IPython.core.display.Image object>

render loss: -1.40625
iteration: 150

Top predictions:

             dog: 34.91%
           puppy: 7.55%
             pet: 3.46%
         hamster: 2.96%
             fur: 1.71%


<IPython.core.display.Image object>

render loss: -1.400390625
iteration: 160

Top predictions:

             dog: 49.49%
           puppy: 7.59%
             pet: 3.94%
             fur: 1.98%
           owner: 1.13%


<IPython.core.display.Image object>

render loss: -1.42578125
iteration: 170

Top predictions:

             dog: 32.03%
           puppy: 5.07%
             pet: 2.47%
             fur: 2.47%
          copper: 1.41%


<IPython.core.display.Image object>

render loss: -1.4189453125
iteration: 180

Top predictions:

             dog: 43.77%
           puppy: 10.08%
             pet: 3.07%
             fur: 1.39%
        siberian: 1.03%


<IPython.core.display.Image object>

render loss: -1.431640625
iteration: 190

Top predictions:

             dog: 44.56%
           puppy: 6.03%
             pet: 2.85%
          animal: 1.39%
             fur: 1.23%


<IPython.core.display.Image object>

render loss: -1.4248046875
iteration: 200

Top predictions:

             dog: 35.25%
           puppy: 6.94%
             pet: 2.47%
             fur: 2.22%
            blue: 1.35%


<IPython.core.display.Image object>

render loss: -1.431640625
iteration: 210

Top predictions:

             dog: 41.09%
           puppy: 5.73%
             pet: 3.82%
             pig: 1.50%
          animal: 1.36%


<IPython.core.display.Image object>

render loss: -1.427734375
iteration: 220

Top predictions:

             dog: 27.64%
           puppy: 6.37%
             pet: 4.11%
             fur: 3.30%
          animal: 1.77%


<IPython.core.display.Image object>

render loss: -1.4267578125
iteration: 230

Top predictions:

             dog: 32.52%
           puppy: 4.40%
             pet: 3.02%
             cow: 1.89%
          animal: 1.24%


<IPython.core.display.Image object>

render loss: -1.447265625
iteration: 240

Top predictions:

             dog: 56.30%
           puppy: 8.63%
             pet: 2.63%
          animal: 1.21%
             fur: 0.78%


<IPython.core.display.Image object>

render loss: -1.42578125
iteration: 250

Top predictions:

             dog: 45.07%
           puppy: 7.83%
             pet: 3.82%
             fur: 1.40%
           owner: 1.38%


<IPython.core.display.Image object>

render loss: -1.427734375
iteration: 260

Top predictions:

             dog: 39.72%
           puppy: 6.29%
             pet: 4.06%
          animal: 1.20%
           owner: 1.16%


<IPython.core.display.Image object>

render loss: -1.404296875
iteration: 270

Top predictions:

             dog: 29.25%
           puppy: 11.46%
             pet: 3.72%
          copper: 1.25%
         sausage: 1.05%


<IPython.core.display.Image object>

render loss: -1.4501953125
iteration: 280

Top predictions:

             dog: 47.63%
           puppy: 9.09%
             pet: 4.03%
             fur: 2.23%
          animal: 1.90%


<IPython.core.display.Image object>

render loss: -1.435546875
iteration: 290

Top predictions:

             dog: 42.36%
           puppy: 11.05%
             pet: 3.59%
          animal: 1.75%
         dogsled: 1.32%


<IPython.core.display.Image object>

render loss: -1.4453125
iteration: 300

Top predictions:

             dog: 47.68%
           puppy: 7.78%
             pet: 4.30%
           owner: 1.79%
             fur: 1.40%


<IPython.core.display.Image object>

render loss: -1.4404296875
iteration: 310

Top predictions:

             dog: 32.18%
           puppy: 5.59%
             pet: 3.39%
          animal: 1.82%
          angora: 1.60%


<IPython.core.display.Image object>

render loss: -1.44140625
iteration: 320

Top predictions:

             dog: 47.56%
           puppy: 8.26%
             pet: 3.56%
          animal: 1.96%
             fur: 0.77%


<IPython.core.display.Image object>

render loss: -1.4306640625
iteration: 330

Top predictions:

             dog: 40.14%
           puppy: 6.98%
             pet: 2.82%
             fur: 1.56%
          animal: 1.37%


<IPython.core.display.Image object>

render loss: -1.42578125
iteration: 340

Top predictions:

             dog: 43.92%
           puppy: 9.50%
             pet: 4.09%
             fur: 1.55%
          animal: 1.17%


<IPython.core.display.Image object>

render loss: -1.4384765625
iteration: 350

Top predictions:

             dog: 42.50%
           puppy: 7.39%
             pet: 5.08%
           owner: 1.70%
             fur: 1.50%


<IPython.core.display.Image object>

render loss: -1.4521484375
iteration: 360

Top predictions:

             dog: 42.99%
           puppy: 9.30%
             pet: 4.00%
          animal: 2.08%
          angora: 1.67%


<IPython.core.display.Image object>

render loss: -1.4453125
iteration: 370

Top predictions:

             dog: 59.33%
           puppy: 7.54%
             pet: 4.17%
          animal: 1.58%
           owner: 0.79%


<IPython.core.display.Image object>

render loss: -1.44140625
iteration: 380

Top predictions:

             dog: 44.60%
           puppy: 12.38%
             pet: 3.13%
             fur: 1.60%
          animal: 1.07%


<IPython.core.display.Image object>

render loss: -1.43359375
iteration: 390

Top predictions:

             dog: 39.14%
           puppy: 8.20%
             pet: 4.00%
             fur: 1.83%
         dogsled: 1.80%


<IPython.core.display.Image object>

render loss: -1.443359375
iteration: 400

Top predictions:

             dog: 54.30%
           puppy: 11.38%
             pet: 5.72%
         sausage: 1.16%
          animal: 1.13%


<IPython.core.display.Image object>

render loss: -1.451171875
iteration: 410

Top predictions:

             dog: 48.58%
           puppy: 5.28%
             pet: 3.99%
          animal: 1.88%
         dogsled: 1.29%


<IPython.core.display.Image object>

render loss: -1.439453125
iteration: 420

Top predictions:

             dog: 42.14%
           puppy: 8.30%
             pet: 4.17%
             fur: 1.16%
          animal: 1.12%


<IPython.core.display.Image object>

render loss: -1.46484375
iteration: 430

Top predictions:

             dog: 50.93%
           puppy: 7.57%
             pet: 4.18%
          animal: 1.24%
         sausage: 1.06%


<IPython.core.display.Image object>

render loss: -1.458984375
iteration: 440

Top predictions:

             dog: 47.07%
           puppy: 7.93%
             pet: 4.96%
          animal: 1.66%
             fur: 1.34%


<IPython.core.display.Image object>

render loss: -1.4609375
iteration: 450

Top predictions:

             dog: 43.02%
           puppy: 13.12%
             pet: 3.53%
          animal: 2.01%
             fur: 1.89%


<IPython.core.display.Image object>

render loss: -1.439453125
iteration: 460

Top predictions:

             dog: 35.57%
           puppy: 9.58%
             pet: 3.87%
           owner: 2.14%
         sausage: 1.24%


<IPython.core.display.Image object>

render loss: -1.458984375
iteration: 470

Top predictions:

             dog: 34.08%
           puppy: 4.20%
             pet: 2.39%
            wolf: 2.18%
          animal: 2.18%


<IPython.core.display.Image object>

render loss: -1.46484375
iteration: 480

Top predictions:

             dog: 61.18%
           puppy: 6.06%
             pet: 3.91%
          animal: 1.58%
             fur: 0.68%


<IPython.core.display.Image object>

render loss: -1.453125
iteration: 490

Top predictions:

             dog: 27.47%
            wolf: 3.49%
             fur: 3.08%
          animal: 2.89%
           puppy: 2.80%


<IPython.core.display.Image object>

render loss: -1.45703125
iteration: 500

Top predictions:

             dog: 33.20%
           puppy: 5.42%
             pet: 3.39%
             fur: 2.41%
          animal: 1.65%


<IPython.core.display.Image object>

render loss: -1.470703125
iteration: 510

Top predictions:

             dog: 50.73%
           puppy: 8.54%
             pet: 4.87%
          animal: 2.23%
            asia: 0.65%


<IPython.core.display.Image object>

render loss: -1.4580078125
iteration: 520

Top predictions:

             dog: 48.95%
           puppy: 7.06%
             pet: 4.55%
          animal: 1.78%
         dogsled: 1.53%


<IPython.core.display.Image object>

render loss: -1.484375
iteration: 530

Top predictions:

             dog: 52.34%
           puppy: 5.87%
             pet: 2.69%
          animal: 2.37%
             fur: 1.68%


<IPython.core.display.Image object>

render loss: -1.458984375
iteration: 540

Top predictions:

             dog: 55.71%
           puppy: 6.25%
             pet: 2.69%
          animal: 1.31%
         dogsled: 1.09%


<IPython.core.display.Image object>

render loss: -1.478515625
iteration: 550

Top predictions:

             dog: 52.20%
           puppy: 7.06%
             pet: 3.23%
          animal: 2.29%
             fur: 1.39%


<IPython.core.display.Image object>

render loss: -1.4716796875
iteration: 560

Top predictions:

             dog: 38.38%
           puppy: 4.31%
             pet: 4.05%
          animal: 2.69%
             pig: 1.85%


<IPython.core.display.Image object>

render loss: -1.486328125
iteration: 570

Top predictions:

             dog: 52.15%
           puppy: 5.67%
             pet: 4.02%
          animal: 1.35%
            grey: 0.90%


<IPython.core.display.Image object>

render loss: -1.4716796875
iteration: 580

Top predictions:

             dog: 59.38%
           puppy: 8.83%
             pet: 3.25%
          animal: 1.35%
             fur: 0.96%


<IPython.core.display.Image object>

render loss: -1.486328125
iteration: 590

Top predictions:

             dog: 47.95%
           puppy: 6.49%
             pet: 3.70%
          animal: 3.07%
             fur: 1.36%


<IPython.core.display.Image object>

render loss: -1.5009765625
iteration: 600

Top predictions:

             dog: 58.54%
           puppy: 4.80%
             pet: 3.63%
          animal: 1.94%
             fur: 1.46%


<IPython.core.display.Image object>

render loss: -1.501953125
iteration: 610

Top predictions:

             dog: 56.74%
           puppy: 6.17%
             pet: 5.44%
             fur: 2.34%
          animal: 2.34%


<IPython.core.display.Image object>

render loss: -1.474609375
iteration: 620

Top predictions:

             dog: 27.56%
             cow: 4.36%
          donkey: 3.40%
             pet: 2.91%
          animal: 2.65%


<IPython.core.display.Image object>

render loss: -1.4775390625
iteration: 630

Top predictions:

             dog: 57.08%
           puppy: 10.24%
             pet: 3.32%
         dogsled: 1.95%
          animal: 1.83%


<IPython.core.display.Image object>

render loss: -1.4990234375
iteration: 640

Top predictions:

             dog: 54.35%
         dogsled: 5.05%
           puppy: 3.59%
             pet: 3.37%
          animal: 1.98%


<IPython.core.display.Image object>

render loss: -1.4892578125
iteration: 650

Top predictions:

             dog: 58.25%
           puppy: 10.44%
             pet: 5.59%
          animal: 1.81%
             fur: 0.94%


<IPython.core.display.Image object>

render loss: -1.51171875
iteration: 660

Top predictions:

             dog: 63.72%
           puppy: 7.61%
             pet: 4.34%
          animal: 2.47%
        anteater: 0.69%


<IPython.core.display.Image object>

render loss: -1.482421875
iteration: 670

Top predictions:

             dog: 48.93%
           puppy: 5.66%
             pet: 2.94%
             fur: 2.36%
          animal: 2.22%


<IPython.core.display.Image object>

render loss: -1.4892578125
iteration: 680

Top predictions:

             dog: 58.79%
           puppy: 5.64%
             pet: 2.93%
         dogsled: 1.78%
          animal: 1.38%


<IPython.core.display.Image object>

render loss: -1.4609375
iteration: 690

Top predictions:

             dog: 54.44%
           puppy: 8.89%
             pet: 3.48%
          animal: 1.24%
             pig: 1.10%


<IPython.core.display.Image object>

render loss: -1.494140625
iteration: 700

Top predictions:

             dog: 59.08%
           puppy: 5.85%
             pet: 4.02%
          animal: 2.37%
         dogsled: 1.23%


<IPython.core.display.Image object>

render loss: -1.5068359375
iteration: 710

Top predictions:

             dog: 66.06%
           puppy: 8.14%
             pet: 3.19%
         dogsled: 1.65%
          animal: 1.14%


<IPython.core.display.Image object>

render loss: -1.4951171875
iteration: 720

Top predictions:

             dog: 46.73%
           puppy: 8.37%
             pet: 4.08%
         dogsled: 3.08%
          animal: 1.50%


<IPython.core.display.Image object>

render loss: -1.5078125
iteration: 730

Top predictions:

             dog: 54.30%
           puppy: 6.90%
             pet: 3.58%
          animal: 1.80%
             fur: 1.75%


<IPython.core.display.Image object>

render loss: -1.49609375
iteration: 740

Top predictions:

             dog: 60.55%
           puppy: 6.38%
             pet: 4.25%
          animal: 2.28%
             fur: 0.95%


<IPython.core.display.Image object>

render loss: -1.5
iteration: 750

Top predictions:

             dog: 58.89%
           puppy: 4.68%
             pet: 3.22%
          animal: 1.62%
         dogsled: 1.34%


<IPython.core.display.Image object>

render loss: -1.505859375
iteration: 760

Top predictions:

             dog: 65.77%
           puppy: 8.11%
             pet: 4.07%
          animal: 1.45%
         dogsled: 1.17%


<IPython.core.display.Image object>

render loss: -1.4990234375
iteration: 770

Top predictions:

             dog: 53.66%
           puppy: 6.62%
             pet: 3.33%
          animal: 2.29%
             fur: 1.90%


<IPython.core.display.Image object>

render loss: -1.498046875
iteration: 780

Top predictions:

             dog: 58.50%
           puppy: 6.99%
             pet: 4.65%
          animal: 2.42%
         dogsled: 1.33%


<IPython.core.display.Image object>

render loss: -1.5048828125
iteration: 790

Top predictions:

             dog: 64.79%
           puppy: 8.77%
             pet: 2.59%
          animal: 1.62%
             fur: 1.08%


<IPython.core.display.Image object>

render loss: -1.50390625
iteration: 800

Top predictions:

             dog: 59.62%
           puppy: 6.90%
             pet: 6.09%
          animal: 1.64%
         dogsled: 0.75%


<IPython.core.display.Image object>

render loss: -1.515625
iteration: 810

Top predictions:

             dog: 61.77%
           puppy: 5.57%
             pet: 3.49%
          animal: 1.55%
         dogsled: 1.20%


<IPython.core.display.Image object>

render loss: -1.513671875
iteration: 820

Top predictions:

             dog: 61.52%
           puppy: 9.73%
             pet: 3.47%
          animal: 2.38%
            asia: 0.88%


<IPython.core.display.Image object>

render loss: -1.4951171875
iteration: 830

Top predictions:

             dog: 59.96%
           puppy: 9.78%
             pet: 4.08%
         dogsled: 2.80%
          animal: 2.80%


<IPython.core.display.Image object>

render loss: -1.5087890625
iteration: 840

Top predictions:

             dog: 61.52%
           puppy: 6.29%
             pet: 3.70%
          animal: 1.24%
         dogsled: 1.09%


<IPython.core.display.Image object>

render loss: -1.490234375
iteration: 850

Top predictions:

             dog: 50.49%
           puppy: 8.50%
             pet: 3.66%
          animal: 1.52%
            unit: 1.15%


<IPython.core.display.Image object>

render loss: -1.505859375
iteration: 860

Top predictions:

             dog: 59.72%
           puppy: 7.83%
             pet: 3.59%
          animal: 1.24%
         dogsled: 1.06%


<IPython.core.display.Image object>

render loss: -1.521484375
iteration: 870

Top predictions:

             dog: 47.90%
           puppy: 8.86%
             pet: 6.28%
         dogsled: 2.88%
          animal: 1.98%


<IPython.core.display.Image object>

render loss: -1.53125
iteration: 880

Top predictions:

             dog: 56.84%
           puppy: 3.99%
          animal: 2.66%
             pet: 1.95%
           horse: 0.81%


<IPython.core.display.Image object>

render loss: -1.517578125
iteration: 890

Top predictions:

             dog: 60.16%
           puppy: 7.65%
             pet: 4.09%
          animal: 1.82%
             fur: 1.17%


<IPython.core.display.Image object>

render loss: -1.474609375
iteration: 900

Top predictions:

             dog: 53.91%
           puppy: 5.86%
             pet: 3.67%
             fur: 1.73%
          animal: 1.27%


<IPython.core.display.Image object>

render loss: -1.5068359375
iteration: 910

Top predictions:

             dog: 58.50%
           puppy: 8.97%
             pet: 3.98%
         dogsled: 2.49%
          animal: 1.71%


<IPython.core.display.Image object>

render loss: -1.517578125
iteration: 920

Top predictions:

             dog: 55.62%
             pet: 7.30%
           puppy: 5.86%
          animal: 2.09%
         dogsled: 1.15%


<IPython.core.display.Image object>

render loss: -1.52734375
iteration: 930

Top predictions:

             dog: 56.69%
           puppy: 4.65%
         dogsled: 4.65%
             pet: 4.24%
          animal: 2.13%


<IPython.core.display.Image object>

render loss: -1.4990234375
iteration: 940

Top predictions:

             dog: 44.80%
           puppy: 6.66%
             pet: 3.92%
          animal: 2.53%
         dogsled: 1.74%


<IPython.core.display.Image object>

render loss: -1.5234375
iteration: 950

Top predictions:

             dog: 55.57%
           puppy: 6.64%
             pet: 2.60%
          animal: 1.58%
         dogsled: 1.05%


<IPython.core.display.Image object>

render loss: -1.51953125
iteration: 960

Top predictions:

             dog: 63.77%
           puppy: 5.75%
             pet: 2.98%
         dogsled: 2.63%
          animal: 2.18%


<IPython.core.display.Image object>

render loss: -1.5029296875
iteration: 970

Top predictions:

             dog: 53.81%
           puppy: 9.06%
             pet: 6.42%
          animal: 2.94%
             fur: 1.15%


<IPython.core.display.Image object>

render loss: -1.533203125
iteration: 980

Top predictions:

             dog: 64.36%
           puppy: 4.97%
             pet: 4.24%
          animal: 1.72%
         dogsled: 1.51%


<IPython.core.display.Image object>

render loss: -1.498046875
iteration: 990

Top predictions:

             dog: 48.61%
           puppy: 3.31%
          animal: 3.20%
             pet: 2.83%
         dogsled: 1.51%


# CLIPDraw
Synthesize drawings to match a text prompt!


![](https://kvfrans.com/content/images/2021/06/Screen-Shot-2021-06-10-at-8.47.23-PM.png)


> This work presents CLIPDraw, an algorithm that synthesizes novel drawings based on natural language input. CLIPDraw does not require any training; rather a pre-trained CLIP language-image encoder is used as a metric for maximizing similarity between the given description and a generated drawing. Crucially, CLIPDraw operates over vector strokes rather than pixel images, a constraint that biases drawings towards simpler human-recognizable shapes. Results compare between CLIPDraw and other synthesis-through-optimization methods, as well as highlight various interesting behaviors of CLIPDraw, such as satisfying ambiguous text in multiple ways, reliably producing drawings in diverse artistic styles, and scaling from simple to complex visual representations as stroke count is increased.





This Colab notebook goes along with the paper: [CLIPDraw: Exploring Text-to-Drawing Synthesis through Language-Image Encoders](https://arxiv.org/abs/2106.14843)

by **Kevin Frans**, L.B. Soros, Olaf Witkowski

Read the blog post for cool results and analysis! [https://kvfrans.com/clipdraw-exploring-text-to-drawing-synthesis/](https://kvfrans.com/clipdraw-exploring-text-to-drawing-synthesis/)

Feel free to tweet me any cool creations, [@kvfrans](https://twitter.com/kvfrans)

Code adapted from diffvg: https://github.com/BachiLi/diffvg/blob/master/apps/painterly_rendering.py

---






**STEPS:**


1. Click "Connect" in the top right corner.
2. Runtime -> Change runtime type -> Hardware accelerator -> GPU
3. Click the run button on "Pre Installation". This will install dependencies; it may take a while.
4. **Important:** Runtime -> Restart Runtime
5. Run the "Imports and Notebook Utilities" and "Load CLIP" sections.


6. "Curve Optimizer" will synthesize a drawing to match your text. You can edit the text prompt at the top of the code block.
7. "Video Renderer" can create videos that show the optimization process, and videos that render a drawing stroke-by-stroke.

In [2]:
#@title Pre Installation {vertical-output: true}

import subprocess

# Ask nvcc for its version banner and pull out the comma-separated field that
# starts with "release"; its last word is the CUDA version number.
nvcc_report = subprocess.check_output(["nvcc", "--version"]).decode("UTF-8")
release_fields = [field for field in nvcc_report.split(", ") if field.startswith("release")]
CUDA_version = release_fields[0].split(" ")[-1]
print("CUDA version:", CUDA_version)

# Wheel suffix for the matching torch build; any CUDA version not listed here
# falls back to "+cu110".
torch_version_suffix = {
    "10.0": "+cu100",
    "10.1": "+cu101",
    "10.2": "",
}.get(CUDA_version, "+cu110")

# !pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex
%cd /content/
!pip install svgwrite
!pip install svgpathtools
!pip install cssutils
!pip install numba
!pip install torch-tools
!pip install visdom

!git clone https://github.com/BachiLi/diffvg
%cd diffvg
# !ls
!git submodule update --init --recursive

# Rewrite diffvg's CMakeLists.txt in place, dropping every line that mentions
# find_package(TensorFlow).
with open("/content/diffvg/CMakeLists.txt", "r+") as inFile:
    data = [line for line in inFile if "find_package(TensorFlow)" not in line]
    # Overwrite from the start, then cut off whatever is left of the old,
    # longer content.
    inFile.seek(0)
    inFile.writelines(data)
    inFile.truncate()
!python setup.py install

!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git --no-deps

CUDA version: 12.2
/content
Collecting svgwrite
  Downloading svgwrite-1.4.3-py3-none-any.whl.metadata (8.8 kB)
Downloading svgwrite-1.4.3-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.1/67.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: svgwrite
Successfully installed svgwrite-1.4.3
Collecting svgpathtools
  Downloading svgpathtools-1.6.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading svgpathtools-1.6.1-py2.py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.4/67.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: svgpathtools
Successfully installed svgpathtools-1.6.1
Collecting cssutils
  Downloading cssutils-2.11.1-py3-none-any.whl.metadata (8.7 kB)
Downloading cssutils-2.11.1-py3-none-any.whl (385 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m385.7/385.7 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [1]:
#@title Imports and Notebook Utilities {vertical-output: true}
%tensorflow_version 2.x

import os
import io
import PIL.Image, PIL.ImageDraw
import base64
import zipfile
import json
import requests
import numpy as np
import matplotlib.pylab as pl
import glob

from IPython.display import Image, HTML, clear_output
from tqdm import tqdm_notebook, tnrange

os.environ['FFMPEG_BINARY'] = 'ffmpeg'
import moviepy.editor as mvp
from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter


def imread(url, max_size=None, mode=None):
  """Load an image and return it as a float32 array scaled by 1/255.

  Args:
    url: Image location. Strings starting with 'http:' or 'https:' are
      downloaded with `requests`; anything else is handed directly to
      `PIL.Image.open`.
    max_size: If not None, the image is resized to exactly
      (max_size, max_size); the aspect ratio is not preserved.
    mode: If not None, the image is converted to this PIL mode.

  Returns:
    The pixel data as a float32 numpy array divided by 255.

  Raises:
    requests.HTTPError: If the download answers with a 4xx/5xx status.
  """
  if url.startswith(('http:', 'https:')):
    r = requests.get(url)
    # Fail with the real HTTP error instead of passing an error page to PIL,
    # which would only report that it cannot identify the image.
    r.raise_for_status()
    f = io.BytesIO(r.content)
  else:
    f = url
  img = PIL.Image.open(f)
  if max_size is not None:
    img = img.resize((max_size, max_size))
  if mode is not None:
    img = img.convert(mode)
  img = np.float32(img)/255.0
  return img

def np2pil(a):
  """Convert an array to a PIL image.

  float32/float64 arrays are clipped to [0, 1] and scaled to uint8 first;
  every other dtype is passed to PIL unchanged.
  """
  is_float = a.dtype in (np.float32, np.float64)
  pixels = np.uint8(np.clip(a, 0, 1)*255) if is_float else a
  return PIL.Image.fromarray(pixels)

def imwrite(f, a, fmt=None):
  """Save an image array to a path or to an open binary file object.

  Args:
    f: Destination. A str is treated as a filesystem path; anything else is
      passed to PIL as an already-open writable file object.
    a: Image data, converted with `np.asarray` and then `np2pil`.
    fmt: Image format name for PIL. Ignored when `f` is a path: the format
      is then taken from the file extension ('jpg' is mapped to 'jpeg').
  """
  image = np2pil(np.asarray(a))
  if isinstance(f, str):
    fmt = f.rsplit('.', 1)[-1].lower()
    if fmt == 'jpg':
      fmt = 'jpeg'
    # Own the handle here so it is flushed and closed even if saving fails;
    # the previous code opened the file and never closed it.
    with open(f, 'wb') as out:
      image.save(out, fmt, quality=95)
  else:
    image.save(f, fmt, quality=95)

def imencode(a, fmt='jpeg'):
  """Encode an image array and return the encoded bytes.

  3-D arrays whose last axis has length 4 are always encoded as PNG,
  regardless of `fmt`.
  """
  pixels = np.asarray(a)
  four_channel = pixels.ndim == 3 and pixels.shape[-1] == 4
  buffer = io.BytesIO()
  imwrite(buffer, pixels, 'png' if four_channel else fmt)
  return buffer.getvalue()

def im2url(a, fmt='jpeg'):
  """Return the image as a base64 `data:` URL.

  Args:
    a: Image data accepted by `imencode`.
    fmt: Requested image format; used for the encoding and for the media
      type label in the URL.

  Returns:
    A string of the form 'data:image/<FMT>;base64,<payload>'.
  """
  a = np.asarray(a)
  # imencode switches to PNG for 3-D arrays with 4 channels. Apply the same
  # rule here so the media type in the URL matches the bytes actually
  # produced (previously such images were PNG data labelled with `fmt`).
  if a.ndim == 3 and a.shape[-1] == 4:
    fmt = 'png'
  encoded = imencode(a, fmt)
  base64_byte_string = base64.b64encode(encoded).decode('ascii')
  return 'data:image/' + fmt.upper() + ';base64,' + base64_byte_string

def imshow(a, fmt='jpeg'):
  """Encode an image array with `imencode` and display it in the notebook."""
  encoded = imencode(a, fmt)
  display(Image(data=encoded))


def tile2d(a, w=None):
  """Lay out a stack of equally sized tiles as one grid array.

  Args:
    a: Array-like whose first axis indexes the tiles and whose next two
      axes are the tile rows and columns; further axes are kept as-is.
    w: Number of tiles per grid row. Defaults to ceil(sqrt(number of tiles)).

  Returns:
    An array of shape (tile_rows * grid_rows, tile_cols * w, ...). Grid
    cells without a tile are filled by constant padding.
  """
  tiles = np.asarray(a)
  count = len(tiles)
  if w is None:
    w = int(np.ceil(np.sqrt(count)))
  tile_h, tile_w = tiles.shape[1:3]
  # Pad along the tile axis so the tile count becomes a multiple of w.
  missing = (w-count)%w
  pad_spec = [(0, missing)]+[(0, 0)]*(tiles.ndim-1)
  tiles = np.pad(tiles, pad_spec, 'constant')
  grid_rows = len(tiles)//w
  trailing = list(tiles.shape[3:])
  grid = tiles.reshape([grid_rows, w, tile_h, tile_w]+trailing)
  # Bring the tile-row axis next to the grid-row axis so the final reshape
  # merges (grid row, tile row) and (grid column, tile column).
  grid = np.rollaxis(grid, 2, 1)
  return grid.reshape([tile_h*grid_rows, tile_w*w]+trailing)

from torchvision import utils
def show_img(img):
    """Display a channels-first image array inline in the notebook.

    The first axis is moved to the end, values are clipped to [0, 1] and
    converted to uint8, and the result is shown as an RGB image via `imshow`.
    """
    channels_last = np.clip(np.transpose(img, (1, 2, 0)), 0, 1)
    # NOTE(review): the scale factor is 254, not 255 — confirm this is intended.
    as_bytes = np.uint8(channels_last * 254)
    imshow(PIL.Image.fromarray(as_bytes, mode="RGB"))

def zoom(img, scale=4):
  """Enlarge an array by repeating each element `scale` times along axes 0 and 1."""
  for axis in (0, 1):
    img = np.repeat(img, scale, axis)
  return img

class VideoWriter:
  """Append frames to a video file through `FFMPEG_VideoWriter`.

  The underlying writer is created on the first `add` call, using that
  frame's height and width. The class can be used as a context manager; when
  the default filename '_autoplay.mp4' is in use, leaving the `with` block
  also displays the video.
  """

  def __init__(self, filename='_autoplay.mp4', fps=30.0, **kw):
    # Keyword arguments handed to FFMPEG_VideoWriter once it is created.
    self.params = {'filename': filename, 'fps': fps, **kw}
    self.writer = None

  def add(self, img):
    """Write one frame; float frames are clipped to [0, 1] and scaled to uint8."""
    frame = np.asarray(img)
    if self.writer is None:
      height, width = frame.shape[:2]
      self.writer = FFMPEG_VideoWriter(size=(width, height), **self.params)
    if frame.dtype in (np.float32, np.float64):
      frame = np.uint8(frame.clip(0, 1)*255)
    if frame.ndim == 2:
      # 2-D frame: repeat it across a new trailing axis of length 3.
      frame = np.repeat(frame[..., None], 3, -1)
    self.writer.write_frame(frame)

  def close(self):
    """Close the underlying writer if one was created."""
    if self.writer:
      self.writer.close()

  def __enter__(self):
    return self

  def __exit__(self, *kw):
    self.close()
    autoplay = self.params['filename'] == '_autoplay.mp4'
    if autoplay:
      self.show()

  def show(self, **kw):
    """Close the writer and display the written file in the notebook."""
    self.close()
    video_path = self.params['filename']
    display(mvp.ipython_display(video_path, **kw))

!nvidia-smi -L

import numpy as np
import torch
import os

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

print("Torch version:", torch.__version__)

# !pip install DALL-E

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


  if event.key is 'enter':



GPU 0: Tesla T4 (UUID: GPU-168fbcf9-9b25-c31c-638c-6bf887b5185b)
Torch version: 2.5.0+cu121


In [2]:
#@title Load CLIP {vertical-output: true}

# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import os
import clip
import torch
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.datasets import CIFAR100

# Load the non-JIT 'ViT-B/32' CLIP model onto the CUDA device; clip.load also
# returns a second object, kept here as `preprocess`.
device = torch.device('cuda')
model, preprocess = clip.load('ViT-B/32', device=device, jit=False)

nouns = "aardvark abyssinian accelerator accordion account accountant acknowledgment acoustic acrylic act action active activity actor actress adapter addition address adjustment adult advantage advertisement advice afghanistan africa aftermath afternoon aftershave afterthought age agenda agreement air airbus airmail airplane airport airship alarm albatross alcohol algebra algeria alibi alley alligator alloy almanac alphabet alto aluminium aluminum ambulance america amount amusement anatomy anethesiologist anger angle angora animal anime ankle answer ant antarctica anteater antelope anthony anthropology apartment apology apparatus apparel appeal appendix apple appliance approval april aquarius arch archaeology archeology archer architecture area argentina argument aries arithmetic arm armadillo armchair armenian army arrow art ash ashtray asia asparagus asphalt asterisk astronomy athlete atm atom attack attempt attention attic attraction august aunt australia australian author authorisation authority authorization avenue babies baboon baby back backbone bacon badge badger bag bagel bagpipe bail bait baker bakery balance balinese ball balloon bamboo banana band bandana bangladesh bangle banjo bank bankbook banker bar barbara barber barge baritone barometer base baseball basement basin basket basketball bass bassoon bat bath bathroom bathtub battery battle bay beach bead beam bean bear beard beast beat beautician beauty beaver bed bedroom bee beech beef beer beet beetle beggar beginner begonia behavior belgian belief believe bell belt bench bengal beret berry bestseller betty bibliography bicycle bike bill billboard biology biplane birch bird birth birthday bit bite black bladder blade blanket blinker blizzard block blood blouse blow blowgun blue board boat bobcat body bolt bomb bomber bone bongo bonsai book bookcase booklet boot border botany bottle bottom boundary bow bowl bowling box boy bra brace bracket brain brake branch brand brandy brass brazil bread break 
breakfast breath brian brick bridge british broccoli brochure broker bronze brother brother-in-law brow brown brush bubble bucket budget buffer buffet bugle building bulb bull bulldozer bumper bun burglar burma burn burst bus bush business butane butcher butter button buzzard cabbage cabinet cable cactus cafe cake calculator calculus calendar calf call camel camera camp can canada canadian cancer candle cannon canoe canvas cap capital cappelletti capricorn captain caption car caravan carbon card cardboard cardigan care carnation carol carp carpenter carriage carrot cart cartoon case cast castanet cat catamaran caterpillar cathedral catsup cattle cauliflower cause caution cave c-clamp cd ceiling celery celeste cell cellar cello celsius cement cemetery cent centimeter century ceramic cereal certification chain chair chalk chance change channel character chard charles chauffeur check cheek cheese cheetah chef chemistry cheque cherries cherry chess chest chick chicken chicory chief child children chill chime chimpanzee chin china chinese chive chocolate chord christmas christopher chronometer church cicada cinema circle circulation cirrus citizenship city clam clarinet class claus clave clef clerk click client climb clipper cloakroom clock close closet cloth cloud cloudy clover club clutch coach coal coast coat cobweb cockroach cocktail cocoa cod coffee coil coin coke cold collar college collision colombia colon colony color colt column columnist comb comfort comic comma command commission committee community company comparison competition competitor composer composition computer condition condor cone confirmation conga congo conifer connection consonant continent control cook cooking copper copy copyright cord cork cormorant corn cornet correspondent cost cotton couch cougar cough country course court cousin cover cow cowbell crab crack cracker craftsman crate crawdad crayfish crayon cream creator creature credit creditor creek crib cricket crime criminal crocodile 
crocus croissant crook crop cross crow crowd crown crush cry cub cuban cucumber cultivator cup cupboard cupcake curler currency current curtain curve cushion custard customer cut cuticle cycle cyclone cylinder cymbal dad daffodil dahlia daisy damage dance dancer danger daniel dash dashboard database date daughter david day dead deadline deal death deborah debt debtor decade december decimal decision decrease dedication deer defense deficit degree delete delivery den denim dentist deodorant department deposit description desert design desire desk dessert destruction detail detective development dew diamond diaphragm dibble dictionary dietician difference digestion digger digital dill dime dimple dinghy dinner dinosaur diploma dipstick direction dirt disadvantage discovery discussion disease disgust dish distance distribution distributor diving division divorced dock doctor dog dogsled doll dollar dolphin domain donald donkey donna door dorothy double doubt downtown dragon dragonfly drain drake drama draw drawbridge drawer dream dredger dress dresser dressing drill drink drive driver driving drizzle drop drug drum dry dryer duck duckling dugout dungeon dust eagle ear earth earthquake ease east edge edger editor editorial education edward eel effect egg eggnog eggplant egypt eight elbow element elephant elizabeth ellipse emery employee employer encyclopedia end enemy energy engine engineer engineering english enquiry entrance environment epoch epoxy equinox equipment era error estimate ethernet ethiopia euphonium europe evening event examination example exchange exclamation exhaust ex-husband existence expansion experience expert explanation ex-wife eye eyebrow eyelash eyeliner face facilities fact factory fahrenheit fairies fall family fan fang farm farmer fat father father-in-law faucet fear feast feather feature february fedelini feedback feeling feet felony female fender ferry ferryboat fertilizer fiber fiberglass fibre fiction field fifth fight fighter file find 
fine finger fir fire fired fireman fireplace firewall fish fisherman flag flame flare flat flavor flax flesh flight flock flood floor flower flugelhorn flute fly foam fog fold font food foot football footnote force forecast forehead forest forgery fork form format fortnight foundation fountain fowl fox foxglove fragrance frame france freckle freeze freezer freighter french freon friction friday fridge friend frog front frost frown fruit fuel fur furniture galley gallon game gander garage garden garlic gas gasoline gate gateway gauge gazelle gear gearshift geese gemini gender geography geology geometry george geranium german germany ghana ghost giant giraffe girdle girl gladiolus glass glider gliding glockenspiel glove glue goal goat gold goldfish golf gondola gong good-bye goose gore-tex gorilla gosling government governor grade grain gram granddaughter grandfather grandmother grandson grape graphic grass grasshopper gray grease great-grandfather great-grandmother greece greek green grenade grey grill grip ground group grouse growth guarantee guatemalan guide guilty guitar gum gun gym gymnast hacksaw hail hair haircut half-brother half-sister halibut hall hallway hamburger hammer hamster hand handball handicap handle handsaw harbor hardboard hardcover hardhat hardware harmonica harmony harp hat hate hawk head headlight headline health hearing heart heat heaven hedge height helen helicopter helium hell helmet help hemp hen heron herring hexagon hill himalayan hip hippopotamus history hobbies hockey hoe hole holiday home honey hood hook hope horn horse hose hospital hot hour hourglass house hovercraft hub hubcap humidity humor hurricane hyacinth hydrant hydrofoil hydrogen hyena hygienic ice icebreaker icicle icon idea ikebana illegal imprisonment improvement impulse inch income increase index india indonesia industry ink innocent input insect instruction instrument insulation insurance interactive interest internet interviewer intestine invention inventory invoice 
iran iraq iris iron island israel italian italy jacket jaguar jail jam james january japan japanese jar jasmine jason jaw jeans jeep jeff jelly jellyfish jennifer jet jewel jogging john join joke joseph journey judge judo juice july jumbo jump jumper june jury justice jute kale kamikaze kangaroo karate karen kayak kendo kenneth kenya ketchup kettle kettledrum kevin key keyboard keyboarding kick kidney kilogram kilometer kimberly kiss kitchen kite kitten kitty knee knickers knife knight knot knowledge kohlrabi korean laborer lace ladybug lake lamb lamp lan land landmine language larch lasagna latency latex lathe laugh laundry laura law lawyer layer lead leaf learning leather leek leg legal lemonade lentil leo leopard letter lettuce level libra library license lier lift light lightning lilac lily limit linda line linen link lion lip lipstick liquid liquor lisa list literature litter liver lizard llama loaf loan lobster lock locket locust look loss lotion love low lumber lunch lunchroom lung lunge lute luttuce lycra lynx lyocell lyre lyric macaroni machine macrame magazine magic magician maid mail mailbox mailman makeup malaysia male mall mallet man manager mandolin manicure manx map maple maraca marble march margaret margin maria marimba mark mark market married mary mascara mask mass match math mattock may mayonnaise meal measure meat mechanic medicine meeting melody memory men menu mercury message metal meteorology meter methane mexican mexico mice michael michelle microwave middle mile milk milkshake millennium millimeter millisecond mimosa mind mine minibus mini-skirt minister mint minute mirror missile mist mistake mitten moat modem mole mom monday money monkey month moon morning morocco mosque mosquito mother mother-in-law motion motorboat motorcycle mountain mouse moustache mouth move multi-hop multimedia muscle museum music musician mustard myanmar nail name nancy napkin narcissus nation neck need needle neon nepal nephew nerve nest net network news newsprint 
newsstand nic nickel niece nigeria night nitrogen node noise noodle north north america north korea norwegian nose note notebook notify novel november number numeric nurse nut nylon oak oatmeal objective oboe observation occupation ocean ocelot octagon octave october octopus odometer offence offer office oil okra olive onion open opera operation ophthalmologist opinion option orange orchestra orchid order organ organisation organization ornament ostrich otter ounce output outrigger oval oven overcoat owl owner ox oxygen oyster package packet page pail pain paint pair pajama pakistan palm pamphlet pan pancake pancreas panda pansy panther panties pantry pants panty pantyhose paper paperback parade parallelogram parcel parent parentheses park parrot parsnip part particle partner partridge party passbook passenger passive pasta paste pastor pastry patch path patient patio patricia paul payment pea peace peak peanut pear pedestrian pediatrician peen peer-to-peer pelican pen penalty pencil pendulum pentagon peony pepper perch perfume period periodical peripheral permission persian person peru pest pet pharmacist pheasant philippines philosophy phone physician piano piccolo pickle picture pie pig pigeon pike pillow pilot pimple pin pine ping pink pint pipe pisces pizza place plain plane planet plant plantation plaster plasterboard plastic plate platinum play playground playroom pleasure plier plot plough plow plywood pocket poet point poison poland police policeman polish politician pollution polo polyester pond popcorn poppy population porch porcupine port porter position possibility postage postbox pot potato poultry pound powder power precipitation preface prepared pressure price priest print printer prison probation process processing produce product production professor profit promotion propane property prose prosecution protest protocol pruner psychiatrist psychology ptarmigan puffin pull puma pump pumpkin punch punishment puppy purchase purple purpose push pvc 
pyjama pyramid quail quality quart quarter quartz queen question quicksand quiet quill quilt quince quit quiver quotation rabbi rabbit racing radar radiator radio radish raft rail railway rain rainbow raincoat rainstorm rake ramie random range rat rate raven ravioli ray rayon reaction reading reason receipt recess record recorder rectangle red reduction refrigerator refund regret reindeer relation relative religion relish reminder repair replace report representative request resolution respect responsibility rest restaurant result retailer revolve revolver reward rhinoceros rhythm rice richard riddle rifle ring rise risk river riverbed road roadway roast robert robin rock rocket rod roll romania romanian ronald roof room rooster root rose rotate route router rowboat rub rubber rugby rule run russia russian rutabaga ruth sack sagittarius sail sailboat sailor salad salary sale salesman salmon salt sampan samurai sand sandra sandwich santa sarah sardine satin saturday sauce saudi arabia sausage save saw saxophone scale scallion scanner scarecrow scarf scene scent schedule school science scissors scooter scorpio scorpion scraper screen screw screwdriver sea seagull seal seaplane search seashore season seat second secretary secure security seed seeder segment select selection self semicircle semicolon sense sentence separated september servant server session sex shade shadow shake shallot shame shampoo shape share shark sharon shears sheep sheet shelf shell shield shingle ship shirt shock shoe shoemaker shop shorts shoulder shovel show shrimp shrine siamese siberian side sideboard sidecar sidewalk sign signature silica silk silver sing singer single sink sister sister-in-law size skate skiing skill skin skirt sky slash slave sled sleep sleet slice slime slip slipper slope smash smell smile smoke snail snake sneeze snow snowboarding snowflake snowman snowplow snowstorm soap soccer society sociology sock soda sofa softball softdrink software soil soldier son song soprano 
sort sound soup sousaphone south africa south america south korea soy soybean space spade spaghetti spain spandex spark sparrow spear specialist speedboat sphere sphynx spider spike spinach spleen sponge spoon spot spring sprout spruce spy square squash squid squirrel stage staircase stamp star start starter state statement station statistic steam steel stem step step-aunt step-brother stepdaughter step-daughter step-father step-grandfather step-grandmother stepmother step-mother step-sister stepson step-son step-uncle steven stew stick stinger stitch stock stocking stomach stone stool stop stopsign stopwatch store storm story stove stranger straw stream street streetcar stretch string structure study sturgeon submarine substance subway success sudan suede sugar suggestion suit summer sun sunday sundial sunflower sunshine supermarket supply support surfboard surgeon surname surprise susan sushi swallow swamp swan sweater sweatshirt sweatshop swedish sweets swim swimming swing swiss switch sword swordfish sycamore syria syrup system table tablecloth tabletop tachometer tadpole tail tailor taiwan talk tank tanker tanzania target taste taurus tax taxi taxicab tea teacher teaching team technician teeth television teller temper temperature temple tempo tendency tennis tenor tent territory test text textbook texture thailand theater theory thermometer thing thistle thomas thought thread thrill throat throne thumb thunder thunderstorm thursday ticket tie tiger tights tile timbale time timer timpani tin tip tire titanium title toad toast toe toenail toilet tomato tom-tom ton tongue tooth toothbrush toothpaste top tornado tortellini tortoise touch tower town toy tractor trade traffic trail train tramp transaction transmission transport trapezoid tray treatment tree trial triangle trick trigonometry trip trombone trouble trousers trout trowel truck trumpet trunk t-shirt tsunami tub tuba tuesday tugboat tulip tuna tune turkey turkey turkish turn turnip turnover turret turtle 
tv twig twilight twine twist typhoon tyvek uganda ukraine ukrainian umbrella uncle underclothes underpants undershirt underwear unit united kingdom unshielded use utensil uzbekistan vacation vacuum valley value van vase vault vegetable vegetarian veil vein velvet venezuela venezuelan verdict vermicelli verse vessel vest veterinarian vibraphone vietnam view vinyl viola violet violin virgo viscose vise vision visitor voice volcano volleyball voyage vulture waiter waitress walk wall wallaby wallet walrus war warm wash washer wasp waste watch watchmaker water waterfall wave wax way wealth weapon weasel weather wedge wednesday weed weeder week weight whale wheel whip whiskey whistle white wholesaler whorl wilderness william willow wind windchime window windscreen windshield wine wing winter wire wish witch withdrawal witness wolf woman women wood wool woolen word work workshop worm wound wrecker wren wrench wrinkle wrist writer xylophone yacht yak yam yard yarn year yellow yew yogurt yoke yugoslavian zebra zephyr zinc zipper zone zoo zoology"
# Split the vocabulary on single spaces and build one prompt per word.
# NOTE(review): multi-word entries such as "north america" are split into
# separate words by this — confirm that is acceptable.
nouns = nouns.split(" ")
noun_prompts = [f"a drawing of a {noun}" for noun in nouns]

# Encode every noun prompt once, without tracking gradients.
with torch.no_grad():
    noun_tokens = clip.tokenize(noun_prompts).to(device)
    nouns_features = model.encode_text(noun_tokens)
print(nouns_features.shape, nouns_features.dtype)


100%|████████████████████████████████████████| 338M/338M [00:03<00:00, 109MiB/s]


torch.Size([2343, 512]) torch.float16


In [10]:
#@title Video Renderer {vertical-output: true}

# Stroke-by-stroke frames: frame i renders only the first i + 1 shapes.
with torch.no_grad():
    for i in range(args.num_paths):
        print(i)
        shown = i + 1
        scene_args = pydiffvg.RenderFunction.serialize_scene(
            canvas_width, canvas_height, shapes[:shown], shape_groups[:shown])
        img = render(canvas_width, canvas_height, 2, 2, t, None, *scene_args)
        # Blend the first three channels over an all-ones background, using
        # channel 3 as the blend weight.
        alpha = img[:, :, 3:4]
        backdrop = torch.ones(img.shape[0], img.shape[1], 3, device = pydiffvg.get_device())
        img = alpha * img[:, :, :3] + backdrop * (1 - alpha)
        pydiffvg.imwrite(img.cpu(), '/content/res/stroke_{}.png'.format(i), gamma=gamma)
print("ffmpeging")

# Assemble the numbered PNG frames into 60 fps videos.
# NOTE(review): the iter_%d.png frames are not written in this cell —
# presumably an earlier cell produces them; confirm.
from subprocess import call

frame_jobs = (
    ("/content/res/iter_%d.png", "/content/res/out.mp4"),
    ("/content/res/stroke_%d.png", "/content/res/out_strokes.mp4"),
)
for frame_pattern, video_path in frame_jobs:
    call(["ffmpeg", "-y", "-framerate", "60", "-i",
        frame_pattern, "-vb", "20M",
        video_path])

# Re-encode each video through the same trim/concat/overlay filter graph.
hold_filter = "[0]trim=0:2[hold];[0][hold]concat[extended];[extended][0]overlay"
extend_jobs = (
    ("/content/res/out.mp4", "/content/res/out_longer.mp4"),
    ("/content/res/out_strokes.mp4", "/content/res/out_strokes_longer.mp4"),
)
for short_path, long_path in extend_jobs:
    call(["ffmpeg", "-y", "-i", short_path, "-filter_complex",
        hold_filter,
        long_path])


for long_path in ("/content/res/out_longer.mp4", "/content/res/out_strokes_longer.mp4"):
    display(mvp.ipython_display(long_path))

  skimage.io.imsave(filename, (img * 255).astype(np.uint8))

  skimage.io.imsave(filename, (img * 255).astype(np.uint8))

  skimage.io.imsave(filename, (img * 255).astype(np.uint8))

  skimage.io.imsave(filename, (img * 255).astype(np.uint8))

  skimage.io.imsave(filename, (img * 255).astype(np.uint8))

  skimage.io.imsave(filename, (img * 255).astype(np.uint8))



0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
ffmpeging


In [None]:
#@title Pixel Optimizer (Ignore) {vertical-output: true}

%cd /content/diffvg/apps/

# Text prompt that the pixel image is optimised towards.
prompt = "Underwater"
text_input = clip.tokenize(prompt)
text_input = text_input.to(device)

# Encode the prompt once, without tracking gradients.
with torch.no_grad():
    text_features = model.encode_text(text_input)

import torch
import skimage
import skimage.io
import random
import ttools.modules
import argparse
import math
import torchvision
import torchvision.transforms as transforms



class ImageBase(torch.nn.Module):
    """A learnable 224x224x3 tensor whose forward pass returns its sigmoid."""

    def __init__(self):
        super().__init__()
        # Raw (pre-sigmoid) values, initialised to all ones.
        self.p = torch.nn.Parameter(torch.ones((224, 224, 3)))

    def forward(self):
        """Return the parameter squashed element-wise into (0, 1)."""
        return torch.sigmoid(self.p)

device = torch.device('cuda')
canvas_width, canvas_height = 224, 224

# Random perspective warp followed by a random crop resized to 224x224.
augment_trans = transforms.Compose([
    transforms.RandomPerspective(fill=1, p=1),
    transforms.RandomResizedCrop(224, scale=(0.7,0.9)),
])

ib = ImageBase().to(device)

# Start the optimisation from a downloaded image instead of the all-ones default.
t_img = imread('https://lh5.googleusercontent.com/mjvIYutjtOGEEU2cBYuFMvCrBCg4-MGh3DqCRlLqwn5I6VvdKdtwWvAYlndQbv-VUudPcecQ_TEGFjYaTuS_r0LNI83Sp8MlXJb6OarJ9mu-IkmKPlg9Gaw3gOjQvvgvuUB5ghJjlaE')
target = torch.from_numpy(t_img).to(torch.float32)
# nn.Module refuses to rebind a registered parameter to a plain tensor
# (TypeError), so wrap the image in a Parameter. It is also moved to `device`
# so it sits on the same device the module was moved to.
# NOTE(review): imread is called without max_size/mode, so the image keeps its
# own size and channel count, and ImageBase.forward applies a sigmoid on top
# of these values — confirm both are intended.
ib.p = torch.nn.Parameter(target.to(device))

# Optimise the image parameter with Adam for args.num_iter steps.
optim = torch.optim.Adam(ib.parameters(), lr=0.01)
for t in range(args.num_iter):
    optim.zero_grad()
    # Add a leading batch axis, then reorder NHWC -> NCHW.
    img = ib().unsqueeze(0).permute(0, 3, 1, 2)

    # Loss: negative cosine similarity between the prompt features and the
    # image features, accumulated over 16 random augmentations.
    loss = 0
    for aug_round in range(16):
        img_aug = augment_trans(img)
        image_features = model.encode_image(img_aug)
        loss -= torch.cosine_similarity(text_features, image_features, dim=1)

    # Backpropagate and take one optimiser step.
    loss.backward()
    optim.step()

    # Progress report on every 10th iteration only.
    if t % 10 != 0:
        continue

    show_img(img.detach().cpu().numpy()[0])
    show_img(img_aug.detach().cpu().numpy()[0])
    print('render loss:', loss.item())
    print('iteration:', t)
    with torch.no_grad():
        # Rank the noun prompts against the features of the last augmented
        # view and print the five most similar ones.
        im_norm = image_features / image_features.norm(dim=-1, keepdim=True)
        noun_norm = nouns_features / nouns_features.norm(dim=-1, keepdim=True)
        similarity = (100.0 * im_norm @ noun_norm.T).softmax(dim=-1)
        values, indices = similarity[0].topk(5)
        print("\nTop predictions:\n")
        for value, index in zip(values, indices):
            print(f"{nouns[index]:>16s}: {100 * value.item():.2f}%")

# Stitch numbered frames into an MP4 with ffmpeg (24 fps, 20M video bitrate).
# NOTE(review): nothing in this cell writes iter_%d.png frames under
# results/painterly_rendering/ — confirm the inputs exist before relying on this.
from subprocess import call
ffmpeg_cmd = [
    "ffmpeg",
    "-framerate", "24",
    "-i", "results/painterly_rendering/iter_%d.png",
    "-vb", "20M",
    "results/painterly_rendering/out.mp4",
]
call(ffmpeg_cmd)