# Sketch Quality Analysis (With CLIP)

The QuickDraw dataset includes some invalid sketches because it was crowdsourced (anyone could contribute to the dataset).

https://colinmorris.github.io/blog/bad_flamingos

In [None]:
from transformers import CLIPProcessor, CLIPModel

# Requires the `transformers` package: pip install transformers
# Model card: https://huggingface.co/openai/clip-vit-large-patch14
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

# Load the model and its processor from the same checkpoint so they stay in sync.
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
import pyvips
from io import BytesIO
from PIL import Image
from IPython.display import display, HTML
from dataset import QuickDrawDataset
import torch

def svg_rasterize(svg_string: str) -> Image.Image:
    """Render an SVG document to a grayscale PIL image.

    The SVG is rasterized with pyvips, its alpha channel (if any) is flattened
    onto a white background, and the result is passed through an in-memory PNG
    so that PIL can open it.

    Args:
        svg_string: SVG markup to rasterize.

    Returns:
        The rendered sketch as a single-channel ("L" mode) PIL image.
    """
    svg_bytes = svg_string.encode("utf-8")
    image = pyvips.Image.svgload_buffer(svg_bytes)
    # `background` is a list of per-band pixel values, not a packed 0xRRGGBB
    # integer. A one-element list is used for every band; 255 is white for the
    # 8-bit image svgload is expected to produce.
    image = image.flatten(background=[255])
    png_bytes = image.write_to_buffer(".png")
    return Image.open(BytesIO(png_bytes)).convert("L")

# Pick a few "cat" sketches by index and rasterize them for CLIP.
labels = ["cat"]
training_data = QuickDrawDataset(labels=labels, download=True)
svgs = [training_data[0], training_data[8138], training_data[1234]]
rasterized_svgs = [svg_rasterize(svg) for svg in svgs]

text_inputs = ["a cat sketch", "random scribbles"]
inputs = processor(text=text_inputs, images=rasterized_svgs, return_tensors="pt", padding=True)

# Inference only: skip autograd bookkeeping so no graph is kept alive.
with torch.no_grad():
    outputs = model(**inputs)
logits_per_image = outputs.logits_per_image
# Rows are images, columns are text prompts. The softmax is taken per image
# across the prompts, so each row sums to 1.
probs = logits_per_image.softmax(dim=1)

# Rank images for each text label
for j, text in enumerate(text_inputs):
    scores = probs[:, j]
    # Plain Python ints, so they can index the `svgs` list directly.
    ranking = torch.argsort(scores, descending=True).tolist()
    print(f"\nText: '{text}'")
    label_ranking = "".join(
        f"<div style='display:inline-block; width: 150px; background-color: white; margin-right:10px;'><b>Ranking {rank+1}, Probability: {scores[idx]:.2f}</b><br>{svgs[idx]}</div>"
        for rank, idx in enumerate(ranking)
    )

    display(HTML(label_ranking))


Downloading QuickDraw files: 100%|██████████| 1/1 [00:00<00:00, 6141.00it/s]
Loading QuickDraw files: 100%|██████████| 1/1 [00:02<00:00,  2.57s/it]



Text: 'a cat sketch'



Text: 'random scribbles'


In [3]:
# Preview the first 20 sketches side by side, each captioned with its dataset index.
preview_cards = []
for i in range(20):
    original_svg = training_data[i]
    preview_cards.append(
        f'<div style="display:inline-block; width: 150px; background-color: white; margin-right:10px;"><b>Original {i}</b><br>{original_svg}</div>'
    )
svgs_inline = "".join(preview_cards)

display(HTML(svgs_inline))

# What about sketches that are not scribbles, but not exactly cats?

`training_data[7], training_data[14]`

In [4]:
import pyvips
from io import BytesIO
from PIL import Image
from IPython.display import display, HTML
from dataset import QuickDrawDataset
import torch

def svg_rasterize(svg_string: str) -> Image.Image:
    """Render an SVG document to a grayscale PIL image.

    The SVG is rasterized with pyvips, its alpha channel (if any) is flattened
    onto a white background, and the result is passed through an in-memory PNG
    so that PIL can open it.

    Args:
        svg_string: SVG markup to rasterize.

    Returns:
        The rendered sketch as a single-channel ("L" mode) PIL image.
    """
    svg_bytes = svg_string.encode("utf-8")
    image = pyvips.Image.svgload_buffer(svg_bytes)
    # `background` is a list of per-band pixel values, not a packed 0xRRGGBB
    # integer. A one-element list is used for every band; 255 is white for the
    # 8-bit image svgload is expected to produce.
    image = image.flatten(background=[255])
    png_bytes = image.write_to_buffer(".png")
    return Image.open(BytesIO(png_bytes)).convert("L")

# Same comparison as before, with two extra sketches (indices 7 and 14).
labels = ["cat"]
training_data = QuickDrawDataset(labels=labels, download=True)
svgs = [training_data[0], training_data[8138], training_data[1234], training_data[7], training_data[14]]
rasterized_svgs = [svg_rasterize(svg) for svg in svgs]

text_inputs = ["a cat sketch", "random scribbles"]
inputs = processor(text=text_inputs, images=rasterized_svgs, return_tensors="pt", padding=True)

# Inference only: skip autograd bookkeeping so no graph is kept alive.
with torch.no_grad():
    outputs = model(**inputs)
logits_per_image = outputs.logits_per_image
# Rows are images, columns are text prompts. The softmax is taken per image
# across the prompts, so each row sums to 1.
probs = logits_per_image.softmax(dim=1)

# Rank images for each text label
for j, text in enumerate(text_inputs):
    scores = probs[:, j]
    # Plain Python ints, so they can index the `svgs` list directly.
    ranking = torch.argsort(scores, descending=True).tolist()
    print(f"\nText: '{text}'")
    label_ranking = "".join(
        f"<div style='display:inline-block; width: 150px; background-color: white; margin-right:10px;'><b>Ranking {rank+1}, Probability: {scores[idx]:.2f}</b><br>{svgs[idx]}</div>"
        for rank, idx in enumerate(ranking)
    )

    display(HTML(label_ranking))


Downloading QuickDraw files: 100%|██████████| 1/1 [00:00<00:00, 6678.83it/s]
Loading QuickDraw files: 100%|██████████| 1/1 [00:02<00:00,  2.49s/it]



Text: 'a cat sketch'



Text: 'random scribbles'


In [None]:
# https://github.com/KhrulkovV/geometry-score