In [1]:
import numpy as np
import cairocffi as cairo
from PIL import Image
import os
import struct
from struct import unpack

In [2]:
def vector_to_raster(vector_images, side=28, line_diameter=16, padding=16, bg_color=(0,0,0), fg_color=(1,1,1)):
    """
    Rasterize vector drawings into flat pixel arrays with cairo.

    Every entry of ``vector_images`` is a sequence of strokes, and every
    stroke is an ``(xs, ys)`` pair of coordinate sequences. A single
    ``side`` x ``side`` ARGB32 surface is reused for all drawings: it is
    repainted with ``bg_color``, the strokes are drawn in ``fg_color`` with
    round caps and joins, and the pixels are then read back.

    padding and line_diameter are relative to the original 256x256 image.

    Returns a list with one 1-D array per drawing, made from every fourth
    byte of the surface buffer.
    """
    original_side = 256.

    canvas = cairo.ImageSurface(cairo.FORMAT_ARGB32, side, side)
    pen = cairo.Context(canvas)
    pen.set_antialias(cairo.ANTIALIAS_BEST)
    pen.set_line_cap(cairo.LINE_CAP_ROUND)
    pen.set_line_join(cairo.LINE_JOIN_ROUND)
    pen.set_line_width(line_diameter)

    # Map the padded source square onto the `side`-pixel surface: the margin
    # leaves room for the stroke width plus extra space for antialiasing.
    margin = padding * 2. + line_diameter
    shrink = float(side) / float(original_side + margin)
    pen.scale(shrink, shrink)
    pen.translate(margin / 2., margin / 2.)

    def render(drawing):
        # Wipe whatever the previous drawing left on the shared surface.
        pen.set_source_rgb(*bg_color)
        pen.paint()

        # Shift the drawing by half of the space left between its largest
        # x / y coordinate and the 256-unit source square.
        extent = np.hstack(drawing).max(axis=1)
        shift = (((original_side, original_side) - extent) / 2.).reshape(-1, 1)
        shifted = [stroke + shift for stroke in drawing]

        # Stroking the paths is the most cpu-intensive part.
        pen.set_source_rgb(*fg_color)
        for xs, ys in shifted:
            pen.move_to(xs[0], ys[0])
            for point in zip(xs, ys):
                pen.line_to(*point)
            pen.stroke()

        # Keep one byte out of every four and copy it, so the result does
        # not alias the surface buffer that the next drawing overwrites.
        pixels = canvas.get_data()
        return np.copy(np.asarray(pixels)[::4])

    return [render(drawing) for drawing in vector_images]

In [3]:
def unpack_drawing(file_handle):
    """
    Decode a single drawing record from a binary stream.

    Record layout (integers are read as little-endian, without padding):
        key_id        unsigned 64-bit integer
        country_code  2 raw bytes
        recognized    signed 8-bit integer
        timestamp     unsigned 32-bit integer
        n_strokes     unsigned 16-bit integer
    followed by ``n_strokes`` strokes, each stored as an unsigned 16-bit
    point count, then that many x bytes, then that many y bytes.

    The byte order is spelled out explicitly so that decoding does not
    depend on the byte order of the machine running the notebook.

    Args:
        file_handle: binary file-like object positioned at the start of a
            record; it is advanced past the record.

    Returns:
        dict with keys ``key_id``, ``country_code`` (bytes), ``recognized``,
        ``timestamp`` and ``image``, where ``image`` is a list of
        ``(xs, ys)`` tuples of ints, one per stroke.

    Raises:
        struct.error: if the stream ends before the record is complete,
            which includes being called at end of file.
    """
    # One read for the whole fixed-size header; '<' disables alignment
    # padding, so the size is exactly the sum of the field sizes.
    header_fmt = '<Q2sbIH'
    key_id, country_code, recognized, timestamp, n_strokes = unpack(
        header_fmt, file_handle.read(struct.calcsize(header_fmt)))

    image = []
    for _ in range(n_strokes):
        n_points, = unpack('<H', file_handle.read(2))
        fmt = f'<{n_points}B'
        x = unpack(fmt, file_handle.read(n_points))
        y = unpack(fmt, file_handle.read(n_points))
        image.append((x, y))

    return {
        'key_id': key_id,
        'country_code': country_code,
        'recognized': recognized,
        'timestamp': timestamp,
        'image': image
    }


def unpack_drawings(filename):
    """
    Lazily yield every drawing stored in the binary file ``filename``.

    Records are decoded one at a time with ``unpack_drawing``. Iteration
    ends as soon as decoding raises ``struct.error``; the file is closed
    when the generator finishes.
    """
    with open(filename, 'rb') as stream:
        try:
            while True:
                yield unpack_drawing(stream)
        except struct.error:
            # A record could not be decoded: treat it as the end of the data.
            return


In [4]:
def makeImages(label_name, max_images=100002):
    """
    Rasterize the drawings of one label into 28x28 PNG files.

    Reads ``binaries/full_binary_<label_name>.bin`` and writes one image per
    drawing to ``images/<label_name>/<key_id>.png``. Progress (the drawing
    index) is printed every 10000 drawings, and the index of the last
    drawing written is printed at the end.

    The label is skipped, with a message, when its binary file is missing or
    when its output folder already exists. Any other failure (unreadable
    data, rasterization or save errors, a keyboard interrupt) propagates to
    the caller instead of being reported as an existing folder.

    Args:
        label_name: label used to build the input and output paths.
        max_images: maximum number of drawings to convert; ``None`` means no
            limit. The default keeps the original cutoff, which stopped
            after the drawing with index 100001.

    Returns:
        None.
    """
    bin_name = f"binaries/full_binary_{label_name}.bin"
    if not os.path.isfile(bin_name):
        print(f"no binary file for '{bin_name}'!")
        return

    dir_name = f"images/{label_name}/"
    try:
        # exist_ok=False on purpose: an existing folder marks the label as
        # already handled, so it is not regenerated.
        os.makedirs(os.path.dirname(dir_name), exist_ok=False)
    except FileExistsError:
        print ( f"folder for '{label_name}' already exists!" )
        return

    current_index = 0
    for i, drawing in enumerate(unpack_drawings(bin_name)):
        if max_images is not None and i >= max_images:
            break
        raster = vector_to_raster([drawing['image']])
        img_array = raster[0].reshape(28,28)
        image = Image.fromarray(img_array)
        image.save(f"{dir_name}{drawing['key_id']}.png")
        current_index = i
        if i%10000 == 0:
            print(i)
    print(current_index)

In [5]:
# Load the label names to process: one label per line of labels.txt
# (splitlines() drops the line terminators).
labels = []
with open("labels.txt", "r") as file:
    labels = file.read().splitlines()

In [6]:
# Convert every label in turn. The two banner lines bracket each makeImages
# call; the closing banner is printed whenever the call returns, including
# when makeImages returns early without writing any image.
for label in labels:
    print(f"making images for -----{label}-----")
    makeImages(label)
    print(f"done making images for -----{label}-----")

making images for -----airplane-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----airplane-----
making images for -----apple-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----apple-----
making images for -----banana-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----banana-----
making images for -----bat-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----bat-----
making images for -----cow-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----cow-----
making images for -----dog-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----dog-----
making images for -----door-----
0
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
100001
done making images for -----door-----
makin