In [1]:
from torchvision import datasets, transforms

# MNIST train and test splits, stored under ./x/.
# ToTensor() is passed as the per-sample transform; only the training split is
# constructed with download=True.
train_dataset = datasets.MNIST(
    root='./x/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./x/',
    train=False,
    transform=transforms.ToTensor(),
    download=False,
)

In [2]:
# Take the first sample of the training split and keep element [0][0] of it
# (first entry of the sample, then its first channel) as a NumPy array.
row = next(iter(train_dataset))
array = row[0][0].numpy()

In [3]:
import numpy as np
from PIL import Image

def show_image(array):
    """
        Open a 2D brightness array as an 8-bit greyscale image via Image.show().

        array: 2D array of brightness values where 0.0 maps to black and 1.0 to white.
               Values outside [0, 1] are clipped so they cannot wrap around when the
               scaled result is cast to uint8.
    """
    pixels = (np.clip(array, 0.0, 1.0) * 255).astype(np.uint8)
    # fromarray() already builds the image from the pixel data, so no follow-up
    # putdata() call is needed (putdata expects a flattened sequence, not a 2D array).
    image = Image.fromarray(pixels, 'L')
    image.show()

In [4]:
def max_pool(img, factor: int):
    """
        Perform max pooling with a (factor x factor) kernel

        img:    2D NumPy array indexed as (row, column).
        factor: side length of the square pooling window; must be >= 1.

        Returns an array of shape (rows // factor, columns // factor) with the same
        dtype as img. Trailing rows/columns that do not fill a complete window are
        dropped.

        Raises ValueError if factor is smaller than 1.
    """
    if factor < 1:
        raise ValueError('factor must be a positive integer, got %r' % (factor,))

    rows = img.shape[0] // factor
    cols = img.shape[1] // factor

    # Crop to a whole number of windows, then give every window its own pair of axes
    # (rows, factor, cols, factor) and reduce over the two window axes. Unlike seeding
    # an output with -inf, this also works for integer dtypes.
    windows = img[:rows * factor, :cols * factor].reshape(rows, factor, cols, factor)
    return windows.max(axis=(1, 3))

In [5]:
# Preview the sample extracted above, before any pooling (show_image calls image.show()).
show_image(array)

In [7]:
# Preview the same sample after 2x2 max pooling.
show_image(max_pool(array, factor=2))

In [8]:
# NOTE(review): scratch arithmetic; presumably the token count of a 28-row text encoding
# (28 rows x (28 pixel tokens + 2 label tokens per row)) — confirm or remove.
30*28

840

In [9]:
# NOTE(review): scratch arithmetic; presumably the token count after 2x pooling
# (14 rows x (14 pixel tokens + 2 label tokens per row)) — confirm or remove.
16*14

224

## Array to Text

In [10]:
import sys
import math
from PIL import Image


# Down-sample the digit with 2x2 max pooling so every text row stays short.
pixels = max_pool(array, factor=2)

# NumPy shape is (rows, columns); rows become text lines.
height, width = pixels.shape

lines = []

for y in range(height):
    # Each line starts with its zero-padded row index and the reading direction.
    split = ['%02d down' % y]

    for x in range(width):
        brightness = pixels[y, x]

        # Quantise the brightness to an integer level and shift it into printable
        # ASCII starting at '!' (code 33).
        mBrightness = math.floor(brightness * 64)
        split.append(chr(mBrightness + 33))

    lines.append(' '.join(split))

print('\n'.join(lines))

# Same rows in bottom-to-top order, relabelled "up". The list is deliberately not
# called `reversed`, which would shadow the builtin for the rest of the notebook.
reversed_lines = [line.replace(' down ', ' up ', 1) for line in lines[::-1]]
print('\n'.join(reversed_lines))

00 down ! ! ! ! ! ! ! ! ! ! ! ! ! !
01 down ! ! ! ! ! ! ! ! ! ! ! ! ! !
02 down ! ! ! ! ! ! % % C L a ^ ! !
03 down ! ! ! - ` ` ` ` ` Y ` Q ! !
04 down ! ! ! % ` ` ` R ^ ! ! ! ! !
05 down ! ! ! ! $ G ` ! ! ! ! ! ! !
06 down ! ! ! ! ! # ` Y < ! ! ! ! !
07 down ! ! ! ! ! ! 5 ` ` F ! ! ! !
08 down ! ! ! ! ! ! ! % ` ` 1 ! ! !
09 down ! ! ! ! ! ! F ` ` ` ! ! ! !
10 down ! ! ! ! 1 ` ` ` ` 4 ! ! ! !
11 down ! ! L ` ` ` ` 5 ! ! ! ! ! !
12 down ! ! ` ` V B ! ! ! ! ! ! ! !
13 down ! ! ! ! ! ! ! ! ! ! ! ! ! !
13 up ! ! ! ! ! ! ! ! ! ! ! ! ! !
12 up ! ! ` ` V B ! ! ! ! ! ! ! !
11 up ! ! L ` ` ` ` 5 ! ! ! ! ! !
10 up ! ! ! ! 1 ` ` ` ` 4 ! ! ! !
09 up ! ! ! ! ! ! F ` ` ` ! ! ! !
08 up ! ! ! ! ! ! ! % ` ` 1 ! ! !
07 up ! ! ! ! ! ! 5 ` ` F ! ! ! !
06 up ! ! ! ! ! # ` Y < ! ! ! ! !
05 up ! ! ! ! $ G ` ! ! ! ! ! ! !
04 up ! ! ! % ` ` ` R ^ ! ! ! ! !
03 up ! ! ! - ` ` ` ` ` Y ` Q ! !
02 up ! ! ! ! ! ! % % C L a ^ ! !
01 up ! ! ! ! ! ! ! ! ! ! ! ! ! !
00 up ! ! ! ! ! ! ! ! ! ! ! ! ! !


## Text to Array

In [12]:
text = '''00 down ! ! ! ! ! ! ! ! ! ! ! ! ! !
01 down ! ! ! ! ! ! ! ! ! ! ! ! ! !
02 down ! ! ! ! ! ! % % C L a ^ ! !
03 down ! ! ! - ` ` ` ` ` Y ` Q ! !
04 down ! ! ! % ` ` ` R ^ ! ! ! ! !
05 down ! ! ! ! $ G ` ! ! ! ! ! ! !
06 down ! ! ! ! ! # ` Y < ! ! ! ! !
07 down ! ! ! ! ! ! 5 ` ` F ! ! ! !
08 down ! ! ! ! ! ! ! % ` ` 1 ! ! !
09 down ! ! ! ! ! ! F ` ` ` ! ! ! !
10 down ! ! ! ! 1 ` ` ` ` 4 ! ! ! !
11 down ! ! L ` ` ` ` 5 ! ! ! ! ! !
12 down ! ! ` ` V B ! ! ! ! ! ! ! !
13 down ! ! ! ! ! ! ! ! ! ! ! ! ! !'''

lines = text.split('\n')
# Every line is "<row> down" followed by one symbol per pixel, hence the "- 2".
pixels = np.zeros((len(lines), len(lines[0].split(' ')) - 2))

for y, line in enumerate(lines):
    tokens = line.split(' ')
    assert tokens[1] == 'down'

    pixel_tokens = tokens[2:]
    for x, token in enumerate(pixel_tokens):
        # Undo chr(floor(brightness * 64) + 33): remove the ASCII offset and divide by 64
        # so the value is a brightness again, which show_image() then scales by 255.
        pixels[y, x] = (ord(token) - 33) / 64

show_image(
    pixels
)

# Make JSON dataset

In [13]:
def array_to_text(pixels: np.ndarray):
    '''
        Takes a 2D array of pixel brightness, max-pools it with a 2x2 kernel and converts
        the result to text.

        Each brightness b is encoded as chr(floor(b * 64) + 33). For b in [0, 1] that is
        one of 65 symbols, from '!' (b == 0) up to 'a' (b == 1.0).

        Every line has the form "<row> down <symbol> <symbol> ...", with the row index
        zero-padded to two digits.

        Returns a two-element list [down_text, up_text]: the lines in top-to-bottom order,
        and the same lines in bottom-to-top order with "down" replaced by "up".
    '''
    pixels = max_pool(pixels, factor=2)

    # NumPy shape is (rows, columns). Rows become text lines, so non-square input
    # is handled correctly as well.
    height, width = pixels.shape

    lines = []

    for y in range(height):
        split = ['%02d down' % y]
        split.extend(
            chr(math.floor(brightness * 64) + 33)
            for brightness in pixels[y, :width]
        )
        lines.append(' '.join(split))

    # Slicing is used instead of reversed(): an earlier notebook cell may have rebound
    # the global name `reversed` to a list.
    reversed_lines = [line.replace(' down ', ' up ', 1) for line in lines[::-1]]

    return ['\n'.join(lines), '\n'.join(reversed_lines)]

def text_to_array(text: str):
    '''
        Parses text made of lines "<row> down <symbol> <symbol> ..." (or "<row> up ...")
        back into a 2D float array of brightness values.

        Each symbol s is decoded as (ord(s) - 33) / 64, which undoes the encoding
        chr(floor(brightness * 64) + 33) up to the quantisation step.

        "down" lines are taken in top-to-bottom order. "up" lines are taken in
        bottom-to-top order, so the first "up" line fills the last row of the array.

        Raises ValueError if a line does not have "down" or "up" as its second token.
    '''
    lines = text.split('\n')

    # The first two tokens of a line are the row label and the direction marker.
    width = len(lines[0].split(' ')) - 2
    if width < 0:
        raise ValueError('expected "<row> down|up <symbols...>", got %r' % (lines[0],))

    pixels = np.zeros((len(lines), width))
    last_row = len(lines) - 1

    for position, line in enumerate(lines):
        tokens = line.split(' ')
        direction = tokens[1] if len(tokens) > 1 else None

        if direction == 'down':
            y = position
        elif direction == 'up':
            y = last_row - position
        else:
            raise ValueError('expected "<row> down|up <symbols...>", got %r' % (line,))

        for x, token in enumerate(tokens[2:]):
            pixels[y, x] = (ord(token) - 33) / 64

    return pixels

In [14]:
from torchvision import datasets, transforms

# MNIST train and test splits, stored under ./x/.
# NOTE(review): this repeats the dataset construction from the first cell of the
# notebook; it is redundant when the notebook is run top to bottom.
train_dataset = datasets.MNIST(
    root='./x/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./x/',
    train=False,
    transform=transforms.ToTensor(),
    download=False,
)

In [15]:
from tqdm import tqdm
import json

training_data = []

for tensor, label in tqdm(train_dataset):
    array = tensor.numpy()[0]
    # array_to_text returns [top-to-bottom ("down") text, bottom-to-top ("up") text].
    down_text, up_text = array_to_text(array)
    training_data.append({'text': down_text, 'label': label})
    training_data.append({'text': up_text, 'label': label})

# One JSON object per line.
# NOTE(review): the file name spells 'mnsit'; kept unchanged because readers of the file
# may depend on it.
with open('mnsit-text-train.json', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in training_data)

100%|██████████| 60000/60000 [01:17<00:00, 771.42it/s] 


In [16]:
test_data = []

for tensor, label in tqdm(test_dataset):
    array = tensor.numpy()[0]
    # array_to_text returns [top-to-bottom ("down") text, bottom-to-top ("up") text].
    down_text, up_text = array_to_text(array)
    test_data.append({'text': down_text, 'label': label})
    test_data.append({'text': up_text, 'label': label})

# One JSON object per line.
# NOTE(review): the file name spells 'mnsit'; kept unchanged because readers of the file
# may depend on it.
with open('mnsit-text-test.json', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in test_data)

100%|██████████| 10000/10000 [00:10<00:00, 976.85it/s]


In [17]:
from tqdm import tqdm
import json

training_data = []

for tensor, label in tqdm(train_dataset):
    # Pool once here; array_to_text() applies another 2x2 max pool, so the encoded grid
    # is pooled twice overall (presumably why the file is named "-small-").
    array = max_pool(tensor.numpy()[0], factor=2)
    # array_to_text returns [top-to-bottom ("down") text, bottom-to-top ("up") text].
    down_text, up_text = array_to_text(array)
    training_data.append({'text': down_text, 'label': label})
    training_data.append({'text': up_text, 'label': label})

# One JSON object per line.
# NOTE(review): the file name spells 'mnsit'; kept unchanged because readers of the file
# may depend on it.
with open('mnsit-text-small-train.json', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in training_data)

100%|██████████| 60000/60000 [00:26<00:00, 2251.52it/s]


In [18]:
test_data = []

for tensor, label in tqdm(test_dataset):
    # Pool once here; array_to_text() applies another 2x2 max pool, so the encoded grid
    # is pooled twice overall (presumably why the file is named "-small-").
    array = max_pool(tensor.numpy()[0], factor=2)
    # array_to_text returns [top-to-bottom ("down") text, bottom-to-top ("up") text].
    down_text, up_text = array_to_text(array)
    test_data.append({'text': down_text, 'label': label})
    test_data.append({'text': up_text, 'label': label})

# One JSON object per line.
# NOTE(review): the file name spells 'mnsit'; kept unchanged because readers of the file
# may depend on it.
with open('mnsit-text-small-test.json', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in test_data)

100%|██████████| 10000/10000 [00:04<00:00, 2230.55it/s]
