In [1]:
import numpy as np
import os
import pickle
from PIL import Image

In [2]:
# Folder under the working directory that holds one sub-folder per class.
data_dir = "by_class"
# Working directory at the time this cell runs; root of every data path below.
home_dir = os.getcwd()
# Class folder names: the hexadecimal ASCII codes of the digits '0'..'9',
# i.e. '30' through '39'.
hex_chars = [format(ord(digit), "x") for digit in "0123456789"]

def load_pickle_file(filename):
    """Read and return the object pickled in ``filename``.

    NOTE: unpickling can execute arbitrary code, so only call this on files
    written by this notebook or another trusted source.
    """
    with open(filename, mode='rb') as pickle_file:
        contents = pickle.load(pickle_file)
    return contents

def save_pickle_file(filename, data):
    """Pickle ``data`` into ``filename``, replacing any existing file."""
    with open(filename, mode='wb') as pickle_file:
        pickle.dump(data, pickle_file)

def hex_to_decimal(hex_char):
    """Interpret ``hex_char`` as a base-16 string and return its integer value."""
    decimal_value = int(hex_char, base=16)
    return decimal_value

In [3]:
def get_images_from_dir(images_dir):
    """Load every file in ``images_dir`` as a 128x128 grayscale image.

    Args:
        images_dir: Directory whose entries are all image files PIL can open.

    Returns:
        A ``uint8`` array of shape ``(num_files, 128, 128)``. Row ``i`` holds
        the ``i``-th file in sorted filename order.

    Raises:
        ValueError: If a converted image cannot be broadcast into its
            128x128 slot.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order reproducible between runs and machines.
    filenames = sorted(os.listdir(images_dir))
    images = np.empty([len(filenames), 128, 128], dtype=np.uint8)

    for i, filename in enumerate(filenames):
        image_path = os.path.join(images_dir, filename)
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the output array.
        with Image.open(image_path) as image:
            images[i] = np.asarray(image.convert('L'))

    return images

In [4]:
def get_num_images_in_dir(directory):
    """Return how many entries ``os.listdir`` reports for ``directory``."""
    return len(os.listdir(directory))

In [5]:
def get_num_images(typ):
    """Count the directory entries across all class folders for one split.

    ``typ`` selects the per-class sub-folder: ``'train'`` uses
    ``train_<hex>`` and ``'test'`` uses ``hsf_4``. Any other value makes the
    function return ``None`` (the check happens inside the loop over
    ``hex_chars``).
    """
    image_total = 0
    for hex_char in hex_chars:
        class_root = f'{home_dir}/{data_dir}/{hex_char}'
        if typ == 'train':
            split_dir = f'{class_root}/train_{hex_char}'
        elif typ == 'test':
            split_dir = f'{class_root}/hsf_4'
        else:
            return None
        image_total += get_num_images_in_dir(split_dir)

    return image_total


In [11]:
def save_images(typ):
    """Collect every image of one split into a single array and pickle it.

    The result is a ``uint8`` array of shape ``(num_images, 128, 128)``
    written to ``'<typ>_images_nist.pkl'``, with the classes laid out
    back-to-back in ``hex_chars`` order.

    Args:
        typ: ``'train'`` or ``'test'``; any other value is a no-op that
            returns ``None``.
    """
    if typ not in ['train', 'test']:
        return

    num_total_images = get_num_images(typ)
    filename = f'{typ}_images_nist.pkl'
    all_images = np.empty([num_total_images, 128, 128], dtype=np.uint8)

    # The array stays in memory for the whole loop and is written once at the
    # end. Reloading and rewriting the full pickle for every class only added
    # disk I/O: the function always starts from scratch, so the intermediate
    # files could never be used to resume.
    total_count = 0
    for hex_char in hex_chars:
        if typ == 'train':
            directory = f'{home_dir}/{data_dir}/{hex_char}/train_{hex_char}'
        else:
            directory = f'{home_dir}/{data_dir}/{hex_char}/hsf_4'

        images = get_images_from_dir(directory)
        num_images_for_hex = images.shape[0]
        print(num_images_for_hex)

        all_images[total_count:total_count + num_images_for_hex] = images
        total_count += num_images_for_hex

        print('finished', hex_char)

    save_pickle_file(filename, all_images)

In [12]:
def save_labels(typ):
    """Build the label vector for one split and pickle it.

    Each class contributes as many labels as its folder has entries, and the
    label value is the class's position in ``hex_chars``. The ``uint8`` vector
    is written to ``'<typ>_labels_nist.pkl'``. Values of ``typ`` other than
    ``'train'`` / ``'test'`` are ignored.
    """
    if typ not in ('train', 'test'):
        return

    labels = np.empty([get_num_images(typ)], dtype=np.uint8)
    out_path = f'{typ}_labels_nist.pkl'

    start = 0
    for hex_char in hex_chars:
        subdir = f'train_{hex_char}' if typ == 'train' else 'hsf_4'
        class_count = get_num_images_in_dir(f'{home_dir}/{data_dir}/{hex_char}/{subdir}')

        # Every image of this class occupies one contiguous slice.
        stop = start + class_count
        labels[start:stop] = hex_chars.index(hex_char)
        print(labels[start:stop])

        start = stop

    save_pickle_file(out_path, labels)

In [13]:
def test_labels(typ):
    if typ not in ['train', 'test']: return

    num_total_images = get_num_images(typ)

    filename = f'{typ}_labels_nist.pkl'

    labels = load_pickle_file(filename)

    total_count = 0
    for hex_char in hex_chars:
        if typ == 'train':
            directory = f'{home_dir}/{data_dir}/{hex_char}/train_{hex_char}'
        elif typ == 'test':
            directory = f'{home_dir}/{data_dir}/{hex_char}/hsf_4'

        num_images_for_hex = get_num_images_in_dir(directory)
        print(num_images_for_hex)

        i = total_count+num_images_for_hex

        print(hex_char)
        try:
            print(labels[i-1], labels[i], labels[i+1])
        except IndexError:
            print(labels[i-1])

        total_count += num_images_for_hex
    

In [19]:
# Spot-check the saved test-split labels: prints each class's image count, its
# hex folder name, and the labels on either side of the class boundary.
# Requires 'test_labels_nist.pkl' to exist in the working directory.
test_labels('test')

5560
30
0 1 1
6655
31
1 2 2
5888
32
2 3 3
5819
33
3 4 4
5722
34
4 5 5
5539
35
5 6 6
5858
36
6 7 7
6097
37
7 8 8
5695
38
8 9 9
5813
39
9


In [1]:
def save_images_flattened(typ):
    """Flatten each stored 128x128 image to a boolean vector and pickle it.

    Reads ``'<typ>_images.pkl'``, reshapes it to ``(num_images, 128*128)``,
    converts it to ``bool`` (any non-zero pixel becomes ``True``) and writes
    the result to ``'<typ>_images_flattened.pkl'``.

    Args:
        typ: ``'train'`` or ``'test'``; any other value is a no-op that
            returns ``None``.
    """
    if typ not in ['train', 'test']:
        return

    # NOTE(review): save_images writes '<typ>_images_nist.pkl' but this reads
    # '<typ>_images.pkl' -- confirm which file name is intended.
    filename = f'{typ}_images.pkl'
    all_images = np.asarray(load_pickle_file(filename))

    # Size the output from the loaded data rather than from a fresh directory
    # scan, so a count mismatch cannot leave uninitialised rows or overflow.
    # One reshape + cast replaces the per-image Python loop.
    new_images = all_images.reshape(len(all_images), 128 * 128).astype(bool)

    save_pickle_file(f'{typ}_images_flattened.pkl', new_images)

In [17]:
# Small demo of the flatten -> binarise -> bool conversion on a 3x3 example.
arr = np.array([[255, 0, 255],
                [0, 255, 255],
                [255, 0, 0]], dtype=np.uint8)

print(arr)

# flatten() returns a copy. np.reshape would return a view here, so the
# in-place edit below would also rewrite `arr` after it had been printed.
new_arr = arr.flatten()
print(new_arr)

new_arr[new_arr == 255] = 1
print(new_arr)

print(new_arr.dtype)
new_arr = new_arr.astype(np.bool_)
print(new_arr)
print(new_arr.dtype)

[[255   0 255]
 [  0 255 255]
 [255   0   0]]
[255   0 255   0 255 255 255   0   0]
[1 0 1 0 1 1 1 0 0]
uint8
[ True False  True False  True  True  True False False]
bool
