In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os

In [None]:
# The Google Drive desktop app mirrors this folder to the cloud, so this
# notebook runs locally on my laptop and its changes are synchronized.
# Both paths are relative to the working directory and end with '/' because
# later cells build file paths by plain string concatenation.
images_folder = 'Cleansed alphabet data/new/'  # input: sub-folders of images to transform
new_images_folder = 'Cleansed alphabet data/reshaped/'  # output: transformed copies, same sub-folder names

In [None]:
def transform_image(image, channel = 3):
    """Convert a PIL image to a 64x64 single-band grayscale image.

    Multi-band input is collapsed to one plane by averaging all bands with
    equal weight (an alpha band, if present, is averaged in as well). A
    non-square image is then placed on a white (255) square canvas whose side
    equals the longer edge; the leading margin along the padded axis is drawn
    uniformly from ``[0, |w - h| // 3]``. Finally the square image is resized
    to 64x64.

    Parameters
    ----------
    image : PIL.Image.Image
        Source image.
    channel : int, optional
        Number of bands in ``image``. Kept so existing callers keep working;
        the band count is now read from the pixel array itself, so a wrong
        or defaulted value no longer breaks the conversion.

    Returns
    -------
    PIL.Image.Image
        A new 64x64 image in mode 'L'.
    """
    w, h = image.size

    # Pixel array is (h, w) for single-band images and (h, w, bands) otherwise.
    data = np.asarray(image, dtype='int32')
    if data.ndim == 3:
        data = data.mean(axis=2)

    if w != h:
        side = max(w, h)
        # Random leading margin; the remaining padding goes to the other side.
        offset = np.random.randint(0, abs(w - h) // 3 + 1)
        canvas = np.full((side, side), 255.0)
        if w > h:
            # Wider than tall: pad rows above and below.
            canvas[offset:offset + h, :] = data
        else:
            # Taller than wide: pad columns left and right.
            canvas[:, offset:offset + w] = data
        data = canvas

    # A 2-D uint8 array is interpreted by Pillow as mode 'L'.
    image = Image.fromarray(data.astype('uint8'))
    # Image.resize returns a new image, so its result must be returned.
    # LANCZOS is the filter formerly aliased as ANTIALIAS (removed in Pillow 10).
    return image.resize((64, 64), Image.LANCZOS)
    

In [None]:
# Mirror every sub-folder of `images_folder` into `new_images_folder`,
# passing each image through `transform_image` on the way.
os.makedirs(new_images_folder, exist_ok=True)
folders = os.listdir(images_folder)

for folder in folders:
    source_dir = os.path.join(images_folder, folder)

    # Entries that are not folders (stray files) are skipped silently.
    if not os.path.isdir(source_dir):
        continue
    print(source_dir + '/')

    # Create the output folder once per source folder rather than
    # re-checking it for every single image.
    target_dir = os.path.join(new_images_folder, folder)
    os.makedirs(target_dir, exist_ok=True)

    for name in os.listdir(source_dir):
        # The context manager releases the source file handle as soon as the
        # transformed copy has been built.
        with Image.open(os.path.join(source_dir, name)) as source:
            channels = len(source.getbands())
            image = transform_image(source, channels)

        image.save(os.path.join(target_dir, name))

Cleansed alphabet data/new/New-Untitled-6/


In [None]:
folders_dir = 'Cleansed alphabet data/labelled/'
dest = 'Cleansed alphabet data/done/'

# Flatten the labelled folders into `dest` as pic_<i>.png and record, in the
# same order, the label carried by each original file name.
os.makedirs(dest, exist_ok=True)
folders = os.listdir(folders_dir)

# First picture index to assign.
# NOTE(review): presumably pic_0..pic_47 already exist in `dest` from an
# earlier run -- confirm. labels.txt below is rewritten from scratch, so it
# will only contain the labels of the files moved by this cell.
i = 48

labels = []

for folder in folders:
    folder_path = os.path.join(folders_dir, folder)

    # Test the entry inside `folders_dir`: checking the bare name resolved it
    # against the working directory, so stray files were never skipped.
    if not os.path.isdir(folder_path):
        continue

    for image in os.listdir(folder_path):
        # The label is the part of the file name before the first dot.
        labels.append(image.split('.')[0])
        os.rename(os.path.join(folder_path, image), dest + 'pic_' + str(i) + '.png')
        i += 1

# One label per line, in the same order as the picture indices assigned above.
with open(dest + 'labels.txt', 'w') as file:
    for item in labels:
        file.write(item + '\n')

In [None]:
import torch
from torchvision import transforms
import pandas as pd

In [None]:
done_images_folder = 'Cleansed alphabet data/done/'

# Load pic_0.png, pic_1.png, ... into a single stacked tensor.
to_tensor = transforms.PILToTensor()  # built once, reused for every picture

tensors = []
for i in range(0, 2000):
    image_path = done_images_folder + 'pic_{}.png'.format(i)

    # The numbering is treated as contiguous: the first missing index ends the scan.
    if not os.path.isfile(image_path):
        break

    # Close each file as soon as its pixels have been copied into a tensor.
    with Image.open(image_path) as image:
        tensors.append(to_tensor(image))

# torch.stack fails with an unhelpful message on an empty list; report the
# actual problem instead.
if not tensors:
    raise FileNotFoundError('No pic_<i>.png files found in ' + done_images_folder)

# Stacking requires every picture to have the same shape.
all_images = torch.stack(tensors)



In [None]:
# Average over dimension 1 of the stacked pictures (the dimension right after
# the picture index), leaving one 2-D plane per picture.
data = all_images.float().mean(dim=1)

data.size()

torch.Size([1094, 64, 64])

In [None]:
# Persist the averaged image tensor in the same folder the pictures were read from.
torch.save(data, '{}train_data.pt'.format(done_images_folder))

In [None]:
# Turn the label names stored in labels.txt into integer class indices.
# NOTE: `trans_from_labels` is defined in a later cell of this notebook, so
# that cell has to be executed before this one.
df = pd.read_csv('{}labels.txt'.format(done_images_folder), header=None)

# Look each name up explicitly so an unknown label raises KeyError.
series = df[0].map(lambda name: trans_from_labels[name])

labels = torch.from_numpy(series.to_numpy(copy=True))
torch.save(labels, '{}train_labels.pt'.format(done_images_folder))

In [None]:
# Class index (0-47) keyed by a single Latin letter: the upper-case keys take
# 0-23 and the matching lower-case keys take 24-47 (lower = upper + 24).
# NOTE(review): looks like a one-letter code for the same 48 classes as
# `trans_from_labels` below -- confirm. Not referenced by the other cells shown here.
trans = {
'A':0, 'B':1, 'G':2, 'D':3, 'E':4,  'Z':5,
'H':6, 'C':7, 'I':8, 'K':9, 'L':10, 'M':11,
'N':12,'Q':13,'O':14,'P':15,'R':16, 'S':17,
'T':18,'U':19,'F':20,'X':21,'V':22, 'W':23,
'a':24,'b':25,'g':26,'d':27,'e':28, 'z':29,
'h':30,'c':31,'i':32,'k':33,'l':34, 'm':35,
'n':36,'q':37,'o':38,'p':39,'r':40, 's':41,
't':42,'u':43,'f':44,'x':45,'v':46, 'w':47
}

# Class index keyed by Greek letter name, as looked up for each entry of
# labels.txt. Capitalised names with a trailing underscore map to 0-23 and
# the all-lower-case names map to 24-47.
# 'sigma' and 'sigma__' deliberately share index 41.
# NOTE(review): 'sigma__' is presumably the word-final form of sigma -- confirm.
trans_from_labels = {
'Alpha_':0, 'Beta_':1, 'Gamma_':2, 'Delta_':3, 'Epsilon_':4,  'Zeta_':5,
'Eta_':6, 'Theta_':7, 'Iota_':8, 'Kappa_':9, 'Lambda_':10, 'Mu_':11,
'Nu_':12,'Xi_':13,'Omicron_':14,'Pi_':15,'Rho_':16, 'Sigma_':17,
'Tau_':18,'Upsilon_':19,'Phi_':20,'Chi_':21,'Psi_':22, 'Omega_':23,
'alpha':24,'beta':25,'gamma':26,'delta':27,'epsilon':28, 'zeta':29,
'eta':30,'theta':31,'iota':32,'kappa':33,'lambda':34, 'mu':35,
'nu':36,'xi':37,'omicron':38,'pi':39,'rho':40, 'sigma':41, 'sigma__':41,
'tau':42,'upsilon':43,'phi':44,'chi':45,'psi':46, 'omega':47
}

In [None]:
# List every label name the mapping accepts.
print(trans_from_labels.keys())

dict_keys(['Alpha_', 'Beta_', 'Gamma_', 'Delta_', 'Epsilon_', 'Zeta_', 'Eta_', 'Theta_', 'Iota_', 'Kappa_', 'Lambda_', 'Mu_', 'Nu_', 'Xi_', 'Omicron_', 'Pi_', 'Rho_', 'Sigma_', 'Tau_', 'Upsilon_', 'Phi_', 'Chi_', 'Psi_', 'Omega_', 'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma', 'sigma__', 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega'])


In [None]:
# Collapse dimension 1 of the saved image tensor and write the result back
# to the same file.
data = torch.load('cropped_letters_labels/train_data.pt')

# The file is overwritten in place, so re-running this cell used to average
# away one more dimension each time (e.g. [N, 64, 64] -> [N, 64]). Reduce
# only while the extra dimension is still there.
# Assumes the un-averaged tensor is 4-D (picture, channel, height, width) -- TODO confirm.
if data.dim() == 4:
    x = torch.mean(data.float(), dim=1)
    torch.save(x, 'cropped_letters_labels/train_data.pt')
else:
    # Already averaged by an earlier run: leave the file untouched.
    x = data

In [None]:
# Reload the saved tensor and report its dimensions as a sanity check.
data = torch.load('cropped_letters_labels/train_data.pt')

print(data.size())

torch.Size([462, 64, 64])
