In [1]:
import logging
# Configure the root logger: with the level at INFO, the logging.info() calls
# in this notebook are emitted while the logging.debug() calls are filtered out.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Optional alternatives (disabled): send DEBUG-level output to a log file.
#logging.basicConfig(filename='output.log',level=logging.DEBUG)
#logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)

import numpy as np

from collections import defaultdict

def text2numpy(num, lines_list, dim, labels, l_dict=None, i_dict=None):
    """Convert comma-separated ``LABEL,v0,v1,...`` lines into numpy arrays.

    Parameters
    ----------
    num : int
        Number of rows allocated in the outputs; at most the first ``num``
        entries of ``lines_list`` are read. If fewer lines are available the
        remaining rows stay all-zero.
    lines_list : list of str
        Text lines whose first comma-separated token is the class label and
        whose remaining tokens are parsed with ``float``.
    dim : int
        Number of feature columns in ``X``. A line with fewer values leaves
        the trailing columns at zero; one with more raises ``IndexError``.
    labels : int
        Number of columns of the one-hot matrix ``Y``.
    l_dict : dict, optional
        Existing label -> column mapping. When given it is used as-is (pass
        the mapping built from the training split so that other splits share
        the same encoding). When ``None`` a mapping is built from the sorted
        distinct labels found in the lines that were read.
    i_dict : dict, optional
        Existing column -> label mapping. Returned untouched when ``l_dict``
        is given; rebuilt (and any passed value ignored) when ``l_dict`` is
        ``None``.

    Returns
    -------
    tuple
        ``(X, Y, l_dict, i_dict)`` where ``X`` has shape ``(num, dim)`` and
        ``Y`` has shape ``(num, labels)`` with a single 1 per parsed row.

    Raises
    ------
    KeyError
        If a label is not present in the mapping in use.
    ValueError
        If a feature token cannot be parsed as a float.
    """
    X = np.zeros((num, dim))
    Y = np.zeros((num, labels))

    # Labels are buffered only when the mapping still has to be built.
    label_list = []

    for i, line in enumerate(lines_list[:num]):
        tokens = line.strip().split(",")
        label = tokens[0]

        for d, v in enumerate(tokens[1:]):
            X[i, d] = float(v)

        if l_dict is not None:
            Y[i, l_dict[label]] = 1
        else:
            label_list.append(label)

    if l_dict is None:
        # Sorted order makes the encoding deterministic; zip() with
        # range(labels) caps the mapping at `labels` distinct classes.
        sorted_label_ind_list = list(zip(sorted(set(label_list)), range(labels)))
        sorted_ind_label_list = [(ind, label) for label, ind in sorted_label_ind_list]

        l_dict = dict(sorted_label_ind_list)
        logging.debug("Label dict: {}".format(l_dict))

        i_dict = dict(sorted_ind_label_list)
        logging.debug("Index dict: {}".format(i_dict))

        # Iterate over the labels actually read rather than over a global
        # sample count, so the function works for any `num` and any caller.
        for i, label in enumerate(label_list):
            Y[i, l_dict[label]] = 1

    return X, Y, l_dict, i_dict


# Read the whole letter-recognition file into memory; the context manager
# closes the handle as soon as the lines have been read.
with open("DATASETS/letters/letter-recognition.data") as f:
    lines = f.readlines()

logging.debug("Input data sample: LABEL, V0, V1, ..., V15")
# Backslashes are doubled so the message keeps its literal "\in \{" text
# without depending on unrecognised escape sequences.
logging.debug("Input labels \\in \\{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', "
              "       'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}")

total_examples = len(lines)
logging.info("# Samples: {}".format(total_examples))

# Sequential split: the first num_tr_examples lines are the training set and
# every remaining line is the test set.
num_tr_examples = 16000
num_te_examples = total_examples - num_tr_examples
dim = 16      # features per sample (V0..V15)
labels = 26   # one class per letter 'A'..'Z'

logging.info("# training samples: {}".format(num_tr_examples))
logging.info("# test samples: {}".format(num_te_examples))

# The label/index dictionaries are built from the training split and then
# passed in for the test split so both share the same one-hot encoding.
X_tr, Y_tr, label_dict, ind_dict = text2numpy(num_tr_examples, lines, dim, labels)
X_te, Y_te, label_dict, ind_dict = text2numpy(num_te_examples, lines[num_tr_examples:],
                                              dim, labels, label_dict, ind_dict)


logging.debug("Read examples: {}".format(X_tr[:10]))

# Decode the first one-hot rows back to letters to compare with the raw lines.
for y in Y_tr[:10]:
    ind=np.argmax(y, axis=0)
    logging.debug("Read labels: {}".format(ind_dict[ind]))

for i in lines[:10]:
    logging.debug("Real instances: {}".format(i))

#Normalization -> From 0-15 to 0-1
X_tr = X_tr / 15.0
X_te = X_te / 15.0

# Each row of Y holds a single 1, so these sums should equal the row counts.
logging.debug("Cheking things...{}".format(np.sum(Y_tr)))
logging.debug("Cheking things...{}".format(np.sum(Y_te)))

# Quick look at how far apart two normalised samples are: per-feature
# difference, then squared Euclidean distance.
one_sample = X_tr[0,:].copy()
another_sample = X_tr[1,:].copy()

print((one_sample - another_sample))
print(np.sum(np.square(one_sample - another_sample)))

# Train a fully connected classifier (two 1024-unit ReLU layers with dropout)
# on the letter-recognition training split, augmented with the pre-generated
# "interpolated" reconstructed samples, and evaluate it on the test split.
# The layout follows a generic Keras MLP example; no accuracy or timing
# figure is claimed for this dataset.

from __future__ import print_function

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

batch_size = 128
num_classes = labels
epochs = 100

root_dir = "DATASETS/letters/"
# Exactly one base_file is active; it selects which set of synthetic samples
# is appended to the real training data.
#base_file = "extrapolated_reconstructed_letters_Z_"
base_file = "interpolated_reconstructed_letters_Z_"
#base_file = "noise_reconstructed_letters_Z_"

# Load the synthetic samples and stack them after the real training rows.
# NOTE(review): X_tr was divided by 15.0 when it was loaded; presumably the
# .npy features are already on that 0-1 scale -- confirm against the script
# that generated them.
x_train=np.load(root_dir + base_file + "X_training.npy")
y_train=np.load(root_dir + base_file + "Y_training.npy")
x_train=np.concatenate((X_tr, x_train), axis=0)
y_train=np.concatenate((Y_tr, y_train), axis=0)
# Fail early with a clear message if features and labels are misaligned.
assert x_train.shape[0] == y_train.shape[0], "feature/label row counts differ"
x_test=X_te
y_test=Y_te

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Y_tr / Y_te are already one-hot matrices, so no to_categorical conversion
# is applied here.

model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(dim,)))
model.add(Dropout(0.2))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# The test split doubles as the validation set, so the per-epoch validation
# metrics and the final "Test" figures are computed on the same data.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

INFO:root:# Samples: 20000
INFO:root:# training samples: 16000
INFO:root:# training samples: 4000
Using TensorFlow backend.


[-0.2        -0.26666667  0.         -0.13333333 -0.06666667 -0.13333333
  0.53333333 -0.33333333  0.13333333 -0.46666667  0.46666667 -0.06666667
 -0.13333333  0.         -0.26666667 -0.13333333]
1.11111111111
208000 train samples
4000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1024)              17408     
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 26)                26650     
Total params: 1,093,658
T

KeyboardInterrupt: 

In [2]:
# Train a fully connected classifier (two 1024-unit ReLU layers with dropout)
# on the letter-recognition training split, augmented with the pre-generated
# "extrapolated" reconstructed samples, and evaluate it on the test split.
# The layout follows a generic Keras MLP example; no accuracy or timing
# figure is claimed for this dataset.

from __future__ import print_function

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

batch_size = 128
num_classes = labels
epochs = 100

root_dir = "DATASETS/letters/"
# Exactly one base_file is active; it selects which set of synthetic samples
# is appended to the real training data.
base_file = "extrapolated_reconstructed_letters_Z_"
#base_file = "interpolated_reconstructed_letters_Z_"
#base_file = "noise_reconstructed_letters_Z_"

# Load the synthetic samples and stack them after the real training rows.
# NOTE(review): X_tr was divided by 15.0 when it was loaded; presumably the
# .npy features are already on that 0-1 scale -- confirm against the script
# that generated them.
x_train=np.load(root_dir + base_file + "X_training.npy")
y_train=np.load(root_dir + base_file + "Y_training.npy")
x_train=np.concatenate((X_tr, x_train), axis=0)
y_train=np.concatenate((Y_tr, y_train), axis=0)
# Fail early with a clear message if features and labels are misaligned.
assert x_train.shape[0] == y_train.shape[0], "feature/label row counts differ"
x_test=X_te
y_test=Y_te

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Y_tr / Y_te are already one-hot matrices, so no to_categorical conversion
# is applied here.

model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(dim,)))
model.add(Dropout(0.2))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# The test split doubles as the validation set, so the per-epoch validation
# metrics and the final "Test" figures are computed on the same data.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Using TensorFlow backend.


208000 train samples
4000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1024)              17408     
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 26)                26650     
Total params: 1,093,658
Trainable params: 1,093,658
Non-trainable params: 0
_________________________________________________________________
Train on 208000 samples, validate on 4000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4

KeyboardInterrupt: 

In [3]:

# Smaller variant of the classifier: two 512-unit ReLU hidden layers, each
# followed by 20% dropout, and a softmax output over num_classes.
#
# This cell imports nothing and loads no data itself: Sequential, Dense,
# Dropout, RMSprop, dim, num_classes, batch_size, epochs and the
# x_train/y_train/x_test/y_test arrays must already exist in the kernel.
# NOTE(review): the saved output of this cell reports 32000 training samples
# and 20 epochs, so it appears to come from different kernel state than the
# cells shown before it -- rerun from a fresh kernel to confirm the numbers.
# The label arrays are used as they are; no to_categorical step is applied.

model = Sequential([
    Dense(512, activation='relu', input_shape=(dim,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.summary()

model.compile(optimizer=RMSprop(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Validation runs on the same arrays that are evaluated afterwards.
held_out = (x_test, y_test)
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=held_out)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               8704      
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 26)                13338     
Total params: 284,698
Trainable params: 284,698
Non-trainable params: 0
_________________________________________________________________
Train on 32000 samples, validate on 4000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20