In [1]:

import glob
import re
import time
from random import shuffle

import keras.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc as misc
import h5py
import tensorflow as tf
import matplotlib.image as mpimg
import scipy.special

%matplotlib inline



Using TensorFlow backend.


In [2]:
# neural network class definition
class neuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Weights are updated one record at a time by `train`; `query` only runs
    the forward pass. Both accept a flat list/1-D array and convert it to a
    column vector internally.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Create the network with randomly initialised weights.

        Args:
            inputnodes: number of input nodes.
            hiddennodes: number of hidden nodes.
            outputnodes: number of output nodes.
            learningrate: step size multiplied into every weight update.
        """
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # Each matrix is shaped (nodes in next layer, nodes in this layer), so a
        # forward step is simply `weights . column_vector`.
        # Initial values are drawn from a normal distribution centred on 0 with
        # standard deviation 1/sqrt(number of incoming links).
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        self.lr = learningrate

        # Sigmoid activation; the function is stored directly instead of being
        # wrapped in a lambda.
        self.activation_function = scipy.special.expit

    def _forward(self, inputs):
        """Run a column vector through both layers.

        Returns:
            (hidden_outputs, final_outputs), the activations leaving the hidden
            layer and the output layer respectively.
        """
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        final_outputs = self.activation_function(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Adjust both weight matrices in place using one (input, target) pair.

        Args:
            inputs_list: input values, one per input node.
            targets_list: desired output values, one per output node.
        """
        # ndmin=2 followed by .T turns a flat sequence into a column vector.
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # Output-layer error is (target - actual).
        output_errors = targets - final_outputs
        # Hidden-layer error: the output error split back across the links.
        # This has to be computed before `who` is modified below.
        hidden_errors = np.dot(self.who.T, output_errors)

        # Update = learning rate * (error * sigmoid slope out*(1-out)) . upstream activations
        self.who += self.lr * np.dot(output_errors * final_outputs * (1.0 - final_outputs), hidden_outputs.T)
        self.wih += self.lr * np.dot(hidden_errors * hidden_outputs * (1.0 - hidden_outputs), inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations for `inputs_list`.

        Args:
            inputs_list: input values, one per input node.

        Returns:
            2-D array with one row per output node (a column vector when a
            single flat input is given).
        """
        inputs = np.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs
    

In [3]:
def get_label(path):
    """Translate a sample-directory path into its character label.

    Everything from the 11th character of ``path`` onwards is parsed as the
    integer sample number (for ``'Bmp/Sample011'`` that is ``11``).

    Returns:
        * numbers up to 10      -> ``str(number - 1)`` (so 1..10 give '0'..'9')
        * numbers 11 through 36 -> 'a'..'z'
        * numbers 37 through 62 -> 'a'..'z'
        * anything above 62     -> the number itself, still as an ``int``

    Raises:
        ValueError: if ``path[10:]`` is not an integer literal.
    """
    sample_no = int(path[10:])
    first_letter = ord('a')

    if sample_no <= 10:
        return str(sample_no - 1)
    if sample_no <= 36:
        # NOTE(review): this range and the next both map onto lower-case
        # letters. If the first range is meant to be upper case, the base
        # should be ord('A') - confirm before changing, callers compare these.
        return chr(first_letter + (sample_no - 11))
    if sample_no <= 62:
        return chr(first_letter + (sample_no - 37))
    return sample_no


In [4]:
# Build the list that maps a class index to its character label.
# glob.glob() returns entries in whatever order the filesystem lists them, so
# the paths are sorted to make position i in `my_labels` the same on every run
# and every machine.
samples = sorted(glob.glob('Bmp/*'))
my_labels = []
labels = []
for paths in samples:
    label = get_label(paths)
    # `labels` only collects the samples whose label is '0'.
    if label == '0':
        labels.append(label)
    my_labels.append(label)





In [133]:
# Build foo.csv: one row per image, laid out as [class index, 784 pixel values].
# The directory tree is walked as font_data/<group>/<class dir>/<image file>;
# the class index `lab` is the position of <class dir> inside its group and
# restarts at 0 for every group.
#
# Every glob() result is sorted: glob order depends on the filesystem, and an
# unsorted listing would make the class index of a directory change between
# machines or runs.
main = sorted(glob.glob('font_data/*'))
p = str()    # not used in this cell
count = 0    # not used in this cell
our_own_dataset = []
lab = 0
total = 0
init = time.time()
for x in main:
    path = sorted(glob.glob(x+'/*'))
    for addrs in path:
        addrs = sorted(glob.glob(addrs+'/*'))
        for addr in addrs:
            # NOTE(review): scipy.misc.imread / imresize are no longer shipped
            # by recent SciPy releases; this cell needs an older SciPy (plus
            # Pillow) until the image loading is ported.
            img = misc.imread(addr,flatten=True)
            img_array= misc.imresize(img,(28,28))
            img_data = img_array.reshape(784)
            # Pixels are stored unscaled; the class index goes in column 0.
            record = np.append(lab,img_data)
            our_own_dataset.append(record)
            total+=1
        lab+=1
    lab = 0

z = np.array(our_own_dataset)
z = z.reshape((total,785))
# Shuffle rows so a later positional train/test split is not ordered by class.
np.random.shuffle(z)
np.savetxt("foo.csv",z,fmt = '%d',delimiter = ',',newline = ' \n')
print(time.time() - init)


238.87795519828796


In [5]:
# number of input, hidden and output nodes
input_nodes = 784
hidden_nodes = 200
output_nodes = 52

# learning rate
learning_rate = 0.1

# create instance of neural network
n = neuralNetwork(input_nodes,hidden_nodes,output_nodes, learning_rate)

# Read every CSV row; the context manager closes the file even if reading fails.
with open("foo.csv",'r') as data_file:
    data_list = data_file.readlines()

# First 99.5 % of the rows are for training, the remainder for testing.
# Both slices share the same boundary so that no record is silently dropped
# (the previous `+1` on the test slice skipped the record at the boundary).
split_index = int(0.995*len(data_list))
training_data_list = data_list[:split_index]
test_data_list = data_list[split_index:]



In [6]:
# Train the network on every training record, `epochs` times over.
epochs = 1
initial = time.time()
for epoch in range(epochs):
    for record in training_data_list:
        # split the record by the ',' commas
        all_values = record.split(',')
        # scale and shift the inputs from 0..255 into 0.01..1.0
        # (np.asarray with dtype=float replaces np.asfarray, which NumPy 2.0 removed)
        inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        # create the target output values (all 0.01, except the desired label which is 0.99)
        targets = np.zeros(output_nodes) + 0.01
        # all_values[0] is the target label for this record
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)
print(time.time() - initial)
    

113.33702397346497


In [7]:
# Score the network on the held-out records: 1 for a correct answer, 0 otherwise.
scorecard = []

# go through all the records in the test data set
for record in test_data_list:
    # split the record by the ',' commas
    all_values = record.split(',')
    # same scaling as used for training
    # (np.asarray with dtype=float replaces np.asfarray, which NumPy 2.0 removed)
    inputs = (np.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01

    # correct answer is first value
    correct_label = my_labels[int(all_values[0])]
    # query the network
    outputs = n.query(inputs)
    # the index of the highest value corresponds to the label
    label = my_labels[np.argmax(outputs)]
    print("correct label",correct_label,"network",label)
    # The comparison is on the label characters, not on the class indices.
    if correct_label == label:
        scorecard.append(1)
    else:
        scorecard.append(0)

print(len(scorecard))
# Guard against an empty test set, which would otherwise divide by zero.
if scorecard:
    print((scorecard.count(1)/len(scorecard))*100)
else:
    print("no test records to score")


correct label j network r
correct label p network r
correct label s network r
correct label j network a
correct label q network q
correct label k network r
correct label p network r
correct label m network k
correct label g network r
correct label z network r
correct label j network r
correct label b network r
correct label l network r
correct label q network q
correct label d network d
correct label o network c
correct label p network q
correct label z network z
correct label z network z
correct label y network r
correct label s network s
correct label e network e
correct label l network r
correct label c network c
correct label e network r
correct label s network d
correct label w network r
correct label u network u
correct label i network r
correct label i network r
correct label o network r
correct label y network y
correct label a network r
correct label u network r
correct label x network x
correct label j network j
correct label j network r
correct label m network r
correct labe