In [2]:
import pickle
from time import time

import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
# Default CSV locations. trainAndTest() derives per-person paths from these by
# replacing the substring '01' with a zero-padded person id.
train_path = "data/Features/Training/training_001.csv"
test_path = "data/TestFeatures/testcsv.csv"
# Number of leading feature columns read from each CSV; readCSV() treats the
# column right after them (index n_input) as the class label.
n_input = 9 # no. of features per file

#### `readCSV` reads the CSV files at `train_path` and `test_path` and returns the feature arrays and one-hot labels we need.

In [4]:
def _read_split(path, n_features, with_labels):
    """Parse one CSV once and split it into float32 features and one-hot labels.

    Returns ``(features, one_hot)``; ``one_hot`` is ``None`` when
    ``with_labels`` is false.
    """
    n_label_classes = 2  # the one-hot encoding below has exactly two columns
    n_cols = n_features + 1 if with_labels else n_features
    # usecols keeps the parse limited to the same leading columns the
    # two-pass version read, but touches the file only once.
    frame = pd.read_csv(path, usecols=range(n_cols))
    features = frame.iloc[:, :n_features].values.astype(np.float32)
    if not with_labels:
        return features, None

    raw = frame.iloc[:, n_features].values
    # Labels may be parsed as floats (e.g. "1.0"); fancy indexing needs ints.
    labels = raw.astype(np.int64)
    if labels.size and (
        not np.array_equal(labels, raw)
        or labels.min() < 0
        or labels.max() >= n_label_classes
    ):
        # Without this check a label of -1 would silently wrap around to the
        # last class, and a fractional label would be truncated.
        raise ValueError(
            "labels in %r must be integers in [0, %d)" % (path, n_label_classes)
        )
    return features, np.eye(n_label_classes)[labels]


def readCSV(train_path, test_path, type2=False, n_features=None):
    """Load training and test data from two CSV files.

    The first ``n_features`` columns of each file are the inputs; the next
    column is a 0/1 class label that is converted to a one-hot row.

    Parameters
    ----------
    train_path, test_path : str
        Paths of the training and test CSV files.
    type2 : bool, optional
        When truthy, the test file is read without its label column and only
        three values are returned.
    n_features : int, optional
        Number of leading feature columns. Defaults to the notebook-level
        ``n_input``.

    Returns
    -------
    tuple
        ``(train_input, corr_train, test_input, corr_test)`` when ``type2`` is
        falsy, otherwise ``(train_input, corr_train, test_input)``. Inputs are
        float32 arrays; the ``corr_*`` arrays are one-hot with two columns.

    Raises
    ------
    ValueError
        If a label column holds anything other than the integers 0 and 1.
    """
    if n_features is None:
        n_features = n_input

    train_input, corr_train = _read_split(train_path, n_features, True)
    test_input, corr_test = _read_split(test_path, n_features, not type2)

    if type2:
        return train_input, corr_train, test_input
    return train_input, corr_train, test_input, corr_test

#### Setting up our multi-layer perceptron. Note that we apply softmax at the end to convert our output into probabilities.

In [5]:
# Reset the global default graph before (re)defining the model below.
tf.reset_default_graph()

# --- Training hyper-parameters ---
learning_rate = 0.001    # step size handed to the Adam optimizer
training_epochs = 1000   # upper bound on training steps per evaluate() call
display_step = 1         # not read by any of the cells shown in this notebook

# --- Network shape ---
n_hidden_1 = 7           # neurons in the single hidden layer
n_classes = 2            # no. of classes (genuine or forged)

# --- Graph inputs: one row per sample ---
X = tf.placeholder(tf.float32, shape=[None, n_input])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

# --- Trainable parameters ---
# Each initializer carries its own op-level seed; the creation order
# (h1, out, b1, out) is kept as-is.
weights = dict(
    h1=tf.Variable(tf.random_normal([n_input, n_hidden_1], seed=1)),
    out=tf.Variable(tf.random_normal([n_hidden_1, n_classes], seed=2)),
)
biases = dict(
    b1=tf.Variable(tf.random_normal([n_hidden_1], seed=3)),
    out=tf.Variable(tf.random_normal([n_classes], seed=4)),
)


# Create model
def multilayer_perceptron(x):
    """Build the forward pass: one tanh hidden layer and a tanh output layer.

    Reads the notebook-level ``weights`` and ``biases`` dictionaries and
    returns the output tensor (one column per class).
    """
    hidden_pre_activation = tf.matmul(x, weights['h1']) + biases['b1']
    hidden = tf.tanh(hidden_pre_activation)
    output_pre_activation = tf.matmul(hidden, weights['out']) + biases['out']
    return tf.tanh(output_pre_activation)

# Construct model
# NOTE: despite the name, `logits` is the tanh-squashed network output
# (multilayer_perceptron applies tanh to its last layer), not raw scores.
logits = multilayer_perceptron(X)

# Define loss and optimizer
# Loss is the mean squared difference between the network output and the
# one-hot targets; Adam minimizes it with the configured learning rate.
loss_op = tf.reduce_mean(tf.squared_difference(logits, Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# For accuracies
pred = tf.nn.softmax(logits)  # Apply softmax to logits
# A sample counts as correct when the highest-scoring class matches the label.
correct_prediction = tf.equal(tf.argmax(pred,1), tf.argmax(Y,1))
# Fraction of correct samples in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()

#### Given a `train_path` and `test_path`, `evaluate` first trains our model on the training data, then tests it on the test data, and finally returns the accuracy on both.

In [6]:
def evaluate(train_path, test_path, type2=False):
    """Train the global model on one CSV and score or classify another.

    A fresh session re-initializes all variables, then runs up to
    ``training_epochs`` full-batch optimizer steps, stopping early once the
    loss drops below 1e-4.

    Parameters
    ----------
    train_path, test_path : str
        CSV files passed to ``readCSV``.
    type2 : bool, optional
        Falsy (default): score mode, the test file must contain labels.
        Truthy: prediction mode, only the first row of the test file is
        classified.

    Returns
    -------
    tuple or bool
        ``(train_accuracy, test_accuracy)`` in score mode. In prediction mode,
        ``True`` after printing 'Genuine Image' when class 1 scores higher
        than class 0 for the first test row, else ``False`` after printing
        'Forged Image'.
    """
    # Decide the mode once so loading and reporting can never disagree
    # (previously `not(type2)` and `type2 is False` differed for 0 / None).
    predict_only = bool(type2)
    if predict_only:
        train_input, corr_train, test_input = readCSV(train_path, test_path, type2)
    else:
        train_input, corr_train, test_input, corr_test = readCSV(train_path, test_path)

    loss_threshold = 0.0001  # early-stopping cut-off on the training loss
    train_feed = {X: train_input, Y: corr_train}

    with tf.Session() as sess:
        sess.run(init)
        # Training cycle: each step runs backprop and fetches the loss.
        for epoch in range(training_epochs):
            _, cost = sess.run([train_op, loss_op], feed_dict=train_feed)
            if cost < loss_threshold:
                break

        if predict_only:
            prediction = sess.run(pred, feed_dict={X: test_input})
            # Only the first test row decides the verdict.
            if prediction[0][1] > prediction[0][0]:
                print('Genuine Image')
                return True
            print('Forged Image')
            return False

        accuracy1 = sess.run(accuracy, feed_dict=train_feed)
        accuracy2 = sess.run(accuracy, feed_dict={X: test_input, Y: corr_test})
        return accuracy1, accuracy2

In [7]:
# Prediction mode: train on the default training file, then classify the first
# row of the test file as genuine or forged.
evaluate(train_path, test_path, type2=True)

Genuine Image


True

#### The `trainAndTest` function lets us provide custom parameters for the model and then returns the training and testing accuracies averaged over every person in the dataset.

In [39]:
def _rebuild_model(rate, neurons):
    """Rebuild the notebook-level TensorFlow graph for new hyper-parameters.

    The optimizer's learning rate and the weight shapes are fixed when the
    graph is created, so changing ``learning_rate`` / ``n_hidden_1`` afterwards
    has no effect on an existing graph. This mirrors the model-definition cell
    and rebinds the same global names that ``evaluate`` and
    ``multilayer_perceptron`` read.
    """
    global learning_rate, n_hidden_1
    global X, Y, weights, biases, logits, loss_op, optimizer, train_op
    global pred, correct_prediction, accuracy, init

    learning_rate = rate
    n_hidden_1 = neurons

    tf.reset_default_graph()
    X = tf.placeholder("float", [None, n_input])
    Y = tf.placeholder("float", [None, n_classes])
    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], seed=1)),
        'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], seed=2)),
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1], seed=3)),
        'out': tf.Variable(tf.random_normal([n_classes], seed=4)),
    }
    logits = multilayer_perceptron(X)
    loss_op = tf.reduce_mean(tf.squared_difference(logits, Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    pred = tf.nn.softmax(logits)
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    init = tf.global_variables_initializer()


def trainAndTest(rate=0.001, epochs=1700, neurons=7, display=False):
    """Train and score the model for persons 1..10 and average the results.

    Parameters
    ----------
    rate : float
        Learning rate for the Adam optimizer.
    epochs : int
        Maximum number of training steps per person.
    neurons : int
        Number of neurons in the hidden layer.
    display : bool
        When true, print progress and the averaged results.

    Returns
    -------
    tuple
        ``(mean train accuracy, mean test accuracy, mean seconds per person)``.

    Side effects: rebinds the notebook-level model globals (graph tensors,
    ``learning_rate``, ``n_hidden_1``) and ``training_epochs``.
    """
    global training_epochs

    start = time()
    training_epochs = epochs
    # The graph must be rebuilt, otherwise `rate` and `neurons` would be
    # ignored and every call would train the network defined earlier.
    _rebuild_model(rate, neurons)

    train_avg, test_avg = 0, 0
    n = 10  # number of persons in the dataset
    for i in range(1, n + 1):
        if display:
            print("Running for Person id", i)
        temp = str(i).zfill(2)
        # NOTE(review): the default test_path contains no '01', so replace()
        # leaves it unchanged and every person is scored against the same
        # test file - confirm this is intended.
        train_score, test_score = evaluate(train_path.replace('01', temp),
                                           test_path.replace('01', temp))
        train_avg += train_score
        test_avg += test_score

    elapsed = time() - start
    if display:
        print("Training average-", train_avg/n)
        print("Testing average-", test_avg/n)
        print("Time taken-", elapsed)
    return train_avg/n, test_avg/n, elapsed/n

In [40]:
# Run the 10-person train/test loop requesting a 7-neuron hidden layer and
# print the averaged accuracies and the total time.
trainAndTest(neurons=7, display=True)

Running for Person id 1
Running for Person id 2
Running for Person id 3
Running for Person id 4
Running for Person id 5
Running for Person id 6
Running for Person id 7
Running for Person id 8
Running for Person id 9
Running for Person id 10
Training average- 0.95
Testing average- 0.9
Time taken- 14.721916198730469


(0.94999999999999996, 0.90000000000000002, 1.472191619873047)

In [41]:
# Sweep the hidden-layer size. For every size, trainAndTest is run k times and
# the three returned values (train accuracy, test accuracy, time) are averaged.
neuron_count = range(1,8)
train_avg, test_avg, time_taken = [], [], []
k = 1
for i in neuron_count:
    print("Number of neurons in Hidden layer-", i)
    runs = [trainAndTest(neurons=i) for _ in range(k)]
    train_avg.append(sum(run[0] for run in runs) / k)
    test_avg.append(sum(run[1] for run in runs) / k)
    time_taken.append(sum(run[2] for run in runs) / k)
    print('------------------------')

Number of neurons in Hidden layer- 1
------------------------
Number of neurons in Hidden layer- 2
------------------------
Number of neurons in Hidden layer- 3
------------------------
Number of neurons in Hidden layer- 4
------------------------
Number of neurons in Hidden layer- 5
------------------------
Number of neurons in Hidden layer- 6
------------------------
Number of neurons in Hidden layer- 7
------------------------


#### Now let's plot the average accuracies and time taken with respect to number of neurons

In [19]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's notebook mode before the iplot() call in the next cell.
init_notebook_mode(connected=True)

In [20]:
num_neurons = list(neuron_count)
Y2 = [train_avg, test_avg, time_taken]

# Persist the sweep results so the plot can be rebuilt without re-training.
# Uses the public `pickle` module (imported in the first cell) in place of
# the private `_pickle` accelerator.
results = {'train': train_avg, 'test': test_avg, 'time': time_taken}
with open('changingNeurons2.p', 'wb') as hand:
    pickle.dump(results, hand)

# trainAndTest returns its elapsed time divided by the number of people, so
# the third series is a per-person average, not the time for all 10 people.
names = ['Training', 'Testing', 'Average time per person']
n_series = 2   # if you want to plot time too put 3 else put 2
data = [dict(
        line=dict(width=2, shape='spline', smoothing=0.7),
        name=names[i],
        x=num_neurons,
        y=Y2[i]) for i in range(n_series)]

layout = dict(title="Variation of accuracy with number of neurons",
              xaxis=dict(title='Number of neurons'),
              yaxis=dict(title='Accuracy'))
fig = dict(data=data, layout=layout)

iplot(fig)