In [85]:
import numpy as np
import tensorflow as tf

In [136]:
def loadData():
    with np.load("notMNIST.npz") as data:
        Data, Target = data ["images"], data["labels"]
        np.random.seed(521)
        randIndx = np.arange(len(Data))
        np.random.shuffle(randIndx)
        
        # flatten each image to a 784 x 1 vector
        Data = Data[randIndx]/255.
        Data = np.reshape(Data, [18724, 784])
        
        Target = Target[randIndx]
        print(Target.shape)
        
        trainData, trainTarget = Data[:15000], Target[:15000]
        validData, validTarget = Data[15000:16000], Target[15000:16000]
        testData, testTarget = Data[16000:], Target[16000:]
    
    return trainData, trainTarget, validData, validTarget, testData, testTarget

In [137]:
trainData, trainTarget, validData, validTarget, testData, testTarget = loadData()

(18724,)


In [138]:
# Create model
def neuralNet_2layers(x, weights, biases):
    # Hidden layer with RELU activation
    hidden_layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    hidden_layer_1 = tf.nn.relu(hidden_layer_1)
    # Hidden layer with RELU activation
    hidden_layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    hidden_layer_2 = tf.nn.relu(hidden_layer_2)
    # Output layer with linear activation and softmax normalization
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    out_layer = tf.nn.softmax(out_layer)
    return out_layer

In [139]:
def buildGraph_2layers(n_input, n_hidden_1, n_hidden_2, n_classes):
    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }
    
    # Construct model: forward propagation
    pred = neuralNet_2layers(x, weights, biases)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    return weights, biases, pred, cost, optimizer

In [172]:
def neuralNet_1layer(x, hidden_1, output):
    # Hidden layer with RELU activation
    hidden_layer_1 = tf.add(tf.matmul(x, hidden_1['weights']), hidden_1['bias'])
    hidden_layer_1 = tf.nn.relu(hidden_layer_1)
    # Output layer with linear activation
    out_layer = tf.matmul(hidden_layer_1, output['weights']) + output['bias']
    out_layer = tf.nn.softmax(out_layer)
    return out_layer

In [179]:
def buildGraph_1layer(learning_rate, n_input, n_hidden_1, n_classes):
#     learning_rate = 0.01;
    
    x = tf.placeholder(tf.float32, [None,784])
    y_target = tf.placeholder(tf.int32, [None,1])
    
    Lambda = tf.placeholder("float32")
    
    # Xavier initialization is used
    hidden_1 = {
        'weights': tf.Variable(
            tf.truncated_normal([n_input, n_hidden_1],
                            stddev=3.0 / (n_input + n_hidden_1))
        ),
        'bias': tf.Variable(tf.zeros([n_hidden_1]))
    }
    
    output = {
        'weights': tf.Variable(
            tf.truncated_normal([n_hidden_1, n_classes],
                            stddev=3.0 / (n_hidden_1 + n_classes))
        ),
        'bias' : tf.Variable(tf.zeros([n_classes]))
    }
    
    # Construct model: forward propagation
    pred = neuralNet_1layer(x, hidden_1, output)
#     pred = tf.argmax(pred, 1, name=None)
    

    # Define loss and optimizer
    weight_decay = Lambda * (
        tf.reduce_mean(
            tf.reduce_sum(
                tf.square(hidden_1['weights'])
            ) +  
            tf.reduce_sum(
                tf.square(output['weights'])
            )
        )
    )
    
#     y_target = tf.one_hot(y_target, 10)
    y_target = tf.reshape(y_target, [-1])
    
    print("pred: ", pred)
    print("target: ", y_target)
    
    cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=y_target))
    cost = cross_entropy + weight_decay
    
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    return x, y_target, Lambda, hidden_1, output, pred, cost, optimizer

In [180]:
x, y_target, Lambda, hidden_1, output, pred, cost, optimizer = buildGraph_1layer(0.01, 784 , 1000, 10)
# tf.get_default_graph().as_graph_def()

sess = tf.Session()
# Initializing the variables
init = tf.global_variables_initializer()
sess.run(init)

# Training hyper-parameters
B = 500
max_iter = 100
wd_lambda = 0.0000
trainLoss_list = []
validLoss_list = []
testLoss_list = []
numBatches = np.floor(15000/B)

pred:  Tensor("Softmax_27:0", shape=(?, 10), dtype=float32)
target:  Tensor("Reshape_112:0", shape=(?,), dtype=int32)


In [181]:
print ("training is about to start")
for step in range(0,max_iter):
    # sample minibatch without replacement
    if step % numBatches == 0:
        randIdx = np.arange(15000) 
        np.random.shuffle(randIdx) 
        trainData = trainData[randIdx]
        i=0
    feeddict = {x: trainData[i*B:(i+1)*B],
                y_target: trainTarget[i*B:(i+1)*B],
                Lambda: wd_lambda}

    # Update model parameters
    _, _, prediction, loss, _ = sess.run([hidden_layer_1, output_layer, 
                                        pred, cost, optimizer],
                                        feed_dict=feeddict)
    i += 1
    trainLoss_list.append(loss)
    if not (step % 10):
        print("Iter: %3d, MSE-train: %4.2f"%(step, loss))

training is about to start
Iter:   0, MSE-train: 2.30
Iter:  10, MSE-train: 2.34
Iter:  20, MSE-train: 2.35
Iter:  30, MSE-train: 2.38
Iter:  40, MSE-train: 2.35
Iter:  50, MSE-train: 2.37
Iter:  60, MSE-train: 2.38
Iter:  70, MSE-train: 2.35
Iter:  80, MSE-train: 2.37
Iter:  90, MSE-train: 2.38
Iter: 100, MSE-train: 2.35
Iter: 110, MSE-train: 2.37
Iter: 120, MSE-train: 2.38
Iter: 130, MSE-train: 2.35
Iter: 140, MSE-train: 2.37
Iter: 150, MSE-train: 2.38
Iter: 160, MSE-train: 2.35
Iter: 170, MSE-train: 2.37
Iter: 180, MSE-train: 2.38
Iter: 190, MSE-train: 2.35
Iter: 200, MSE-train: 2.37
Iter: 210, MSE-train: 2.38
Iter: 220, MSE-train: 2.35
Iter: 230, MSE-train: 2.37
Iter: 240, MSE-train: 2.38
Iter: 250, MSE-train: 2.35
Iter: 260, MSE-train: 2.37
Iter: 270, MSE-train: 2.38
Iter: 280, MSE-train: 2.35
Iter: 290, MSE-train: 2.37
Iter: 300, MSE-train: 2.38
Iter: 310, MSE-train: 2.35
Iter: 320, MSE-train: 2.37
Iter: 330, MSE-train: 2.38
Iter: 340, MSE-train: 2.35
Iter: 350, MSE-train: 2.37
I

In [171]:
# test and validation
validation_dict = {"valid":(validData, validTarget),
                    "test":(testData, testTarget)}
for dataset in validation_dict:
    data, target = validation_dict[dataset]
    err = sess.run(loss,
                    feed_dict={x: data,
                               y_target: target, 
                               Lambda: wd_lambda}
                  )
    acc = np.mean((y_predicted.eval(feed_dict={x: data,
                                               y_target: target}) > 0.5)
                                == testTarget)
    print("Final %s MSE: %.2f, acc: %.2f"%(dataset, errTest, acc_test))

TypeError: Fetch argument 2.3841789 has invalid type <class 'numpy.float32'>, must be a string or Tensor. (Can not convert a float32 into a Tensor or Operation.)