In [1]:
import os 
import numpy as np
import tensorflow as tf
import pandas as pd  
import matplotlib.pyplot as plt  
import sys
#You have freedom of using eager execution in tensorflow
#Instead of using With tf.Session() as sess you can use sess.run() whenever needed
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


In [2]:
def computeNumGrad(X,y,theta,reg): # returns approximate nabla
    # WRITEME: write your code here to complete the routine
    eps = 1e-5
    
    theta_list = [[[1,2], [3,4]], [1,2]]
    nabla_n = []
    # NOTE: you do not have to use any of the code here in your implementation...
        
    theta_plus_eps = [[[1,2], [3,4]], [1,2]]
    theta_minus_eps = [[[1,2], [3,4]], [1,2]]
    ########################################
    for i in range(len(theta)):
        param = theta[i].eval()
        
        rep = np.nditer(param, flags=['multi_index'], op_flags=['readwrite'])
        param_grad = np.ndarray(param.shape)
        while not rep.finished:
        # parameters at (x+eps) and (x-eps)
            ix = rep.multi_index
           
        # Evaluating function at x+eps i.e f(x+eps)
            for ix_ in ix:
                theta_plus_eps[i][ix_] = param[ix_] + eps
                
            f_x_plus_eps = computeCost(X,y,theta_plus_eps,reg)
        # Reseting theta
            for ix_ in ix:
                theta_list[i][ix_] = param[ix_] - eps        
        # Evaluating function at x i.e f(x-eps)
            for ix_ in ix:
                theta_minus_eps[i][ix_] = param[ix_] - eps
            f_x_minus_eps = computeCost(X,y,theta_minus_eps,reg)
        # Reseting theta
            for ix_ in ix:
                theta_list[i][ix_] = param[ix_] + eps
        # gradient at x
            
            for ix_ in ix:
                param_grad[ix_] = (f_x_plus_eps.eval() - f_x_minus_eps.eval())/(2*eps)
        # Iterating over all dimensions
            rep.iternext()
        nabla_n.append(param_grad)  
        
    ######################################
        
    return tuple(nabla_n)

In [3]:
def softmax_loss(X, y):
# Forward pass
    N = int(X.shape[0])
    exp_vals = tf.exp(X)
    probs = exp_vals / tf.reduce_sum(exp_vals, axis=1, keepdims=True)
    loss = -tf.reduce_mean(tf.log(probs))
# Backward pass
    dX = probs.eval()
        
    if isinstance(y, np.ndarray):
        y0 = y
    else:
        y0 = y.eval()
        
    for N_ in range(N):
        dX[N_][y0] -= 1
    dX /= N
    return loss, probs, dX

In [4]:
def computeGrad(X,y,theta,reg): # returns nabla
    # WRITEME: write your code here to complete the routine
    
    #######################################
    
    W, b = theta[0], theta[1]
    f = tf.matmul(X,W) + b
    _, _, df = softmax_loss(f,y)
    dW = tf.matmul(tf.transpose(X), df) + reg * W
    db = tf.reduce_sum(df, axis=0)
    
    ######################################

    return (dW,db)


In [5]:
def computeCost(X,y,theta,reg):
    # WRITEME: write your code here to complete the routine
    ########################################
    W, b = theta[0], theta[1]
    N = X.shape[0]
    XX = X.eval()
    f = np.matmul(XX, W) + b
    data_loss, _, _ = softmax_loss(f,y)
    
    reg_loss = 0.5 * reg * np.sum(np.dot(W, W))
    cost = data_loss + reg_loss
    
    
    #######################################
    
    return cost

In [6]:
def predict(X,theta):
    # WRITEME: write your code here to complete the routine
    W, b = theta[0], theta[1]

    # evaluate class scores
    scores = tf.matmul(X,W) + b
    # compute the class probabilities
    _, probs, _ = softmax_loss(scores,y)
    return scores,probs


In [7]:
np.random.seed(0) #Provide your unique Random seed
# Load in the data from disk
path = os.getcwd() + '/xor.dat'  
data = pd.read_csv(path, header=None) 

In [8]:
# set X (training data) and y (target variable)
cols = data.shape[1]  
X = data.iloc[:,0:cols-1]  
y = data.iloc[:,cols-1:cols] 

In [9]:
# convert from data frames to numpy matrices
X = np.array(X.values)  
y = np.array(y.values)
y = y.flatten()

X_tf = tf.constant(X)
Y_tf = tf.constant(y)
#Train a Linear Classifier

In [10]:
# initialize parameters randomly
D = X.shape[1]
K = np.amax(y) + 1

# initialize parameters in such a way to play nicely with the gradient-check!

initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float64)
W = tf.Variable(initializer([D, K]), dtype=tf.float64)
b = tf.Variable(tf.random_normal([K],dtype=tf.float64),dtype=tf.float64)
theta = (W,b)

# some hyperparameters
reg = 1e-3 # regularization strength

initialized = False

with tf.Session() as sess:
    
    if not initialized:
        tf.global_variables_initializer().run()
        initialized = True      
        
    nabla_n = computeNumGrad(X_tf,Y_tf,theta,reg)
    nabla = computeGrad(X_tf,Y_tf,theta,reg)
    nabla_n = list(nabla_n)
    nabla = list(nabla)
    #Initialize your variables
    for jj in range(0,len(nabla)):
        is_incorrect = 0 # set to false
        grad = nabla[jj]
        grad_n = nabla_n[jj]
        grad_sub = tf.subtract(grad_n,grad)
        grad_add = tf.add(grad_n,grad)
        err = tf.div(tf.norm(grad_sub) , (tf.norm(grad_add)))
        
        if(err.eval() > 1e-8):
            print("Param {0} is WRONG, error = {1}".format(jj, sess.run(err)))
        else:
            print("Param {0} is CORRECT, error = {1}".format(jj, sess.run(err)))


In [11]:
# Re-initialize parameters for generic training
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None, dtype=tf.float64) #You can use Xavier or Ortho for weight init
#If using other init compare that with Guassian init and report your findings
W = tf.Variable(initializer([D, K]),dtype=tf.float64)
b = tf.Variable(tf.random_normal([K],dtype=tf.float64),dtype=tf.float64)
theta = (W,b)

#play with hyperparameters for better performance 
n_e = 100 #number of epochs
check = 10 # every so many pass/epochs, print loss/error to terminal
step_size = 0.001
reg = 0.001 # regularization strength

# gradient descent loop
num_examples = X.shape[0]
with tf.Session() as sess: #You can exclude this ans use sess.run() whenever needed 
    
    tf.global_variables_initializer().run()
       
    for i in xrange(n_e):
        # WRITEME: write your code here to perform a step of gradient descent & record anything else desired for later
        
        #########################################
        
        theta = (W.eval(), b.eval())
        loss = computeCost(X_tf,Y_tf,theta,reg)
        if True:
            print("iteration %d: loss %f" % (i, loss.eval()))
        #########################################
        
        # perform a parameter update
        # WRITEME: write your update rule(s) here
        
        ##########################################
        dW, db = computeGrad(X_tf,Y_tf,theta,reg)
        W = W - step_size*dW
        b = b - step_size*db
        
        #########################################
 
# TODO: remove this line below once you have correctly implemented/gradient-checked your various sub-routines
#sys.exit(0) 

# evaluate training set accuracy
    scores, probs = predict(X,theta)
    predicted_class = sess.run(tf.argmax(scores, axis=1))
    
    print ('training accuracy: %.2f%%' % (100*np.mean(predicted_class == y)))
       

iteration 0: loss 0.693401
iteration 1: loss 0.693401
iteration 2: loss 0.693401
iteration 3: loss 0.693400
iteration 4: loss 0.693400
iteration 5: loss 0.693399
iteration 6: loss 0.693399
iteration 7: loss 0.693399
iteration 8: loss 0.693398
iteration 9: loss 0.693398
iteration 10: loss 0.693398
iteration 11: loss 0.693397
iteration 12: loss 0.693397
iteration 13: loss 0.693396
iteration 14: loss 0.693396
iteration 15: loss 0.693396
iteration 16: loss 0.693395
iteration 17: loss 0.693395
iteration 18: loss 0.693395
iteration 19: loss 0.693394
iteration 20: loss 0.693394
iteration 21: loss 0.693393
iteration 22: loss 0.693393
iteration 23: loss 0.693393
iteration 24: loss 0.693392
iteration 25: loss 0.693392
iteration 26: loss 0.693392
iteration 27: loss 0.693391
iteration 28: loss 0.693391
iteration 29: loss 0.693391
iteration 30: loss 0.693390
iteration 31: loss 0.693390
iteration 32: loss 0.693390
iteration 33: loss 0.693389
iteration 34: loss 0.693389
iteration 35: loss 0.693388
it