In [1]:
import tensorflow as tf
import numpy as np
import pickle
from tensorflow.python.framework import ops

In [2]:
# Load the pre-split dataset (test / validation / train) from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open pickles that were produced by a trusted step of this project.
savedPath = "../data/splittedData.pickle"

with open(savedPath, "rb") as input_file:
    dataDict = pickle.load(input_file)

# Unpack each split. Labels are cast to float32 because the graph's label
# placeholder is declared as float32.
testData, testLabels = dataDict["testData"], dataDict["testLabels"].astype(np.float32)
validationData, validationLabels = (dataDict["validationData"],
                                    dataDict["validationLabels"].astype(np.float32))
trainData, trainLabels = dataDict["trainData"], dataDict["trainLabels"].astype(np.float32)

# Report the shape of every split as a quick sanity check.
print("Test data shape is {} and Test labels shape is {}".format(testData.shape, testLabels.shape))
print("Validation data shape is {} and Validation labels shape is {}"
      .format(validationData.shape, validationLabels.shape))
print("Train data shape is {} and Train labels shape is {}".format(trainData.shape, trainLabels.shape))

Test data shape is (44, 20) and Test labels shape is (44,)
Validation data shape is (44, 20) and Validation labels shape is (44,)
Train data shape is (209, 20) and Train labels shape is (209,)


In [3]:
# Model and training hyper-parameters
NUMBER_OF_FEATURES = 20  # width of one input row; the feature placeholder is [None, NUMBER_OF_FEATURES]
LEARNING_RATE = 1e-2     # step size handed to the gradient-descent optimizer
BATCH_SIZE = 8           # number of training rows fed per optimisation step
NUM_EPOCH = 200          # iterations of the outer training loop
CHECK_GAP = 100          # validation metrics are evaluated every CHECK_GAP training steps
ALPHA = 1.0              # multiplier applied to the absolute-weight penalty in the loss

In [4]:
# Build a fresh TF1 graph: a linear logistic classifier whose loss carries an
# absolute-weight (lasso-style) penalty.
ops.reset_default_graph()
sess = tf.Session()

# Inputs: feature rows and their labels as a single-column matrix.
xVals = tf.placeholder(dtype=np.float32, shape=[None, NUMBER_OF_FEATURES])
yVals = tf.placeholder(dtype=np.float32, shape=[None, 1])

# Trainable parameters, both initialised from a normal distribution (mean 0, stddev 1).
weight = tf.Variable(tf.random_normal(shape=[NUMBER_OF_FEATURES, 1], mean=0.0, stddev=1))
biais = tf.Variable(tf.random_normal(shape=[1, 1], mean=0.0, stddev=1))

# Raw logits; the sigmoid is applied inside the loss op and again for predictions.
modelOutput = tf.matmul(xVals, weight) + biais

# Loss = mean(ALPHA * |weight|) + mean sigmoid cross-entropy.
# Only `weight` is penalised; the bias is left out of the penalty.
weightAbs = tf.reduce_mean(ALPHA * tf.abs(weight))
perExampleLoss = tf.nn.sigmoid_cross_entropy_with_logits(labels=yVals, logits=modelOutput)
origLoss = tf.reduce_mean(perExampleLoss)
loss = weightAbs + origLoss

# One plain gradient-descent update of the penalised loss per run of trainStep.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
trainStep = optimizer.minimize(loss)

# Accuracy: round the sigmoid probability to 0/1 and compare with the label.
probabilities = tf.nn.sigmoid(modelOutput)
prediction = tf.round(probabilities)
correctPrediction = tf.cast(tf.equal(prediction, yVals), dtype=np.float32)
accuracy = tf.reduce_mean(correctPrediction)

Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Train with mini-batch gradient descent, recording validation metrics every
# CHECK_GAP optimisation steps, then report accuracy on the held-out test set.
init = tf.global_variables_initializer()
sess.run(init)

# Validation history (step number, loss, accuracy), filled in during training.
steps = []
lossVals = []
accVals = []

currentStep = 0
trainDataSize = trainData.shape[0]

# Only complete batches are trained on; an incomplete remainder is skipped.
# Floor division guarantees that a dataset whose size is an exact multiple of
# BATCH_SIZE still trains on its last complete batch.
batchesPerEpoch = trainDataSize // BATCH_SIZE

# Labels are reshaped to column vectors to match the [None, 1] label placeholder.
# Whole-set metrics do not depend on row order, so validation rows are fed as-is;
# this also keeps evaluation from consuming the random stream that drives the
# training shuffles (otherwise changing CHECK_GAP would change the training run).
validationFeed = {xVals: validationData,
                  yVals: validationLabels.reshape((-1, 1))}

np.random.seed(seed=18)

for _ in range(NUM_EPOCH):
    # Fresh random sample order for every epoch.
    indexes = np.random.permutation(trainDataSize)

    for batchNumber in range(batchesPerEpoch):
        lowIndex = batchNumber * BATCH_SIZE
        batchIndexes = indexes[lowIndex:lowIndex + BATCH_SIZE]

        sess.run(trainStep, feed_dict={xVals: trainData[batchIndexes],
                                       yVals: trainLabels[batchIndexes].reshape((-1, 1))})
        currentStep += 1

        if currentStep % CHECK_GAP == 0:
            # `loss` is the penalised objective (cross-entropy plus weight penalty),
            # so the logged validation loss includes the penalty term.
            lossVall, accVall = sess.run([loss, accuracy], feed_dict=validationFeed)
            steps.append(currentStep)
            lossVals.append(lossVall)
            accVals.append(accVall)

            print()
            print("Step {} VALIDATION: loss = {} and accuracy = {}".format(currentStep, lossVall, accVall))
            print()

#Accuracy on test set
accTest = sess.run(accuracy, feed_dict={xVals: testData, yVals: testLabels.reshape((-1, 1))})
print("ACCURACY ON TEST SET: {}".format(accTest))



Step 100 VALIDATION: loss = 2.156432628631592 and accuracy = 0.6363636255264282


Step 200 VALIDATION: loss = 1.8107726573944092 and accuracy = 0.6590909361839294


Step 300 VALIDATION: loss = 1.5660474300384521 and accuracy = 0.6590909361839294


Step 400 VALIDATION: loss = 1.4005026817321777 and accuracy = 0.6818181872367859


Step 500 VALIDATION: loss = 1.2582061290740967 and accuracy = 0.7272727489471436


Step 600 VALIDATION: loss = 1.1559395790100098 and accuracy = 0.7272727489471436


Step 700 VALIDATION: loss = 1.0749175548553467 and accuracy = 0.75


Step 800 VALIDATION: loss = 1.0048878192901611 and accuracy = 0.7272727489471436


Step 900 VALIDATION: loss = 0.9478007555007935 and accuracy = 0.7272727489471436


Step 1000 VALIDATION: loss = 0.8986818790435791 and accuracy = 0.7272727489471436


Step 1100 VALIDATION: loss = 0.8592897057533264 and accuracy = 0.75


Step 1200 VALIDATION: loss = 0.8277779221534729 and accuracy = 0.7727272510528564



In [None]:
# Persist the validation history and the test accuracy to disk as one pickle.
savedPath = "../data/lassoRegressionModelMetrics.pickle"

modelMetricDic = {}
modelMetricDic["steps"] = steps        # training-step numbers at which validation ran
modelMetricDic["lossVals"] = lossVals  # validation loss at each of those steps
modelMetricDic["accVals"] = accVals    # validation accuracy at each of those steps
modelMetricDic["accTest"] = accTest    # accuracy on the test set

with open(savedPath, 'wb') as handle:
    pickle.dump(modelMetricDic, handle, protocol=pickle.HIGHEST_PROTOCOL)

print("Model metrics saved")