## Problem: Structuring your TensorFlow models

Structure the linear model as a class whose graph components are defined with a lazy-property decorator. Please refer to this [article](https://danijar.com/structuring-your-tensorflow-models/) by Danijar Hafner.

In [7]:
import functools
import tensorflow as tf
import numpy as np
import sys
# Add ./utils to the module search path so the import below can resolve
# (presumably the `mnist` helper module lives there -- verify).
sys.path.append('./utils')

from mnist import MNIST
# Construct the MNIST helper pointing at the local data directory; later cells
# draw random training batches from it and read `data.x_test` / `data.y_test`.
data = MNIST(data_dir="data/MNIST/")

def lazy_property(function, name = None, *args, **kwargs):
    """Wrap a method as a read-only property that is evaluated at most once per instance.

    On first access the wrapped ``function(self)`` is called and its result is
    stored on the instance under the attribute ``'_cache_' + function.__name__``.
    Every later access returns that stored value without calling ``function``
    again.

    Args:
        function: the method to wrap; it is called with the instance only.
        name, *args, **kwargs: accepted for signature compatibility but not
            used by the current implementation.

    Returns:
        A ``property`` object whose getter carries ``function``'s metadata
        (name, docstring) via ``functools.wraps``.
    """
    cache_slot = '_cache_{}'.format(function.__name__)

    def cached_getter(self):
        already_built = hasattr(self, cache_slot)
        if not already_built:
            # First access on this instance: compute once and remember it.
            setattr(self, cache_slot, function(self))
        return getattr(self, cache_slot)

    # Copy the wrapped method's metadata onto the getter, then expose it as a property.
    return property(functools.wraps(function)(cached_getter))

In [13]:
class Model:
    """Linear softmax classifier expressed as a TensorFlow graph.

    Each part of the graph (``logits``, ``prediction``, ``optimize``, ``error``)
    is a ``lazy_property``: the ops are created the first time the property is
    read and the same op/tensor is returned on every later read, so reading a
    property repeatedly never adds duplicate nodes to the graph.
    """

    def __init__(self, input_size, output_size, image, target, learning_rate = 0.1):
        """Store the inputs and build the whole graph.

        Args:
            input_size: number of rows of the weight matrix (features per example).
            output_size: number of classes, i.e. the width of the logits.
            image: input tensor that is matrix-multiplied with the
                ``[input_size, output_size]`` weight matrix.
            target: label tensor with one column per class; used as ``labels``
                of the softmax cross-entropy and arg-maxed along axis 1 for
                the error rate.
            learning_rate: step size handed to ``AdagradOptimizer``. Defaults
                to 0.1, the value that used to be hard-coded in ``optimize``.
        """
        self.image = image
        self.target = target
        self.input_size = input_size
        self.num_classes = output_size
        self.learning_rate = learning_rate

        # Read every lazy property once so that all ops and variables are
        # created here, at construction time, rather than on first use.
        self.logits
        self.prediction
        self.optimize
        self.error

    @lazy_property
    def logits(self):
        """Unnormalized class scores: ``matmul(image, weights) + biases``."""
        # Weights use Xavier initialization; biases start at zero.
        weights = tf.get_variable(name = 'weight', shape = [self.input_size, self.num_classes], initializer = tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.zeros([self.num_classes]))
        return tf.matmul(self.image, weights) + biases

    @lazy_property
    def prediction(self):
        """Softmax of the logits."""
        return tf.nn.softmax(self.logits)

    @lazy_property
    def optimize(self):
        """Training op: one Adagrad step on the mean softmax cross-entropy."""
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.logits, labels = self.target)
        cost = tf.reduce_mean(cross_entropy)
        train_op = tf.train.AdagradOptimizer(learning_rate = self.learning_rate).minimize(cost)
        # Kept as a public alias for code that reads `model.optimizer`; note
        # that it holds the op returned by minimize(), not the optimizer object.
        self.optimizer = train_op
        return train_op

    @lazy_property
    def error(self):
        """Mean of the per-example mismatch between predicted and target class."""
        y_pred_cls = tf.argmax(self.prediction, axis = 1)
        y_true_cls = tf.argmax(self.target, axis = 1)
        wrong_prediction = tf.not_equal(y_pred_cls, y_true_cls)
        return tf.reduce_mean(tf.cast(wrong_prediction, tf.float32))

In [14]:
batch_size = 100
num_steps = 1000
report_every = 100  # print the error rates once every this many steps

tf.reset_default_graph()

# Placeholders: flattened 28x28 images and labels with one column per class.
img_size_flat = 28 * 28
num_classes = 10
image = tf.placeholder(tf.float32, [None, img_size_flat])
target = tf.placeholder(tf.float32, [None, num_classes])

model = Model(img_size_flat, num_classes, image, target)


with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    # The test feed is the same on every step, so build it once up front.
    feed_dict_test = {image : data.x_test, target : data.y_test}

    for step in range(num_steps+1):
        # One optimization step on a freshly drawn random training batch.
        x_batch, y_batch, _ = data.random_batch(batch_size = batch_size)
        feed_dict_train = {image : x_batch, target : y_batch}
        session.run(model.optimize, feed_dict_train)

        if (step % report_every == 0):
            # Evaluate only when reporting: running the error op does not
            # modify any variable, so skipping it on the other steps leaves
            # training and the printed numbers unchanged while avoiding a
            # pass over the whole test set on every iteration.
            train_error = session.run(model.error, feed_dict_train)
            test_error = session.run(model.error, feed_dict_test)
            print("Error rate @ iter %d: [Train] %f  [Test] %f" % (step, train_error, test_error))

Error rate @ iter 0: [Train] 0.590000  [Test] 0.744200
Error rate @ iter 100: [Train] 0.140000  [Test] 0.114400
Error rate @ iter 200: [Train] 0.110000  [Test] 0.099900
Error rate @ iter 300: [Train] 0.080000  [Test] 0.094800
Error rate @ iter 400: [Train] 0.110000  [Test] 0.090100
Error rate @ iter 500: [Train] 0.110000  [Test] 0.088900
Error rate @ iter 600: [Train] 0.070000  [Test] 0.086700
Error rate @ iter 700: [Train] 0.070000  [Test] 0.087200
Error rate @ iter 800: [Train] 0.070000  [Test] 0.087100
Error rate @ iter 900: [Train] 0.080000  [Test] 0.085300
Error rate @ iter 1000: [Train] 0.080000  [Test] 0.083900


### First Trial (Optimizer : AdagradOptimizer / Learning rate : 0.1 / batch_size : 100)

Error rate @ iter 0: [Train] 0.750000  [Test] 0.841800 <br>
Error rate @ iter 100: [Train] 0.110000  [Test] 0.110700 <br>
Error rate @ iter 200: [Train] 0.110000  [Test] 0.099000 <br>
Error rate @ iter 300: [Train] 0.120000  [Test] 0.096200 <br>
Error rate @ iter 400: [Train] 0.080000  [Test] 0.089700 <br>
Error rate @ iter 500: [Train] 0.100000  [Test] 0.086800 <br>
Error rate @ iter 600: [Train] 0.060000  [Test] 0.087600 <br>
Error rate @ iter 700: [Train] 0.050000  [Test] 0.085400 <br>
Error rate @ iter 800: [Train] 0.140000  [Test] 0.085800 <br>
Error rate @ iter 900: [Train] 0.100000  [Test] 0.084200 <br>
Error rate @ iter 1000: [Train] 0.060000  [Test] 0.081000 <br>

### Another Sequential trial
optimizer : AdagradOptimizer / batch_size : 1000 / loop : 100 / test_acc : 0.9236000180244446 <br>
optimizer : AdagradOptimizer / batch_size : 1000 / loop : 100 / train_acc : 0.9253454804420471 / test_acc : 0.9246000051498413  <br>
optimizer : GradientDescentOptimizer / batch_size : 1000 / loop : 100 / train_acc : 0.9179999828338623 / test_acc : 0.9194999933242798  <br>
optimizer : GradientDescentOptimizer / batch_size : 1000 / loop : 100 / train_acc : 91.929% / test_acc : 92.010%  <br>
optimizer : GradientDescentOptimizer / learning_rate : 0.5 / batch_size : 1000 / loop : 100 / train_acc : 90.645% / test_acc : 89.840%  <br>
optimizer : GradientDescentOptimizer / learning_rate : 0.5 / batch_size : 1000 / loop : 100 / train_acc : 89.878% / test_acc : 89.130%  <br>
optimizer : GradientDescentOptimizer / learning_rate : 0.1 / batch_size : 1000 / loop : 100 / train_acc : 92.411% / test_acc : 91.400%  <br>


### Trials with weight initialization (Xavier initialization)

Error rate @ iter 0: [Train] 0.590000  [Test] 0.744200 <br>
Error rate @ iter 100: [Train] 0.140000  [Test] 0.114400 <br>
Error rate @ iter 200: [Train] 0.110000  [Test] 0.099900 <br>
Error rate @ iter 300: [Train] 0.080000  [Test] 0.094800 <br>
Error rate @ iter 400: [Train] 0.110000  [Test] 0.090100 <br>
Error rate @ iter 500: [Train] 0.110000  [Test] 0.088900 <br>
Error rate @ iter 600: [Train] 0.070000  [Test] 0.086700 <br>
Error rate @ iter 700: [Train] 0.070000  [Test] 0.087200 <br>
Error rate @ iter 800: [Train] 0.070000  [Test] 0.087100 <br>
Error rate @ iter 900: [Train] 0.080000  [Test] 0.085300 <br>
Error rate @ iter 1000: [Train] 0.080000  [Test] 0.083900 <br>