## Part 1: Building and Training a ConvNet on SVHN
(25 points)

First we provide demo code that trains a convolutional network on the [SVHN Dataset](http://ufldl.stanford.edu/housenumbers/).

You will need to download   __Format 2__ from the link above.
- Create a directory named `svhn_mat/` in the working directory. Or, you can create it anywhere you want, but change the path in `svhn_dataset_generator` to match it.
- Download `train_32x32.mat` and `test_32x32.mat` to this directory.
- `extra_32x32.mat` is NOT needed.
- You may find the `wget` command useful for downloading on Linux.



The following defines a generator for the SVHN Dataset, yielding the next batch every time `next` is invoked.

In [None]:
import os
import math
import numpy as np
import scipy
import scipy.io
import tensorflow as tf

from six.moves import range
import read_data


# A full test-set evaluation is run and printed every `print_every` training batches.
print_every = 20
# Number of passes over the training generator.
epoch_n = 50

################################## Loading Data ###############################

@read_data.restartable
def svhn_dataset_generator(dataset_name, batch_size):
    """Yield (images, labels) batches from one SVHN ``*_32x32.mat`` file.

    Args:
        dataset_name: 'train' or 'test'; selects ``<name>_32x32.mat``.
        batch_size: positive batch size, or -1 to get the whole split as a
            single batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` is a slice of the file's 'X'
        array with its last axis moved to the front, and ``y_batch`` is the
        matching flattened slice of 'y' with every label value 10 rewritten
        to 0.  The final batch is topped up with samples from the start of
        the split so that it is not shorter than the others.
    """
    assert dataset_name in ['train', 'test']
    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset

    # Directory holding the SVHN .mat files downloaded in Q1.1.
    mat_path = os.path.join('./svhn_mat/', '%s_32x32.mat' % dataset_name)
    mat = scipy.io.loadmat(mat_path)
    images = mat['X'].transpose((3, 0, 1, 2))
    labels = mat['y']
    n_samples = images.shape[0]
    if batch_size <= 0:
        batch_size = n_samples

    # Append the first `batch_size` samples so the last slice can wrap around.
    images_wrapped = np.concatenate([images, images[:batch_size]], axis=0)
    labels_wrapped = np.concatenate([labels, labels[:batch_size]], axis=0)
    labels_wrapped[labels_wrapped == 10] = 0

    n_batches = int(math.ceil(n_samples / batch_size))
    for start in range(0, n_batches * batch_size, batch_size):
        stop = start + batch_size
        yield images_wrapped[start:stop], np.ravel(labels_wrapped[start:stop])

The following defines the ConvNet model. It has two identical conv layers with 32 5x5 convolution filters, each followed by 2x2 max pooling, then a 500-unit fully-connected layer and a final fully-connected layer that outputs the logits.

In [None]:
################################### ConvNet Model #############################

def cnn_map(x_):
    """Build the SVHN ConvNet graph and return its logits tensor.

    Two conv (32 filters, 5x5, "same" padding, ReLU) + max-pool (2x2,
    stride 2) stages are followed by a flatten, a 500-unit ReLU dense layer
    and a 10-unit linear output layer.

    Args:
        x_: input image tensor.

    Returns:
        The 10-unit logits tensor.
    """
    conv_args = dict(filters=32,  # number of filters
                     kernel_size=[5, 5],
                     padding="same",
                     activation=tf.nn.relu)
    pool_args = dict(pool_size=[2, 2], strides=2)  # stride [2,2]

    # Ops are created in the order conv, pool, conv, pool.
    net = x_
    for _ in range(2):
        net = tf.layers.conv2d(inputs=net, **conv_args)
        net = tf.layers.max_pooling2d(inputs=net, **pool_args)

    flat = tf.contrib.layers.flatten(net, scope='pool2flat')
    hidden = tf.layers.dense(inputs=flat, units=500, activation=tf.nn.relu)
    return tf.layers.dense(inputs=hidden, units=10)

### Q1.2 Training SVHN Net
Now we train a `cnn_map` net on Format 2 of the SVHN Dataset. We will call this "SVHN net". 

**Note:** training will take a while, so you might want to use GPU.

In [None]:
################################## Training Model #############################
# Batch generators for the two SVHN splits.  The test generator is iterated
# from scratch at every evaluation (presumably made re-iterable by the
# @read_data.restartable decorator).
dataset_generators = {
    'train': svhn_dataset_generator('train', 256),
    'test': svhn_dataset_generator('test', 256),
}

with tf.device("/gpu:0"):  # use gpu:0 if on GPU
    # Soft placement lets TensorFlow fall back to the CPU for ops that cannot
    # be placed on /gpu:0 (e.g. on a machine without a GPU) instead of raising.
    sess = tf.InteractiveSession(
        config=tf.ConfigProto(allow_soft_placement=True))
    x_ = tf.placeholder(tf.float32, [None, 32, 32, 3])
    y_ = tf.placeholder(tf.int32, [None])
    y_logits = cnn_map(x_)

    y_dict = dict(labels=y_, logits=y_logits)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict)
    cross_entropy_loss = tf.reduce_mean(losses)
    trainer = tf.train.AdamOptimizer()
    train_op = trainer.minimize(cross_entropy_loss)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; `axis` replaces the deprecated `dimension` argument.
    y_pred = tf.argmax(y_logits, axis=1)
    correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(epoch_n):
        for iter_i, data_batch in enumerate(dataset_generators['train']):
            train_op.run(feed_dict={x_: data_batch[0], y_: data_batch[1]})
            if iter_i % print_every == 0:
                collect = []
                arrloss = []
                for test_batch in dataset_generators['test']:
                    # Fetch both metrics in one run so each test batch goes
                    # through the network once instead of twice.
                    batch_acc, batch_loss = sess.run(
                        [accuracy, cross_entropy_loss],
                        feed_dict={x_: test_batch[0], y_: test_batch[1]})
                    collect.append(batch_acc)
                    arrloss.append(batch_loss)

                # Mean of the per-batch metrics over the whole test pass.
                test_acc = np.mean(collect, axis=0)
                test_loss = np.mean(arrloss, axis=0)
                print("iter %d    epoch %d   test accuracy %.3f   loss %.3f" %(iter_i, epoch_i, test_acc, test_loss) )

### Q1.3 SVHN Net Variations
Now we vary the structure of the network. To keep things simple, we still use  two identical conv layers, but vary their parameters. 

Report the final test accuracy for 3 different numbers of filters and for 3 different stride values. Each time you vary one parameter, keep the other fixed at its original value.

|Stride (with filters=32)|Accuracy|
|--|-------------------------------|
| 2 | 0.828 |
| 3 | 0.840 |
| 4 | 0.805 |

|Filters (with stride=2)|Accuracy|
|--|-------------------------------|
| 20 | 0.830 |
|  30| 0.829 |
|  40| 0.820 |

A template for one sample modification is given below. 

**Note:** you're welcome to decide how many training epochs to use, if that gets you the same results but faster.

In [None]:
# Evaluate on the test set every `print_every` training batches.
print_every = 20
# Number of passes over the training generator.
epoch_n = 30
# Number of filters in each of the two conv layers.
Numfilt = 32
# Stride of both 2x2 max-pooling layers.
Numstride = 3

################################### ConvNet Model #############################

def cnn_map(x_, Numfilt, Numstride):
    """Build the two-stage SVHN ConvNet with configurable width and stride.

    Args:
        x_: input image tensor.
        Numfilt: number of filters in each of the two 5x5 convolutions.
        Numstride: stride of each of the two 2x2 max-pooling layers.

    Returns:
        The 10-unit logits tensor.
    """
    # Ops are created in the order conv, pool, conv, pool.
    net = x_
    for _ in range(2):
        net = tf.layers.conv2d(inputs=net,
                               filters=Numfilt,  # number of filters
                               kernel_size=[5, 5],
                               padding="same",
                               activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(inputs=net,
                                      pool_size=[2, 2],
                                      strides=Numstride)

    flat = tf.contrib.layers.flatten(net, scope='pool2flat')
    hidden = tf.layers.dense(inputs=flat, units=500, activation=tf.nn.relu)
    return tf.layers.dense(inputs=hidden, units=10)
    
################################## Training Model ########################################

# Batch generators for the two SVHN splits.  The test generator is iterated
# from scratch at every evaluation (presumably made re-iterable by the
# @read_data.restartable decorator).
dataset_generators = {
    'train': svhn_dataset_generator('train', 256),
    'test': svhn_dataset_generator('test', 256),
}

with tf.device("/gpu:0"):  # use gpu:0 if on GPU
    # Soft placement lets TensorFlow fall back to the CPU for ops that cannot
    # be placed on /gpu:0 (e.g. on a machine without a GPU) instead of raising.
    sess = tf.InteractiveSession(
        config=tf.ConfigProto(allow_soft_placement=True))
    x_ = tf.placeholder(tf.float32, [None, 32, 32, 3])
    y_ = tf.placeholder(tf.int32, [None])
    y_logits = cnn_map(x_, Numfilt, Numstride)

    y_dict = dict(labels=y_, logits=y_logits)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict)
    cross_entropy_loss = tf.reduce_mean(losses)
    trainer = tf.train.AdamOptimizer()
    train_op = trainer.minimize(cross_entropy_loss)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; `axis` replaces the deprecated `dimension` argument.
    y_pred = tf.argmax(y_logits, axis=1)
    correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    sess.run(tf.global_variables_initializer())

    for epoch_i in range(epoch_n):
        for iter_i, data_batch in enumerate(dataset_generators['train']):
            train_op.run(feed_dict={x_: data_batch[0], y_: data_batch[1]})

            if iter_i % print_every == 0:
                collect = []
                arrloss = []
                for test_batch in dataset_generators['test']:
                    # Fetch both metrics in one run so each test batch goes
                    # through the network once instead of twice.
                    batch_acc, batch_loss = sess.run(
                        [accuracy, cross_entropy_loss],
                        feed_dict={x_: test_batch[0], y_: test_batch[1]})
                    collect.append(batch_acc)
                    arrloss.append(batch_loss)

                # Mean of the per-batch metrics over the whole test pass.
                test_acc = np.mean(collect, axis=0)
                test_loss = np.mean(arrloss, axis=0)
                print("iter %d    epoch %d   test accuracy %.3f   loss %.3f" %(iter_i, epoch_i, test_acc, test_loss) )

## Part 2: Saving and Reloading Model Weights
(25 points)

In this section you learn to save the weights of a trained model and to load the weights of a saved model. This is really useful when we would like to load an already trained model in order to continue training or to fine-tune it. Oftentimes we save “snapshots” of the model as training progresses, in case the training is interrupted or we would like to fall back to an earlier model; this is called snapshot saving.

### Q2.1 Defining another network
Define a network with a slightly different structure in `def cnn_expanded(x_)` below. `cnn_expanded` is an expanded version of `cnn_map`. 
It should have: 
- a different size of kernel for the last convolutional layer, 
- followed by one additional convolutional layer, and 
- followed by one additional pooling layer.

The last fully-connected layer will stay the same.

In [None]:
def cnn_map(x_,NumCovLayer,Numfilt,Numstride,KernelSize):
    """Build a configurable conv+pool stack followed by a dense classifier.

    Args:
        x_: input image tensor.
        NumCovLayer: number of conv+pool stages to stack.
        Numfilt: number of filters in every convolution.
        Numstride: stride of every 2x2 max-pooling layer.
        KernelSize: sequence giving the (square) kernel side of each stage;
            indexed from 0 to ``NumCovLayer - 1``.

    Returns:
        The 10-unit logits tensor.
    """
    net = x_
    for i in range(NumCovLayer):
        with tf.name_scope('layer'+str(i)):
            # Each stage consumes the previous stage's output.  Feeding `x_`
            # to every convolution would leave all stages but the last one
            # disconnected from the logits.
            net = tf.layers.conv2d(
                    inputs=net,
                    filters=Numfilt,
                    kernel_size=[KernelSize[i], KernelSize[i]],
                    padding="same",
                    activation=tf.nn.relu)

            net = tf.layers.max_pooling2d(inputs=net, pool_size=[2, 2], strides=Numstride)

    pool_flat = tf.contrib.layers.flatten(net, scope='poolflat')
    dense = tf.layers.dense(inputs=pool_flat, units=500, activation=tf.nn.relu)

    logits = tf.layers.dense(inputs=dense, units=10)
    return logits
    

### Q2.2 Saving and Loading Weights
`new_train_model()` below takes two more parameters, `save_model=False, load_model=False`, than the `train_model` defined previously. Modify `new_train_model()` so that it will 
- save weights after the training is complete if `save_model` is `True`, and
- load weights on start-up before training if `load_model` is `True`.

*Hint:*  take a look at the docs for `tf.train.Saver()` here: https://www.tensorflow.org/api_docs/python/tf/train/Saver#__init__. You probably will be specifying the first argument `var_list` to accomplish this question.

Note: if you are unable to load weights into the `cnn_expanded` network, use `cnn_map` in order to continue the assignment.

In [None]:
# Evaluate on the test set every `print_every` training batches.
print_every = 20
# Number of passes over the training generator.
epoch_n = 25
# Number of filters in every conv layer.
Numfilt = 32
# Stride of every 2x2 max-pooling layer.
Numstride = 3
# Square kernel side of each conv layer, indexed by layer.
KernelSize = [5, 5, 3, 3]
NumConvLayer = 4

# Restore weights from LoadFile before training.
load_model = True
# Write a checkpoint to SaveFile after training.
save_model = False
# Restrict the checkpoint restore to layer0's conv kernel and bias.
change_model = True

LoadFile = "weight/q22.ckpt"
SaveFile = "weight/q22.ckpt"

################################## Loading Data ###############################

@read_data.restartable
def svhn_dataset_generator(dataset_name, batch_size):
    """Yield (images, labels) batches from one SVHN ``*_32x32.mat`` file.

    Args:
        dataset_name: 'train' or 'test'; selects ``<name>_32x32.mat``.
        batch_size: positive batch size, or -1 to get the whole split as a
            single batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` is a slice of the file's 'X'
        array with its last axis moved to the front, and ``y_batch`` is the
        matching flattened slice of 'y' with every label value 10 rewritten
        to 0.  The final batch is topped up with samples from the start of
        the split so that it is not shorter than the others.
    """
    assert dataset_name in ['train', 'test']
    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset

    # Directory holding the SVHN .mat files downloaded in Q1.1.
    mat_path = os.path.join('./svhn_mat/', '%s_32x32.mat' % dataset_name)
    mat = scipy.io.loadmat(mat_path)
    images = mat['X'].transpose((3, 0, 1, 2))
    labels = mat['y']
    n_samples = images.shape[0]
    if batch_size <= 0:
        batch_size = n_samples

    # Append the first `batch_size` samples so the last slice can wrap around.
    images_wrapped = np.concatenate([images, images[:batch_size]], axis=0)
    labels_wrapped = np.concatenate([labels, labels[:batch_size]], axis=0)
    labels_wrapped[labels_wrapped == 10] = 0

    n_batches = int(math.ceil(n_samples / batch_size))
    for start in range(0, n_batches * batch_size, batch_size):
        stop = start + batch_size
        yield images_wrapped[start:stop], np.ravel(labels_wrapped[start:stop])
        
        
################################### ConvNet Model #############################

def cnn_map(x_,NumConvLayer,Numfilt,Numstride,KernelSize):
    """Build ``NumConvLayer`` conv+pool layers and a dense classifier head.

    Layer ``i`` is created in variable scope ``layer<i>``: a "same"-padded
    ReLU convolution named ``conv`` with ``Numfilt`` filters of size
    ``KernelSize[i] x KernelSize[i]``, followed by 2x2 max pooling with
    stride ``Numstride``.

    The flatten/dense head is built once, after the loop.  It used to be
    rebuilt on every loop iteration, which created unused dense layers; the
    head's auto-generated variable names change as a result, so a checkpoint
    of *all* variables written by the old graph will not restore into this
    one.  The ``layer<i>/conv/*`` variables are unaffected.

    Args:
        x_: input image tensor.
        NumConvLayer: number of conv+pool layers (at least 1).
        Numfilt: number of filters per convolution.
        Numstride: stride of every max-pooling layer.
        KernelSize: sequence of square kernel sides, indexed by layer.

    Returns:
        ``[filt, bias, logits]``: lists of each layer's conv kernel and bias
        variables, and the 10-unit logits tensor.
    """
    filt = []
    bias = []
    for i in range(NumConvLayer):
        with tf.variable_scope('layer'+str(i)):
            # NOTE(review): every convolution reads the raw input `x_`, not
            # the previous layer's output, so only the last layer's conv/pool
            # reaches the logits.  Left unchanged here: with this cell's
            # Numstride=3 and NumConvLayer=4 a chained 32x32 input would
            # shrink 32 -> 11 -> 4 -> 1 and the fourth 2x2 pooling would no
            # longer fit.  Pick a compatible stride/depth before chaining.
            conv = tf.layers.conv2d(
                    inputs=x_,
                    filters=Numfilt,
                    kernel_size=[KernelSize[i], KernelSize[i]],
                    padding="same",
                    activation=tf.nn.relu,
                    name="conv")
            # Switch the scope to reuse mode so get_variable hands back the
            # kernel/bias that conv2d just created.
            tf.get_variable_scope().reuse_variables()
            filt.append(tf.get_variable(name='conv/kernel'))
            bias.append(tf.get_variable(name='conv/bias'))

            pool = tf.layers.max_pooling2d(inputs=conv,
                                           pool_size=[2, 2],
                                           strides=Numstride)

    pool_flat = tf.contrib.layers.flatten(pool, scope='poolflat')
    dense = tf.layers.dense(inputs=pool_flat, units=500, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=dense, units=10)
    return [filt, bias, logits]
    
################################## Training Model ########################################

# Batch generators for the two SVHN splits.  The test generator is iterated
# from scratch at every evaluation (presumably made re-iterable by the
# @read_data.restartable decorator).
dataset_generators = {
    'train': svhn_dataset_generator('train', 256),
    'test': svhn_dataset_generator('test', 256),
}

with tf.device("/gpu:0"):  # use gpu:0 if on GPU
    x_ = tf.placeholder(tf.float32, [None, 32, 32, 3])
    y_ = tf.placeholder(tf.int32, [None])
    filt, bias, y_logits = cnn_map(x_, NumConvLayer, Numfilt, Numstride, KernelSize)

    y_dict = dict(labels=y_, logits=y_logits)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict)
    cross_entropy_loss = tf.reduce_mean(losses)
    trainer = tf.train.AdamOptimizer()
    train_op = trainer.minimize(cross_entropy_loss)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; `axis` replaces the deprecated `dimension` argument.
    y_pred = tf.argmax(y_logits, axis=1)
    correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# `saver` covers every variable and is what writes the checkpoint, so a saved
# model is always complete.  `loader` is what restores: when the architecture
# differs from the checkpoint (change_model) it is limited to the variables
# both share -- the first conv layer's kernel and bias.
saver = tf.train.Saver()
if change_model:
    shared_variables = {"layer0/conv/kernel": filt[0],
                        "layer0/conv/bias": bias[0]}
    loader = tf.train.Saver(shared_variables)
else:
    loader = saver

# Saver.save refuses to write into a missing directory; create it up front
# rather than finding out after the whole training run.
if save_model:
    save_dir = os.path.dirname(SaveFile)
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

# Soft placement lets TensorFlow fall back to the CPU for ops that cannot be
# placed on /gpu:0 (e.g. on a machine without a GPU) instead of raising.
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

if load_model:
    loader.restore(sess, LoadFile)
    print('************** Weight is loaded ***************')

for epoch_i in range(epoch_n):
    for iter_i, data_batch in enumerate(dataset_generators['train']):
        train_op.run(feed_dict={x_: data_batch[0], y_: data_batch[1]})

        if iter_i % print_every == 0:
            collect = []
            arrloss = []
            for test_batch in dataset_generators['test']:
                # Fetch both metrics in one run so each test batch goes
                # through the network once instead of twice.
                batch_acc, batch_loss = sess.run(
                    [accuracy, cross_entropy_loss],
                    feed_dict={x_: test_batch[0], y_: test_batch[1]})
                collect.append(batch_acc)
                arrloss.append(batch_loss)

            # Mean of the per-batch metrics over the whole test pass.
            test_acc = np.mean(collect, axis=0)
            test_loss = np.mean(arrloss, axis=0)
            print("iter %d    epoch %d   test accuracy %.3f   loss %.3f" %(iter_i, epoch_i, test_acc, test_loss) )

if save_model:
    path = saver.save(sess, SaveFile)
    print('***************** Weight is saved ****************')

## Part 3: Fine-tuning a Pre-trained Network on CIFAR-10
(20 points)

[CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) is another popular benchmark for image classification.
We provide you with a modified version of the file cifar10.py from [https://github.com/Hvass-Labs/TensorFlow-Tutorials](https://github.com/Hvass-Labs/TensorFlow-Tutorials).


In [None]:
import read_cifar10 as cf10

We also provide a generator for the CIFAR-10 Dataset, yielding the next batch every time `next` is invoked.

In [None]:
@read_data.restartable
def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000):
    """Yield (images, labels) uint8 batches from CIFAR-10.

    Args:
        dataset_name: 'train' or 'test'.
        batch_size: positive batch size, or -1 to get the whole split as a
            single batch.
        restrict_size: number of leading training examples to keep; ignored
            for the test split.

    Yields:
        ``(X_batch, y_batch)``, both cast to ``np.uint8``.  The final batch
        is topped up with samples from the start of the (restricted) split.
    """
    assert dataset_name in ['train', 'test']
    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset

    X_all_unrestricted, y_all_unrestricted = (
        cf10.load_training_data() if dataset_name == 'train'
        else cf10.load_test_data())

    if dataset_name == 'train':
        # Restrict images AND labels.  Leaving the labels unrestricted made
        # the wrapped-around tail of the last batch pair images 0..k with
        # labels restrict_size..restrict_size+k whenever batch_size did not
        # divide restrict_size.
        X_all = X_all_unrestricted[:restrict_size]
        y_all = y_all_unrestricted[:restrict_size]
    else:
        X_all = X_all_unrestricted
        y_all = y_all_unrestricted

    data_len = X_all.shape[0]
    batch_size = batch_size if batch_size > 0 else data_len

    # Append the first `batch_size` samples so the last slice can wrap around.
    X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0)
    y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0)

    for slice_i in range(int(math.ceil(data_len / batch_size))):
        idx = slice_i * batch_size
        # Rescale before the uint8 cast (presumably the loader returns pixel
        # values in [0, 1], which a direct cast would truncate to 0).
        # Bugfix credit: Zezhou Sun.
        X_batch = X_all_padded[idx:idx + batch_size] * 255
        y_batch = np.ravel(y_all_padded[idx:idx + batch_size])
        yield X_batch.astype(np.uint8), y_batch.astype(np.uint8)

# One generator per CIFAR-10 split: training data in batches of 1000, the
# test split as a single batch (-1).
cifar10_dataset_generators = dict(
    train=cifar10_dataset_generator('train', 1000),
    test=cifar10_dataset_generator('test', -1),
)


### Q3.1 Fine-tuning
Let's fine-tune SVHN net on **1000 examples** from CIFAR-10. 
Compare test accuracies of the following scenarios: 
  - Training `cnn_map` from scratch on the 1000 CIFAR-10 examples
  - Fine-tuning SVHN net (`cnn_map` trained on SVHN dataset) on 1000 examples from CIFAR-10. Use `new_train_model()` defined above to load SVHN net weights, but train on the CIFAR-10 examples.
  
**Important:** please do not change the `restrict_size=1000` parameter.

In [None]:

# Evaluate on the test set every `print_every` training batches.
print_every = 20
# Number of passes over the training generator.
epoch_n = 500
# Number of filters in every conv layer.
Numfilt = 32
# Stride of every 2x2 max-pooling layer.
Numstride = 2
# Square kernel side of each conv layer, indexed by layer.
KernelSize = [5, 5, 3, 3]
NumConvLayer = 4

# Restore weights from LoadFile before training.
load_model = True
# Write a checkpoint to SaveFile after training.
save_model = True
# Restrict the checkpoint restore to layer0's conv kernel and bias.
change_model = True

LoadFile = "weight/q22.ckpt"
SaveFile = "Q3/q3.ckpt"

################################## Loading Data ###############################

@read_data.restartable
def cifar10_dataset_generator(dataset_name, batch_size, restrict_size=1000):
    """Yield (images, labels) uint8 batches from CIFAR-10.

    Args:
        dataset_name: 'train' or 'test'.
        batch_size: positive batch size, or -1 to get the whole split as a
            single batch.
        restrict_size: number of leading training examples to keep; ignored
            for the test split.

    Yields:
        ``(X_batch, y_batch)``, both cast to ``np.uint8``.  The final batch
        is topped up with samples from the start of the (restricted) split.
    """
    assert dataset_name in ['train', 'test']
    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset

    X_all_unrestricted, y_all_unrestricted = (
        cf10.load_training_data() if dataset_name == 'train'
        else cf10.load_test_data())

    if dataset_name == 'train':
        # Restrict images AND labels.  Leaving the labels unrestricted made
        # the wrapped-around tail of the last batch pair images 0..k with
        # labels restrict_size..restrict_size+k whenever batch_size did not
        # divide restrict_size.
        X_all = X_all_unrestricted[:restrict_size]
        y_all = y_all_unrestricted[:restrict_size]
    else:
        X_all = X_all_unrestricted
        y_all = y_all_unrestricted

    data_len = X_all.shape[0]
    batch_size = batch_size if batch_size > 0 else data_len

    # Append the first `batch_size` samples so the last slice can wrap around.
    X_all_padded = np.concatenate([X_all, X_all[:batch_size]], axis=0)
    y_all_padded = np.concatenate([y_all, y_all[:batch_size]], axis=0)

    for slice_i in range(int(math.ceil(data_len / batch_size))):
        idx = slice_i * batch_size
        # Rescale before the uint8 cast (presumably the loader returns pixel
        # values in [0, 1], which a direct cast would truncate to 0).
        X_batch = X_all_padded[idx:idx + batch_size] * 255
        y_batch = np.ravel(y_all_padded[idx:idx + batch_size])
        yield X_batch.astype(np.uint8), y_batch.astype(np.uint8)
        
################################### ConvNet Model #############################

def cnn_map(x_,NumConvLayer,Numfilt,Numstride,KernelSize):
    """Build a stack of ``NumConvLayer`` conv+pool layers and a dense head.

    Layer ``i`` is created in variable scope ``layer<i>``: a "same"-padded
    ReLU convolution named ``conv`` with ``Numfilt`` filters of size
    ``KernelSize[i] x KernelSize[i]``, followed by 2x2 max pooling with
    stride ``Numstride``.  Each layer consumes the previous layer's pooled
    output (the first one consumes ``x_``), and the flatten/dense head is
    built once on top of the last layer.

    Previously every convolution read ``x_`` directly and the head was
    rebuilt inside the loop, so only the last layer was connected to the
    logits.  With real stacking, kernels of layers >= 1 have ``Numfilt``
    input channels and the head's variable names change, so checkpoints of
    those variables written by the old graph no longer fit.  ``layer0`` is
    unaffected.

    Args:
        x_: input image tensor.
        NumConvLayer: number of conv+pool layers.
        Numfilt: number of filters per convolution.
        Numstride: stride of every max-pooling layer.
        KernelSize: sequence of square kernel sides, indexed by layer.

    Returns:
        ``[filters, bias, logits]``: lists of each layer's conv kernel and
        bias variables, and the 10-unit logits tensor.
    """
    filters = []
    bias = []
    net = x_
    for i in range(NumConvLayer):
        with tf.variable_scope('layer'+str(i)):
            conv = tf.layers.conv2d(
                    inputs=net,
                    filters=Numfilt,
                    kernel_size=[KernelSize[i], KernelSize[i]],
                    padding="same",
                    activation=tf.nn.relu,
                    name="conv")
            # Switch the scope to reuse mode so get_variable hands back the
            # kernel/bias that conv2d just created.
            tf.get_variable_scope().reuse_variables()
            filters.append(tf.get_variable(name='conv/kernel'))
            bias.append(tf.get_variable(name='conv/bias'))

            net = tf.layers.max_pooling2d(inputs=conv,
                                          pool_size=[2, 2],
                                          strides=Numstride)

    pool_flat = tf.contrib.layers.flatten(net, scope='poolflat')
    dense = tf.layers.dense(inputs=pool_flat, units=500, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=dense, units=10)
    return [filters, bias, logits]
    
################################## Training Model #############################

# One generator per CIFAR-10 split: training data in batches of 1000, the
# test split as a single batch (-1).  The test generator is iterated from
# scratch at every evaluation (presumably made re-iterable by the
# @read_data.restartable decorator).
cifar10_dataset_generators = {
    'train': cifar10_dataset_generator('train', 1000),
    'test': cifar10_dataset_generator('test', -1),
}

with tf.device("/gpu:0"):  # use gpu:0 if on GPU
    x_ = tf.placeholder(tf.float32, [None, 32, 32, 3])
    y_ = tf.placeholder(tf.int32, [None])
    filters, bias, y_logits = cnn_map(x_, NumConvLayer, Numfilt, Numstride, KernelSize)

    y_dict = dict(labels=y_, logits=y_logits)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict)
    cross_entropy_loss = tf.reduce_mean(losses)
    trainer = tf.train.AdamOptimizer()
    train_op = trainer.minimize(cross_entropy_loss)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; `axis` replaces the deprecated `dimension` argument.
    y_pred = tf.argmax(y_logits, axis=1)
    correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# `saver` covers every variable and is what writes the checkpoint, so a saved
# model is always complete.  `loader` is what restores: when the architecture
# differs from the checkpoint (change_model) it is limited to the variables
# both share -- the first conv layer's kernel and bias.
saver = tf.train.Saver()
if change_model:
    shared_variables = {"layer0/conv/kernel": filters[0],
                        "layer0/conv/bias": bias[0]}
    loader = tf.train.Saver(shared_variables)
else:
    loader = saver

# Saver.save refuses to write into a missing directory; create it up front
# rather than finding out after the whole training run.
if save_model:
    save_dir = os.path.dirname(SaveFile)
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

# Soft placement lets TensorFlow fall back to the CPU for ops that cannot be
# placed on /gpu:0 (e.g. on a machine without a GPU) instead of raising.
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

if load_model:
    loader.restore(sess, LoadFile)
    print('**************** Weight is loaded *****************')

for epoch_i in range(epoch_n):
    for iter_i, data_batch in enumerate(cifar10_dataset_generators['train']):
        train_op.run(feed_dict={x_: data_batch[0], y_: data_batch[1]})

        if iter_i % print_every == 0:
            collect = []
            arrloss = []
            for test_batch in cifar10_dataset_generators['test']:
                # Fetch both metrics in one run so each test batch goes
                # through the network once instead of twice.
                batch_acc, batch_loss = sess.run(
                    [accuracy, cross_entropy_loss],
                    feed_dict={x_: test_batch[0], y_: test_batch[1]})
                collect.append(batch_acc)
                arrloss.append(batch_loss)

            # Mean of the per-batch metrics over the whole test pass.
            test_acc = np.mean(collect, axis=0)
            test_loss = np.mean(arrloss, axis=0)
            print("iter %d    epoch %d   test accuracy %.3f   loss %.3f" %(iter_i, epoch_i, test_acc, test_loss) )

if save_model:
    path = saver.save(sess, SaveFile)
    print('***************** Weight is saved *******************')

**I have submitted the results in the Q4 file.**

## Part 4: TensorBoard
(30 points)

[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard) is a very helpful tool for visualization of neural networks. 

### Q4.1 Plotting
Present at least one visualization for each of the following:
  - Filters
  - Loss
  - Accuracy

Modify the code you have written above to also have summary writers. To run TensorBoard, the command is `tensorboard --logdir=path/to/your/log/directory`.

In [None]:
# Evaluate on the test set every `print_every` training batches.
print_every = 20
# Number of passes over the training generator.
epoch_n = 50
# Number of filters in every conv layer.
Numfilt = 32
# Stride of every 2x2 max-pooling layer.
Numstride = 2
# Square kernel side of each conv layer, indexed by layer.
KernelSize = [5, 5, 3]
NumConvLayer = 3

################################## Loading Data ###############################

@read_data.restartable
def svhn_dataset_generator(dataset_name, batch_size):
    """Yield (images, labels) batches from one SVHN ``*_32x32.mat`` file.

    Args:
        dataset_name: 'train' or 'test'; selects ``<name>_32x32.mat``.
        batch_size: positive batch size, or -1 to get the whole split as a
            single batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` is a slice of the file's 'X'
        array with its last axis moved to the front, and ``y_batch`` is the
        matching flattened slice of 'y' with every label value 10 rewritten
        to 0.  The final batch is topped up with samples from the start of
        the split so that it is not shorter than the others.
    """
    assert dataset_name in ['train', 'test']
    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset

    # Directory holding the SVHN .mat files downloaded in Q1.1.
    mat_path = os.path.join('./svhn_mat/', '%s_32x32.mat' % dataset_name)
    mat = scipy.io.loadmat(mat_path)
    images = mat['X'].transpose((3, 0, 1, 2))
    labels = mat['y']
    n_samples = images.shape[0]
    if batch_size <= 0:
        batch_size = n_samples

    # Append the first `batch_size` samples so the last slice can wrap around.
    images_wrapped = np.concatenate([images, images[:batch_size]], axis=0)
    labels_wrapped = np.concatenate([labels, labels[:batch_size]], axis=0)
    labels_wrapped[labels_wrapped == 10] = 0

    n_batches = int(math.ceil(n_samples / batch_size))
    for start in range(0, n_batches * batch_size, batch_size):
        stop = start + batch_size
        yield images_wrapped[start:stop], np.ravel(labels_wrapped[start:stop])
        
        
################################### ConvNet Model #############################

def cnn_map(x_,NumConvLayer,Numfilt,Numstride,KernelSize):
    """Build ``NumConvLayer`` conv+pool layers and a dense classifier head.

    Layer ``i`` is created in variable scope ``layer<i>``: a "same"-padded
    ReLU convolution named ``conv`` with ``Numfilt`` filters of size
    ``KernelSize[i] x KernelSize[i]``, followed by 2x2 max pooling with
    stride ``Numstride``.

    The flatten/dense head is built once, after the loop.  It used to be
    rebuilt on every loop iteration, which created unused dense layers; the
    head's auto-generated variable names change as a result.

    Args:
        x_: input image tensor.
        NumConvLayer: number of conv+pool layers (at least 1).
        Numfilt: number of filters per convolution.
        Numstride: stride of every max-pooling layer.
        KernelSize: sequence of square kernel sides, indexed by layer.

    Returns:
        ``[filt, bias, logits]``: lists of each layer's conv kernel and bias
        variables, and the 10-unit logits tensor.
    """
    filt = []
    bias = []
    for i in range(NumConvLayer):
        with tf.variable_scope('layer'+str(i)):
            # NOTE(review): every convolution reads the raw input `x_`, not
            # the previous layer's output, so only the last layer's conv/pool
            # reaches the logits.  Left unchanged here because the filter
            # summaries built in this cell assume every kernel has 3 input
            # channels, which only holds while each layer reads `x_`.
            conv = tf.layers.conv2d(
                    inputs=x_,
                    filters=Numfilt,
                    kernel_size=[KernelSize[i], KernelSize[i]],
                    padding="same",
                    activation=tf.nn.relu,
                    name="conv")

            # Switch the scope to reuse mode so get_variable hands back the
            # kernel/bias that conv2d just created.
            tf.get_variable_scope().reuse_variables()
            filt.append(tf.get_variable(name='conv/kernel'))
            bias.append(tf.get_variable(name='conv/bias'))

            pool = tf.layers.max_pooling2d(inputs=conv,
                                           pool_size=[2, 2],
                                           strides=Numstride)

    pool_flat = tf.contrib.layers.flatten(pool, scope='poolflat')
    dense = tf.layers.dense(inputs=pool_flat, units=500, activation=tf.nn.relu)
    logits = tf.layers.dense(inputs=dense, units=10)
    return [filt, bias, logits]
    
################################## Training Model ########################################

# Batch generators for the two SVHN splits.  The test generator is iterated
# from scratch at every evaluation (presumably made re-iterable by the
# @read_data.restartable decorator).
dataset_generators = {
    'train': svhn_dataset_generator('train', 256),
    'test': svhn_dataset_generator('test', 256),
}

with tf.device("/gpu:0"):  # use gpu:0 if on GPU
    x_ = tf.placeholder(tf.float32, [None, 32, 32, 3])
    y_ = tf.placeholder(tf.int32, [None])
    filt, bias, y_logits = cnn_map(x_, NumConvLayer, Numfilt, Numstride, KernelSize)

    y_dict = dict(labels=y_, logits=y_logits)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict)

    cross_entropy_loss = tf.reduce_mean(losses)
    trainer = tf.train.AdamOptimizer()
    train_op = trainer.minimize(cross_entropy_loss)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; `axis` replaces the deprecated `dimension` argument.
    y_pred = tf.argmax(y_logits, axis=1)
    correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.device("/cpu:0"):
    # A conv2d kernel is laid out [height, width, in_channels, out_channels],
    # while tf.summary.image expects [batch, height, width, channels].  Move
    # the filter axis to the front with a transpose; a plain reshape would
    # mix values belonging to different filters into each image.
    filter_summaries = []
    for i in range(NumConvLayer):
        filter_images = tf.transpose(filt[i], [3, 0, 1, 2])
        filter_summaries.append(
            tf.summary.image('filter'+str(i), filter_images, Numfilt))
    merged = tf.summary.merge(filter_summaries)

# Soft placement lets TensorFlow fall back to the CPU for ops that cannot be
# placed on /gpu:0 (e.g. on a machine without a GPU) instead of raising.
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))

writer = tf.summary.FileWriter('summary_q4/')
sess.run(tf.global_variables_initializer())

# Count training batches directly instead of assuming a fixed number of
# batches per epoch, so summary steps never collide across epochs.
global_step = 0
for epoch_i in range(epoch_n):
    for iter_i, data_batch in enumerate(dataset_generators['train']):
        train_op.run(feed_dict={x_: data_batch[0], y_: data_batch[1]})

        if iter_i % print_every == 0:
            collect = []
            arrloss = []
            for test_batch in dataset_generators['test']:
                # Fetch both metrics in one run so each test batch goes
                # through the network once instead of twice.
                batch_acc, batch_loss = sess.run(
                    [accuracy, cross_entropy_loss],
                    feed_dict={x_: test_batch[0], y_: test_batch[1]})
                collect.append(batch_acc)
                arrloss.append(batch_loss)

            # Mean of the per-batch metrics over the whole test pass.
            test_acc = np.mean(collect, axis=0)
            test_loss = np.mean(arrloss, axis=0)
            print("iter %d    epoch %d   test accuracy %.3f   loss %.3f" %(iter_i, epoch_i, test_acc, test_loss) )

            # One summary per evaluation: the test-set means plus the current
            # filters (writing them once per test batch put many points and
            # many copies of the filter images on the same step).
            scalar_summary = tf.Summary(value=[
                tf.Summary.Value(tag='accurac', simple_value=float(test_acc)),
                tf.Summary.Value(tag='loss', simple_value=float(test_loss)),
            ])
            writer.add_summary(scalar_summary, global_step)
            writer.add_summary(sess.run(merged), global_step)

        global_step += 1

# Flush pending events to disk.
writer.close()

## Part 5: Bonus
(20 points)

### Q5.1 SVHN Net ++
Improve the accuracy of SVHN Net beyond that of the provided demo: SVHN Net ++.

In [None]:
# Evaluate on the test set every `print_every` training batches.
print_every = 20
# Number of passes over the training generator.
epoch_n = 50
# Number of filters in every conv layer.
Numfilt = 32
# Max-pooling stride of each conv layer, indexed by layer.
Numstride = [2, 2, 4, 4]
# Square kernel side of each conv layer, indexed by layer.
KernelSize = [3, 3, 5, 5]
NumConvLayer = 4
# Value fed to the dropout placeholder while training (1.0 is fed for evaluation).
drop_out_ = 0.7

# Restore weights from LoadFile before training.
load_model = False
# Write a checkpoint to SaveFile after training.
save_model = True
# Restrict the checkpoint restore to layer0's conv kernel and bias.
change_model = False

LoadFile = "Q5/q5.ckpt"
SaveFile = "Q5/q5.ckpt"

################################## Loading Data ###############################

@read_data.restartable
def svhn_dataset_generator(dataset_name, batch_size):
    """Yield (images, labels) batches from one SVHN ``*_32x32.mat`` file.

    Args:
        dataset_name: 'train' or 'test'; selects ``<name>_32x32.mat``.
        batch_size: positive batch size, or -1 to get the whole split as a
            single batch.

    Yields:
        ``(X_batch, y_batch)`` where ``X_batch`` is a slice of the file's 'X'
        array with its last axis moved to the front, and ``y_batch`` is the
        matching flattened slice of 'y' with every label value 10 rewritten
        to 0.  The final batch is topped up with samples from the start of
        the split so that it is not shorter than the others.
    """
    assert dataset_name in ['train', 'test']
    assert batch_size > 0 or batch_size == -1  # -1 for entire dataset

    # Directory holding the SVHN .mat files downloaded in Q1.1.
    mat_path = os.path.join('./svhn_mat/', '%s_32x32.mat' % dataset_name)
    mat = scipy.io.loadmat(mat_path)
    images = mat['X'].transpose((3, 0, 1, 2))
    labels = mat['y']
    n_samples = images.shape[0]
    if batch_size <= 0:
        batch_size = n_samples

    # Append the first `batch_size` samples so the last slice can wrap around.
    images_wrapped = np.concatenate([images, images[:batch_size]], axis=0)
    labels_wrapped = np.concatenate([labels, labels[:batch_size]], axis=0)
    labels_wrapped[labels_wrapped == 10] = 0

    n_batches = int(math.ceil(n_samples / batch_size))
    for start in range(0, n_batches * batch_size, batch_size):
        stop = start + batch_size
        yield images_wrapped[start:stop], np.ravel(labels_wrapped[start:stop])
        
        
################################### ConvNet Model #############################

def cnn_map(x_,NumConvLayer,Numfilt,Numstride,KernelSize,drop_out):
    """Build a stack of conv+dropout+pool layers and a dense head.

    Layer ``i`` is created in variable scope ``layer<i>``: a "same"-padded
    ReLU convolution named ``conv`` with ``Numfilt`` filters of size
    ``KernelSize[i] x KernelSize[i]``, dropout, then 2x2 max pooling with
    stride ``Numstride[i]``.  Each layer consumes the previous layer's pooled
    output (the first one consumes ``x_``).  The head (flatten, 500-unit
    ReLU dense, dropout, 10-unit linear) is built once on top of the last
    layer.

    Previously every convolution read ``x_`` directly and the head was
    rebuilt inside the loop, so only the last layer was connected to the
    logits.  With real stacking, kernels of layers >= 1 have ``Numfilt``
    input channels and the head's variable names change, so checkpoints
    written by the old graph no longer fit.

    Args:
        x_: input image tensor.
        NumConvLayer: number of conv+pool layers.
        Numfilt: number of filters per convolution.
        Numstride: sequence of max-pooling strides, indexed by layer.
        KernelSize: sequence of square kernel sides, indexed by layer.
        drop_out: value passed as the second argument of ``tf.nn.dropout``
            for both the conv outputs and the dense layer.

    Returns:
        ``[filters, bias, logits]``: lists of each layer's conv kernel and
        bias variables, and the 10-unit logits tensor.
    """
    filters = []
    bias = []
    net = x_
    for i in range(NumConvLayer):
        with tf.variable_scope('layer'+str(i)):
            conv = tf.layers.conv2d(
                    inputs=net,
                    filters=Numfilt,
                    kernel_size=[KernelSize[i], KernelSize[i]],
                    padding="same",
                    activation=tf.nn.relu,
                    name="conv")
            # Switch the scope to reuse mode so get_variable hands back the
            # kernel/bias that conv2d just created.
            tf.get_variable_scope().reuse_variables()
            droped_conv = tf.nn.dropout(conv, drop_out)
            filters.append(tf.get_variable(name='conv/kernel'))
            bias.append(tf.get_variable(name='conv/bias'))

            net = tf.layers.max_pooling2d(inputs=droped_conv,
                                          pool_size=[2, 2],
                                          strides=Numstride[i])

    pool_flat = tf.contrib.layers.flatten(net, scope='poolflat')
    dense = tf.layers.dense(inputs=pool_flat, units=500, activation=tf.nn.relu)
    droped_dense = tf.nn.dropout(dense, drop_out)
    logits = tf.layers.dense(inputs=droped_dense, units=10)
    return [filters, bias, logits]
    
################################## Training Model ########################################

# Batch generators for the two SVHN splits.  The test generator is iterated
# from scratch at every evaluation (presumably made re-iterable by the
# @read_data.restartable decorator).
dataset_generators = {
    'train': svhn_dataset_generator('train', 512),
    'test': svhn_dataset_generator('test', 512),
}

with tf.device("/gpu:0"):  # use gpu:0 if on GPU
    x_ = tf.placeholder(tf.float32, [None, 32, 32, 3])
    y_ = tf.placeholder(tf.int32, [None])
    # Fed with `drop_out_` while training and with 1.0 while evaluating.
    drop_out = tf.placeholder(tf.float32)
    filters, bias, y_logits = cnn_map(x_, NumConvLayer, Numfilt, Numstride, KernelSize, drop_out)

    y_dict = dict(labels=y_, logits=y_logits)
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(**y_dict)
    cross_entropy_loss = tf.reduce_mean(losses)
    trainer = tf.train.AdamOptimizer()
    train_op = trainer.minimize(cross_entropy_loss)

    # Softmax is monotonic, so the argmax of the logits is already the
    # predicted class; `axis` replaces the deprecated `dimension` argument.
    y_pred = tf.argmax(y_logits, axis=1)
    correct_prediction = tf.equal(tf.cast(y_pred, tf.int32), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# `saver` covers every variable and is what writes the checkpoint, so a saved
# model is always complete.  `loader` is what restores: when the architecture
# differs from the checkpoint (change_model) it is limited to the variables
# both share -- the first conv layer's kernel and bias.
saver = tf.train.Saver()
if change_model:
    shared_variables = {"layer0/conv/kernel": filters[0],
                        "layer0/conv/bias": bias[0]}
    loader = tf.train.Saver(shared_variables)
else:
    loader = saver

# Saver.save refuses to write into a missing directory; create it up front
# rather than finding out after the whole training run.
if save_model:
    save_dir = os.path.dirname(SaveFile)
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

# Soft placement lets TensorFlow fall back to the CPU for ops that cannot be
# placed on /gpu:0 (e.g. on a machine without a GPU) instead of raising.
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

if load_model:
    loader.restore(sess, LoadFile)
    print('Weight is loaded')

for epoch_i in range(epoch_n):
    for iter_i, data_batch in enumerate(dataset_generators['train']):
        train_op.run(feed_dict={x_: data_batch[0], y_: data_batch[1], drop_out: drop_out_})

        if iter_i % print_every == 0:
            collect = []
            arrloss = []
            for test_batch in dataset_generators['test']:
                # Fetch both metrics in one run so each test batch goes
                # through the network once instead of twice.
                batch_acc, batch_loss = sess.run(
                    [accuracy, cross_entropy_loss],
                    feed_dict={x_: test_batch[0], y_: test_batch[1], drop_out: 1.0})
                collect.append(batch_acc)
                arrloss.append(batch_loss)

            # Mean of the per-batch metrics over the whole test pass.
            test_acc = np.mean(collect, axis=0)
            test_loss = np.mean(arrloss, axis=0)
            print("iter %d    epoch %d   test accuracy %.3f   loss %.3f" %(iter_i, epoch_i, test_acc, test_loss) )

if save_model:
    path = saver.save(sess, SaveFile)
    print('Weight is saved')