# Fully Connected Neural Net

In [1]:
!type python 
from tqdm import tqdm
from platform import python_version
from warnings import filterwarnings
filterwarnings('ignore')
import tensorflow as tf
import numpy as np
import os
import sys
# record the interpreter and TensorFlow versions this notebook runs with
print(python_version())
print(tf.__version__)
# session config shared by every tf.Session below;
# allow_growth=True asks TensorFlow to grow its GPU memory allocation as needed
conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
# print(tf.test.is_gpu_available())

python is /home/kddlab/anaconda3/envs/swyoo_dl/bin/python
3.6.9
1.12.0


## load dataset
```python
# optional: scikit-learn's train_test_split can make the train/validation split instead of the manual slicing used below
from sklearn.model_selection import train_test_split  # version 0.21.3
```

In [2]:
# MNIST loader bundled with tf.keras (run here with tensorflow_gpu=1.12.0)
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
# the first 5000 training samples become the validation set, the rest stay for training
valid_x, valid_y = train_x[:5000], train_y[:5000]
train_x, train_y = train_x[5000:], train_y[5000:]
# report the shape of each split next to the shape of its labels
print("Size of:")
print("- Training-set:\t\t\t{} \t label: \t{}".format(train_x.shape, train_y.shape))
print("- Validation-set:\t\t{} \t\t label: \t{}".format(valid_x.shape, valid_y.shape))
print("- Test-set:\t\t\t{} \t label: \t{}".format(test_x.shape, test_y.shape))

Size of:
- Training-set:			(55000, 28, 28) 	 label: 	(55000,)
- Validation-set:		(5000, 28, 28) 		 label: 	(5000,)
- Test-set:			(10000, 28, 28) 	 label: 	(10000,)


In [3]:
""" helper functions """
# yield mini-batch function
def get_minibatch(X, Y, b):
    """
    Yield shuffled mini-batches that together cover the dataset exactly once.

    inputs: 
    - X: data features, indexable along the first axis by an integer array
    - Y: labels, indexed with the same indices as X
    - b: batch size, a positive integer

    outputs:
    - x_mini, y_mini: one minibatch per iteration; the data is split into
      max(1, len(X) // b) nearly equal parts, so a batch can be slightly larger than b

    raises:
    - ValueError (on first iteration) if b is not positive
    """
    if b <= 0:
        raise ValueError("batch size b must be positive, got {}".format(b))
    n = len(X)
    if n == 0:
        # nothing to iterate over
        return
    # at least one split: with fewer than b samples `n // b` is 0, which np.array_split rejects
    step = max(1, n // b)
    # a fresh random permutation on every call, cut into `step` index groups
    for indices in np.array_split(np.random.permutation(n), step):
        yield X[indices], Y[indices]

### basic module

In [4]:
""" basic module"""
class Model:
    """
    Two-layer fully connected classifier (tanh hidden layer) written with raw TF 1.x ops.

    The graph is built in __init__; fit() trains it and optionally saves a checkpoint,
    test() restores a checkpoint and reports accuracy.
    """
    # computation graph 
    def __init__(self, H, W, HD, L):
        """
        hyper parameters (example values)
        - H, W: input height and width, e.g. `_, H, W = train_x.shape`
        - HD = 1024 # hidden dimension
        - L = 10 # number of labels
        """
        # flattened input size, kept on the instance so fit/test do not depend on notebook globals
        self.D = H * W

        # feed
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, self.D))
        # note: `(None)` is plain None, so the label shape is left unconstrained
        self.Y = tf.placeholder(dtype=tf.int64, shape=(None))

        # parameters. 
        w1 = tf.Variable(tf.truncated_normal([self.D, HD]))
        b1 = tf.Variable(tf.zeros([HD]))
        w2 = tf.Variable(tf.truncated_normal([HD, L]))
        b2 = tf.Variable(tf.zeros([L]))

        # training computation.
        hidden = tf.tanh(tf.matmul(self.X, w1) + b1)
        self.logits = tf.matmul(hidden, w2) + b2
        self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.Y, logits=self.logits))

        # predictions for the training, validation, and test data.
        self.pred = tf.nn.softmax(self.logits)
        
        # metric: fraction of samples whose top-1 logit matches the label
        correct = tf.nn.in_top_k(predictions=self.logits, targets=self.Y, k=1)
        self.accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
        
    # train and evaluation  
    def fit(self, config, train_x, train_y, valid_x, valid_y, epoch, lr, b, dr, save=False, SAVE_FILE= './models_mnist/model'):
        """
        Train with Adam on shuffled mini-batches and print validation accuracy per epoch.

        inputs:
        - config: session config passed to tf.Session
        - train_x, train_y, valid_x, valid_y: images (reshaped here to (-1, H*W)) and labels
        - save, SAVE_FILE: when save is True, write a checkpoint to SAVE_FILE after training

        hyper parameters (example values)
        - epoch = 10
        - lr = 0.001
        - b = 150 # minibatch size
        - dr = 0.97 # learning rate decay rate 
        """
        step =  tf.Variable(0, trainable=False, name='global_step')
        # each decay_step, learning rate will be decreased by decay_rate
        # decayed_learning_rate = learning_rate * decay_rate^(global_step / decay_steps) 
        lr = tf.train.exponential_decay(learning_rate=lr, global_step=step, decay_steps=100, decay_rate=dr, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.loss, global_step=step)
        
        # flatten the validation images once, under a new name, instead of
        # overwriting the argument on every epoch
        valid_flat = np.reshape(valid_x, [-1, self.D])
        
        # save options 
        saver = tf.train.Saver()
        
        with tf.Session(config=config) as sess:
            # initialize all variables 
            sess.run(tf.global_variables_initializer())

            # minibatch training 
            for e in range(epoch):
                loss_train, n_batches = 0, 0
                for x, y in tqdm(get_minibatch(train_x, train_y, b), desc='Train'):
                    # minibatch dataset
                    x = np.reshape(x, [-1, self.D])
                    feed = {self.X: x, self.Y: y}
                    loss_mini, _ = sess.run([self.loss, optimizer], feed_dict=feed)
                    loss_train += loss_mini
                    n_batches += 1
                # mean mini-batch loss of this epoch (guarded against an empty epoch)
                loss_train = loss_train / max(n_batches, 1)
                
                # evaluation
                acc_valid = self.accuracy.eval(feed_dict={self.X: valid_flat, self.Y: valid_y})
                print('epoch {} \t| loss: {:.4f} \t| acc_valid: {:.4f} \t| lr: {:0.5} '.format(e+1, loss_train, acc_valid, lr.eval()))
            print("Training End")
            if save: 
                saver.save(sess, SAVE_FILE)
                print("save model @{}".format(SAVE_FILE))
    
    # restore model and evaluation
    def test(self, config, test_x, test_y, SAVE_FILE='./models_mnist/model'):
        """
        restore trained weights from SAVE_FILE and print the accuracy on (test_x, test_y)

        inputs:
        - config: session config passed to tf.Session
        - test_x, test_y: images (reshaped here to (-1, H*W)) and labels
        """
        # use the size stored at construction time rather than global H / W
        test_x = np.reshape(test_x, [-1, self.D])
        saver = tf.train.Saver()
        
        # honour the config argument (previously the notebook-level `conf` was used)
        with tf.Session(config=config) as sess:
            saver.restore(sess, SAVE_FILE)
            print("restore is completed")
            acc_test = self.accuracy.eval(feed_dict={self.X: test_x, self.Y: test_y})
            print("acc_test: {}".format(acc_test))

In [5]:
# drop whatever graph/variables earlier cells built before constructing a new model
tf.reset_default_graph()
# image height and width; H and W stay defined at notebook level
H, W = train_x.shape[1:]
model = Model(H=H, W=W, HD=1024, L=10)
# train for 5 epochs, save the checkpoint, then restore it and score the test set
model.fit(conf, train_x, train_y, valid_x, valid_y,
          epoch=5, lr=0.001, b=150, dr=0.97, save=True)
model.test(conf, test_x, test_y)

Train: 366it [00:00, 689.27it/s]
Train: 91it [00:00, 905.31it/s]

epoch 1 	| loss: 14.7908 	| acc_valid: 0.6430 	| lr: 0.00091267 


Train: 366it [00:00, 913.21it/s]
Train: 89it [00:00, 889.77it/s]

epoch 2 	| loss: 6.1408 	| acc_valid: 0.7416 	| lr: 0.00080798 


Train: 366it [00:00, 907.29it/s]
Train: 97it [00:00, 963.73it/s]

epoch 3 	| loss: 4.6695 	| acc_valid: 0.7836 	| lr: 0.00073742 


Train: 366it [00:00, 953.11it/s]
Train: 86it [00:00, 854.08it/s]

epoch 4 	| loss: 3.9491 	| acc_valid: 0.8074 	| lr: 0.00065284 


Train: 366it [00:00, 912.61it/s]


epoch 5 	| loss: 3.4931 	| acc_valid: 0.8226 	| lr: 0.00057795 
Training End
save model @./models_mnist/model
INFO:tensorflow:Restoring parameters from ./models_mnist/model
restore is completed
acc_test: 0.8148000240325928


### layers module

In [6]:
""" layers module"""
class Model:
    """
    Fully connected classifier built with tf.contrib.layers:
    flatten -> dense(relu) -> dropout -> dense(logits), with L2 weight regularization.

    The graph is built in __init__; fit() trains (optionally logging to TensorBoard and
    saving a checkpoint), test() restores a checkpoint and reports accuracy,
    summary() lists the trainable variables.
    """
    # computation graph 
    def __init__(self, H, W, HD, L, beta):
        """
        hyper parameters (example values)
        - H, W: input height and width, e.g. `_, H, W = train_x.shape`
        - HD = 1024 # hidden dimension
        - L = 10 # number of labels
        - beta: scale passed to the L2 regularizer
        """
        # feed
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, H, W))
        # note: `(None)` is plain None, so the label shape is left unconstrained
        self.Y = tf.placeholder(dtype=tf.int64, shape=(None))
        # despite its name this is fed to tf.nn.dropout as keep_prob
        # (fit feeds keep_prob while training and 1 for evaluation)
        self.dropout = tf.placeholder(tf.float32)
        
        # helper
        reg = tf.contrib.layers.l2_regularizer(scale=beta)
        init = tf.contrib.layers.xavier_initializer()
        
        with tf.variable_scope("FC", initializer=init, regularizer=reg):
            X = tf.contrib.layers.flatten(self.X)
            dense = tf.contrib.layers.fully_connected(inputs=X, num_outputs=HD, activation_fn=tf.nn.relu)
            # dense = tf.layers.dense(x, L, activation = tf.nn.relu)
            dense = tf.nn.dropout(dense, keep_prob=self.dropout)
            self.logits = tf.contrib.layers.fully_connected(inputs=dense, num_outputs=L, activation_fn=None)
            # self.logits = tf.layers.dense(dense, L, activation=None)

        with tf.variable_scope("loss", initializer=init, regularizer=reg):
            # regularization terms collected while the FC layers were created
            reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.Y, logits=self.logits))
            self.loss += tf.reduce_sum(reg_loss) # scalar
            
        # metric
        self.pred = tf.nn.softmax(self.logits)
        correct = tf.nn.in_top_k(predictions=self.logits, targets=self.Y, k=1)
        self.accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
        
    # train and evaluation  
    def fit(self, config, train_x, train_y, valid_x, valid_y, epoch, lr, b, dr, keep_prob=0.7, 
            save=False, SAVE_FILE='./models_mnist/model', log=False, LOG_DIR='./models_mnist'):
        """
        Train with Adam on shuffled mini-batches and print validation accuracy per epoch.

        inputs:
        - config: session config passed to tf.Session
        - train_x, train_y, valid_x, valid_y: images and labels fed to the placeholders
        - save, SAVE_FILE: when save is True, write a checkpoint to SAVE_FILE after training
        - log, LOG_DIR: when log is True, write the graph and per-epoch
          train_loss / valid_acc summaries to LOG_DIR for TensorBoard

        hyper parameters (example values)
        - epoch = 10
        - lr = 0.001
        - b = 150 # minibatch size
        - dr = 0.97 # learning rate decay rate
        - keep_prob = 0.7
        """
        step = tf.get_variable(name="global_step", shape=(), initializer=tf.zeros_initializer(), trainable=False)
        # each decay_step, learning rate will be decreased by decay_rate
        # decayed_learning_rate = learning_rate * decay_rate^(global_step / decay_steps) 
        lr = tf.train.exponential_decay(learning_rate=lr, global_step=step, decay_steps=100, decay_rate=dr, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.loss, global_step=step)
        
        # save options: only the trainable variables go into the checkpoint
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        # saver = tf.train.Saver()
        
        # tensorboard visualization
        if log:
            train_loss = tf.get_variable(name='train_loss', shape=(), dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False)
            valid_acc = tf.get_variable(name='valid_acc', shape=(), dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False)
            train_loss_summ = tf.summary.scalar('train_loss', train_loss)
            valid_acc_summ = tf.summary.scalar('valid_acc', valid_acc)
            summ_op = tf.summary.merge([train_loss_summ, valid_acc_summ])
            # build the assign ops once and feed the python scalars through placeholders,
            # instead of adding new assign ops to the graph on every epoch
            train_loss_in = tf.placeholder(tf.float32, shape=())
            valid_acc_in = tf.placeholder(tf.float32, shape=())
            update_log_vars = [train_loss.assign(train_loss_in), valid_acc.assign(valid_acc_in)]
        
        with tf.Session(config=config) as sess:
            # initialize all variables 
            sess.run(tf.global_variables_initializer())
            # add graph to tensorboard
            writer = None
            if log:
                print("@terminal: $ tensorboard --logdir={}".format(LOG_DIR))
                writer = tf.summary.FileWriter(LOG_DIR, session=sess)
                writer.add_graph(sess.graph)
            try:
                # minibatch training 
                for e in range(epoch):
                    loss_train, n_batches = 0, 0
                    for x, y in tqdm(get_minibatch(train_x, train_y, b), desc='Train'):
                        # minibatch dataset
                        feed = {self.X: x, self.Y: y, self.dropout: keep_prob}
                        loss_mini, _ = sess.run([self.loss, optimizer], feed_dict=feed)
                        loss_train += loss_mini
                        n_batches += 1
                    # mean mini-batch loss of this epoch (guarded against an empty epoch)
                    loss_train = loss_train / max(n_batches, 1)
                    
                    # evaluation (dropout disabled by feeding keep_prob = 1)
                    acc_valid = self.accuracy.eval(feed_dict={self.X: valid_x, self.Y: valid_y, self.dropout: 1})
                    print('epoch {} \t| loss: {:.4f} \t| acc_valid: {:.4f} \t| lr: {:0.5} '.format(e+1, loss_train, acc_valid, lr.eval()))
                    if log:
                        sess.run(update_log_vars, feed_dict={train_loss_in: loss_train, valid_acc_in: acc_valid})
                        summary = sess.run(summ_op)
                        # summaries are indexed by the 0-based epoch number
                        writer.add_summary(summary, global_step=e)
            finally:
                # flush and close the event file while the session is still open;
                # otherwise buffered summaries may never reach disk
                if writer is not None:
                    writer.close()
            print("Training End")
            if save: 
                saver.save(sess, SAVE_FILE)
                print("save model @{}".format(SAVE_FILE))
    # restore model and evaluation
    def test(self, config, test_x, test_y, SAVE_FILE='./models_mnist/model'):
        """
        restore trained weights from SAVE_FILE and print the accuracy on (test_x, test_y)
        """
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        # saver = tf.train.Saver()
        
        with tf.Session(config=config) as sess:
            saver.restore(sess, SAVE_FILE)
            print("restore is completed")
            acc_test = self.accuracy.eval(feed_dict={self.X: test_x, self.Y: test_y, self.dropout: 1})
            print("acc_test: {}".format(acc_test))
            
    def summary(self):
        """
        print every variable in the TRAINABLE_VARIABLES collection of the default graph
        """
        # print("=============================================")
        # print("list of all parameters")
        # print("=============================================")
        # for x in tf.global_variables():
        #     print(x)
        
        print("=============================================")
        print("list of all trainable parameters")
        print("=============================================")
        for x in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            print(x)
        
        # print("=============================================")
        # print("list of parameters reflected regularization ")
        # print("=============================================")
        # for x in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
        #     print(x)

In [7]:
# drop whatever graph/variables earlier cells built before constructing a new model
tf.reset_default_graph()
# image height and width; H and W stay defined at notebook level
H, W = train_x.shape[1:]
model = Model(H=H, W=W, HD=1024, L=10, beta=0.1)
# train for 10 epochs with TensorBoard logging, saving the checkpoint at the end
model.fit(conf, train_x, train_y, valid_x, valid_y,
          epoch=10, lr=0.001, b=150, dr=0.97, keep_prob=0.7,
          save=True, log=True)
model.summary()
model.test(conf, test_x, test_y)

Train: 41it [00:00, 408.45it/s]

@terminal: $ tensorboard --logdir=./models_mnist


Train: 366it [00:00, 747.59it/s]
Train: 86it [00:00, 855.52it/s]

epoch 1 	| loss: 25.5607 	| acc_valid: 0.9518 	| lr: 0.00091267 


Train: 366it [00:00, 868.31it/s]
Train: 79it [00:00, 783.49it/s]

epoch 2 	| loss: 9.2745 	| acc_valid: 0.9586 	| lr: 0.00080798 


Train: 366it [00:00, 833.73it/s]
Train: 86it [00:00, 858.29it/s]

epoch 3 	| loss: 4.4781 	| acc_valid: 0.9628 	| lr: 0.00073742 


Train: 366it [00:00, 860.17it/s]
Train: 84it [00:00, 830.40it/s]

epoch 4 	| loss: 2.3403 	| acc_valid: 0.9676 	| lr: 0.00065284 


Train: 366it [00:00, 830.31it/s]
Train: 85it [00:00, 840.95it/s]

epoch 5 	| loss: 1.3452 	| acc_valid: 0.9622 	| lr: 0.00057795 


Train: 366it [00:00, 853.45it/s]
Train: 83it [00:00, 827.82it/s]

epoch 6 	| loss: 0.8565 	| acc_valid: 0.9692 	| lr: 0.00052748 


Train: 366it [00:00, 812.75it/s]
Train: 84it [00:00, 837.99it/s]

epoch 7 	| loss: 0.6022 	| acc_valid: 0.9724 	| lr: 0.00046698 


Train: 366it [00:00, 848.15it/s]
Train: 88it [00:00, 873.64it/s]

epoch 8 	| loss: 0.4669 	| acc_valid: 0.9738 	| lr: 0.00041341 


Train: 366it [00:00, 853.43it/s]
Train: 86it [00:00, 859.78it/s]

epoch 9 	| loss: 0.3802 	| acc_valid: 0.9734 	| lr: 0.00037731 


Train: 366it [00:00, 867.04it/s]

epoch 10 	| loss: 0.3290 	| acc_valid: 0.9764 	| lr: 0.00033403 
Training End
save model @./models_mnist/model
list of all trainable parameters
<tf.Variable 'FC/fully_connected/weights:0' shape=(784, 1024) dtype=float32_ref>
<tf.Variable 'FC/fully_connected/biases:0' shape=(1024,) dtype=float32_ref>
<tf.Variable 'FC/fully_connected_1/weights:0' shape=(1024, 10) dtype=float32_ref>
<tf.Variable 'FC/fully_connected_1/biases:0' shape=(10,) dtype=float32_ref>
INFO:tensorflow:Restoring parameters from ./models_mnist/model





restore is completed
acc_test: 0.9732000231742859


## keras module
[official guide](https://github.com/tensorflow/docs/tree/master/site/en/r1)

In [8]:
from tensorflow import keras

In [9]:
# image height and width, taken from the trailing axes of the training array
H, W = train_x.shape[1:]

In [10]:
# Flatten takes the per-sample shape in the same axis order as the data, i.e. (H, W);
# the previous (W, H) only worked because the images here are square
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(H, W)),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])

In [11]:
# Adam optimizer; the sparse loss is fed the integer class labels as loaded
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [12]:
# train on train_x exactly as loaded (this cell applies no pixel scaling)
history = model.fit(train_x, train_y, batch_size=150, epochs=5, verbose=1)
# history_dict = history.history
# for key, values in history_dict.items():
#     print(key, values)
model.summary()
# evaluate returns [loss, accuracy] for the metrics configured in compile
results = model.evaluate(test_x, test_y, verbose=1)
print(results)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
[4.017395917129517, 0.7487]


### customize keras
[official tutorial document](https://www.tensorflow.org/tutorials/quickstart/advanced?hl=ko)  
[official tutorial korean](https://www.tensorflow.org/guide/keras/overview)  
[korean tutorial blog](https://hwiyong.tistory.com/category/%23%20Machine%20Learning/tensorflow2.0%28keras%29?page=3)  
how to use tf.data.Dataset? [blog](https://cyc1am3n.github.io/2018/09/13/how-to-use-dataset-in-tensorflow.html)

In [13]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D

In [14]:
class MyModel(Model):
    """Subclassed Keras model: Flatten -> Dense(128, relu) -> Dense(10, softmax)."""

    def __init__(self):
        super().__init__()
        # layers are created once here and reused on every call
        self.flat = Flatten()
        self.d1 = Dense(units=128, activation='relu')
        self.d2 = Dense(units=10, activation='softmax')

    # overriding
    def call(self, x):
        """
        Forward pass.

        params:
        - x: inputs
        return: predictions (output of the final softmax layer)
        """
        flattened = self.flat(x)
        hidden = self.d1(flattened)
        return self.d2(hidden)

In [15]:
model = MyModel()
# same optimizer / loss / metric as the Sequential model, with the loss passed as a function
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'])
# train on train_x exactly as loaded (this cell applies no pixel scaling)
history = model.fit(train_x, train_y, batch_size=150, epochs=5, verbose=1)
# history_dict = history.history
# for key, values in history_dict.items():
#     print(key, values)
model.summary()
# evaluate returns [loss, accuracy] for the metrics configured in compile
results = model.evaluate(test_x, test_y, verbose=1)
print(results)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          multiple                  0         
_________________________________________________________________
dense_2 (Dense)              multiple                  100480    
_________________________________________________________________
dense_3 (Dense)              multiple                  1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
[3.925171082305908, 0.7546]


In [16]:
# round-trip the weights through disk and re-evaluate;
# note this is the same path prefix the TF Saver cells above write their checkpoint to
model.save_weights('./models_mnist/model')
model.load_weights('./models_mnist/model')
results = model.evaluate(test_x, test_y, verbose=1)
results


Consider using a TensorFlow optimizer from `tf.train`.


[3.925171082305908, 0.7546]

# pytorch

- korean blog [basic](https://tutorials.pytorch.kr/beginner/pytorch_with_examples.html) [advanced](https://greeksharifa.github.io/pytorch/2018/11/10/pytorch-usage-03-How-to-Use-PyTorch/)  
[swyoo simplified version](https://nbviewer.jupyter.org/github/SUNGWOOKYOO/practice/blob/master/pytorch.ipynb)  
[official document](https://pytorch.org/docs/0.4.0/)  


In [1]:
import torch
torch.__version__

'0.4.0'

In [17]:
class MyModel(torch.nn.Module):
    """Two-layer network: Linear(D_in, H) -> ReLU -> Linear(H, D_out)."""

    def __init__(self, D_in, H, D_out):
        """
        Create the two nn.Linear modules and register them as member attributes.
        """
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        Take a Tensor of input data and return a Tensor of output data.
        The modules defined in the constructor are applied in order, with a
        ReLU between them.
        """
        hidden = self.linear1(x)
        activated = torch.nn.functional.relu(hidden)
        return self.linear2(activated)

In [18]:
# N is the batch size and D_in the input dimension;
# H is the hidden dimension and D_out the output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random Tensors standing in for the inputs and the target outputs.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Build the model by instantiating the class defined above.
model = MyModel(D_in, H, D_out)

# Move the data and the model to the GPU when one is available.
if torch.cuda.is_available():
    x, y, model = x.cuda(), y.cuda(), model.cuda()
    print('cuda is available')

# Loss function and optimizer. Passing model.parameters() to the SGD constructor
# hands it the learnable parameters of the two nn.Linear members of the model.
criterion = torch.nn.MSELoss(size_average=False, reduce=True)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

cuda is available


In [19]:
for t in range(500):
    # Forward pass: feed x through the model to get the predicted y.
    y_pred = model(x)

    # Compute the loss and print it every 100 iterations.
    loss = criterion(y_pred, y)
    if t % 100 == 0:
        print(t, loss.item())

    # Zero the gradients, run the backward pass, then update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 661.175048828125
100 2.600653648376465
200 0.04426896944642067
300 0.0014514968497678638
400 6.26491237198934e-05


[furthermore](https://www.kaggle.com/sdelecourt/cnn-with-pytorch-for-mnist)