### Initialization

In [1]:
# For Colab only!

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
import torch
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch import nn

In [3]:
print(tf.__version__)
print(tf.test.is_gpu_available())

2.1.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


In [4]:
print(torch.__version__)
print(torch.cuda.is_available())

1.4.0
True


### Data Loading
MINST data set

In [5]:
import numpy as np

batch_size=200
learning_rate=0.01
epochs=10

In [41]:
(x, y),(x_test, y_test) = keras.datasets.mnist.load_data()

ds_train = tf.data.Dataset.from_tensor_slices((x,y))
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

def preprocess(x, y):
  x = (tf.cast(x, tf.float32)/255)-0.1307
  y = tf.cast(y, tf.int32)
#   y = tf.one_hot(y,depth=10)   
  return x, y

ds_train = ds_train.map(preprocess).shuffle(1000).batch(batch_size)
ds_test = ds_test.map(preprocess).shuffle(1000).batch(batch_size)


In [7]:
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

In [42]:
print(type(ds_test))
image, label = next(iter(ds_test))
print(image.shape, label.shape)

<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>
(200, 28, 28) (200,)


In [9]:
print(type(train_loader))
[image, label] = next(iter(train_loader))
print(image.shape, label.shape)

<class 'torch.utils.data.dataloader.DataLoader'>
torch.Size([200, 1, 28, 28]) torch.Size([200])


### Full-connnected manually
[b, 786] -> [b, 200] -> [b, 100] -> [b, 10]

w1: [786, 200], b1: [200],

w2: [200,100], b2: [100], 

w3: [100,10], b3：[10]

In [35]:
# weights and bias
w1 = tf.Variable(tf.random.uniform([28*28, 200]))
b1 = tf.Variable(tf.zeros([200]))

w2 = tf.Variable(tf.random.uniform([200, 100]))
b2 = tf.Variable(tf.zeros([100]))

w3 = tf.Variable(tf.random.uniform([100, 10]))
b3 = tf.Variable(tf.zeros([10]))


# forward func
def model(x):
    x = tf.nn.relu(x@w1 + b1)
    x = tf.nn.relu(x@w2 + b2)
    x = x@w3 + b3
        
    return x

optimizer = tf.optimizers.Adam(learning_rate)

for epoch in range(epochs):
    
    for step, (x, y) in enumerate(ds_train):
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:            
            logits = model(x)
            
            losses = tf.losses.sparse_categorical_crossentropy(y,logits,from_logits=True)
            loss = tf.reduce_mean(losses)
            
        grads = tape.gradient(loss, [w1,b1,w2,b2,w3,b3])
        
        optimizer.apply_gradients(zip(grads, [w1,b1,w2,b2,w3,b3]))
        
        if(step%100==0):
            print("epoch:{}, step:{} loss:{}".
                  format(epoch, step, loss.numpy()))


epoch:0, step:0 loss:1404.9921875
epoch:0, step:100 loss:2.2279036045074463
epoch:0, step:200 loss:2.3840603828430176
epoch:1, step:0 loss:2.1420228481292725
epoch:1, step:100 loss:2.0314340591430664
epoch:1, step:200 loss:2.0380284786224365
epoch:2, step:0 loss:2.127847671508789
epoch:2, step:100 loss:2.021336793899536
epoch:2, step:200 loss:1.6273704767227173
epoch:3, step:0 loss:1.5105515718460083
epoch:3, step:100 loss:1.5376299619674683
epoch:3, step:200 loss:1.2730509042739868
epoch:4, step:0 loss:1.2069252729415894
epoch:4, step:100 loss:1.358099341392517
epoch:4, step:200 loss:1.1399234533309937
epoch:5, step:0 loss:1.064672827720642
epoch:5, step:100 loss:1.315996527671814
epoch:5, step:200 loss:1.071283221244812
epoch:6, step:0 loss:1.001338243484497
epoch:6, step:100 loss:0.7731111645698547
epoch:6, step:200 loss:0.4953363537788391
epoch:7, step:0 loss:0.6500086784362793
epoch:7, step:100 loss:0.7191830277442932
epoch:7, step:200 loss:0.5285062193870544
epoch:8, step:0 loss:

In [37]:
# weights and bias
w1 = torch.rand(28*28, 200 , requires_grad=True)
b1 = torch.zeros(200, requires_grad=True)

w2 = torch.rand(200, 100, requires_grad=True)
b2 = torch.zeros(100, requires_grad=True)

w3 = torch.rand(100, 10, requires_grad=True)
b3 = torch.zeros(10, requires_grad=True)

# torch.nn.init.kaiming_normal_(w1)
# torch.nn.init.kaiming_normal_(w2)
# torch.nn.init.kaiming_normal_(w3)

# forward func
def forward(x):
    x = F.relu(x@w1 + b1)
    x = F.relu(x@w2 + b2)
    x = x@w3 + b3
        
    return x

optimizer = torch.optim.Adam([w1,b1,w2,b2,w3,b3],
                            lr=learning_rate)
criteon = torch.nn.CrossEntropyLoss()

for epoch in range(epochs):
    
    for step, (x, y) in enumerate(train_loader):
        x = x.reshape(-1,28*28)
        
        logits = forward(x)
        loss = criteon(logits, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(step%100 == 0):
            print("epoch:{}, step:{}, loss:{}".
                  format(epoch, step, loss.item()))

epoch:0, step:0, loss:9682.416015625
epoch:0, step:100, loss:2.488150119781494
epoch:0, step:200, loss:2.2699265480041504
epoch:1, step:0, loss:2.274050235748291
epoch:1, step:100, loss:2.34322452545166
epoch:1, step:200, loss:2.2167744636535645
epoch:2, step:0, loss:2.3210036754608154
epoch:2, step:100, loss:2.2207939624786377
epoch:2, step:200, loss:2.4887542724609375
epoch:3, step:0, loss:2.344801187515259
epoch:3, step:100, loss:2.1573853492736816
epoch:3, step:200, loss:2.2278380393981934
epoch:4, step:0, loss:2.2109551429748535
epoch:4, step:100, loss:2.190887928009033
epoch:4, step:200, loss:2.702714204788208
epoch:5, step:0, loss:2.1471283435821533
epoch:5, step:100, loss:2.527514696121216
epoch:5, step:200, loss:2.131390333175659
epoch:6, step:0, loss:2.4100725650787354
epoch:6, step:100, loss:1.98695969581604
epoch:6, step:200, loss:1.888046383857727
epoch:7, step:0, loss:2.0443994998931885
epoch:7, step:100, loss:1.8937968015670776
epoch:7, step:200, loss:1.9226715564727783


### Full-connnected higher level

In [61]:
class FC_model(keras.Model):
    def __init__(self):
        super().__init__()
    
        self.model = keras.Sequential(
            [layers.Dense(200),
            layers.ReLU(),
            layers.Dense(100),
            layers.ReLU(),
            layers.Dense(10)]
            )
    
    def call(self,x):
        x = self.model(x)
        
        return x
    
model = FC_model()
optimizer = tf.optimizers.Adam(learning_rate)
    
for epoch in range(epochs):
    
    for step, (x, y) in enumerate(ds_train):
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape:            
            logits = model(x)
            
            losses = tf.losses.sparse_categorical_crossentropy(y,logits,from_logits=True)
            loss = tf.reduce_mean(losses)
            
        grads = tape.gradient(loss, model.variables)
        
        optimizer.apply_gradients(zip(grads, model.variables))
        
        if(step%100==0):
            print("epoch:{}, step:{} loss:{}".
                  format(epoch, step, loss.numpy()))
            
            
#             test accuracy: 
            total_correct = 0
            total_num = 0
            
            for x_test, y_test in ds_test:
                x_test = tf.reshape(x_test, [-1, 28*28])
                y_pred = tf.argmax(model(x_test),axis=1)
                y_pred = tf.cast(y_pred, tf.int32)
                correct = tf.cast((y_pred == y_test), tf.int32)
                correct = tf.reduce_sum(correct)
                
                total_correct += int(correct)
                total_num += x_test.shape[0]
        
            
            accuracy = total_correct/total_num
            print('accuracy: ', accuracy)


epoch:0, step:0 loss:2.366990566253662
accuracy:  0.4342
epoch:0, step:100 loss:0.1408572494983673
accuracy:  0.9444
epoch:0, step:200 loss:0.1306784301996231
accuracy:  0.9476
epoch:1, step:0 loss:0.18211042881011963
accuracy:  0.9523
epoch:1, step:100 loss:0.1134713813662529
accuracy:  0.9645
epoch:1, step:200 loss:0.10413701087236404
accuracy:  0.9562
epoch:2, step:0 loss:0.10256875306367874
accuracy:  0.9589
epoch:2, step:100 loss:0.08379142731428146
accuracy:  0.9636
epoch:2, step:200 loss:0.06404948979616165
accuracy:  0.9685
epoch:3, step:0 loss:0.031297821551561356
accuracy:  0.9668
epoch:3, step:100 loss:0.06204662472009659
accuracy:  0.9681
epoch:3, step:200 loss:0.039909422397613525
accuracy:  0.9734
epoch:4, step:0 loss:0.12970837950706482
accuracy:  0.9707
epoch:4, step:100 loss:0.1105945035815239
accuracy:  0.9647
epoch:4, step:200 loss:0.1333925724029541
accuracy:  0.969
epoch:5, step:0 loss:0.0438968688249588
accuracy:  0.9689
epoch:5, step:100 loss:0.06427070498466492


In [67]:
class FC_NN(nn.Module):
    def __init__(self):
        super().__init__()
    
        self.model = nn.Sequential(
            nn.Linear(28*28, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 100),
            nn.ReLU(inplace=True),
            nn.Linear(100,10)
            )
    
    def forward(self, x):
        x = self.model(x)
        
        return x
device = torch.device('cuda:0')

network = FC_NN().to(device)        
optimizer = torch.optim.Adam(network.parameters(),
                            lr=learning_rate)
criteon = torch.nn.CrossEntropyLoss().to(device)

for epoch in range(epochs):
    
    for step, (x, y) in enumerate(train_loader):
        x = x.reshape(-1,28*28)
        
        x, y = x.to(device), y.to(device)
        
        logits = network(x)
        loss = criteon(logits, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(step%100 == 0):
            print("epoch:{}, step:{}, loss:{}".
                  format(epoch, step, loss.item()))
        
#             test accuracy
            total_correct = 0
            total_num = 0    

            for x_test, y_test in test_loader:
                    x_test = x_test.reshape(-1,28*28)
                    x_test, y_test = x_test.to(device), y_test.to(device)

                    y_pred = network(x_test)
                    y_pred = torch.argmax(y_pred, dim = 1)
                    correct = y_pred == y_test
                    correct = correct.sum()

                    total_correct += correct
                    total_num += x_test.shape[0]

            acc = total_correct.float()/total_num
            print("accuracy: ", acc.item())
                
                

epoch:0, step:0, loss:2.2986953258514404
accuracy:  0.17790000140666962
epoch:0, step:100, loss:0.17231173813343048
accuracy:  0.9359999895095825
epoch:0, step:200, loss:0.15071144700050354
accuracy:  0.9472999572753906
epoch:1, step:0, loss:0.07731062173843384
accuracy:  0.961899995803833
epoch:1, step:100, loss:0.19826166331768036
accuracy:  0.9538999795913696
epoch:1, step:200, loss:0.14187023043632507
accuracy:  0.9587999582290649
epoch:2, step:0, loss:0.0891067311167717
accuracy:  0.9620999693870544
epoch:2, step:100, loss:0.12733696401119232
accuracy:  0.9612999558448792
epoch:2, step:200, loss:0.11065959930419922
accuracy:  0.9555999636650085
epoch:3, step:0, loss:0.1741502583026886
accuracy:  0.9610999822616577
epoch:3, step:100, loss:0.06588414311408997
accuracy:  0.9656999707221985
epoch:3, step:200, loss:0.1473999172449112
accuracy:  0.9538999795913696
epoch:4, step:0, loss:0.1541697084903717
accuracy:  0.9605000019073486
epoch:4, step:100, loss:0.04024747014045715
accuracy: