# 인공지능 과제7
### : ANN(Artifical Neural Network) 구현 및 MNIST classification
### : 3-layer, 5-layer, 10-layer ANN 구현
- 학번: 201711719
- 학과: 응용통계학과
- 이름: 심은선
- 제출 날짜: 2020.11.01

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1. Loas MNIST data (Train/ Test)

In [2]:
from keras.datasets import mnist
import torch

def load_dataset():
    (train_X, train_y), (test_X, test_y) = mnist.load_data()
    # Flatten
    train_X = train_X.reshape(-1, 28*28)
    test_X  = test_X.reshape(-1, 28*28) #28*28을 한 차원, 나머지를 한 차원으로(-1)

    # Convert to torch.tensor
    train_X = torch.tensor(train_X, dtype=torch.float)
    train_y = torch.tensor(train_y, dtype=torch.long)
    test_X = torch.tensor(test_X, dtype=torch.float)
    test_y = torch.tensor(test_y, dtype=torch.long)
    
    return (train_X, train_y), (test_X, test_y)

In [3]:
def tensor2list(input_tensor):
    return input_tensor.cpu().detach().numpy().tolist()

In [4]:
tmp = load_dataset()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## 2. Build Model

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import (DataLoader, RandomSampler, TensorDataset)
import torch.optim as optim

import os
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score

### a) 3-layers ANN

In [6]:
class ANN_Layer3(nn.Module):
    def __init__(self, config):
        super(ANN_Layer3, self).__init__()

        # MNIST data shape: 28x28
        self.width = config["input_width_size"]
        self.height = config["input_height_size"]

        # Hidden layer dimension List: len(self.feature_size) == (number of layers)-1
        self.feature_size = config["feature_size"]

        # The number of labels (classes)
        self.num_labels = config["num_labels"]

        # Activation: sigmoid
        self.activation = nn.Sigmoid()

        # 3-layers ANN
        self.layer_1 = nn.Linear(in_features=self.width*self.height, out_features = self.feature_size[0])
        self.layer_2 = nn.Linear(in_features=self.feature_size[0], out_features = self.feature_size[1])
        self.layer_3 = nn.Linear(in_features=self.feature_size[1], out_features = self.num_labels)

    
    
    def forward(self, input_features, labels=None):
        layer_1_output = self.layer_1(input_features)        # [batch, width*height] -> [batch, self.feature_size[0]]
        activated_output_1 = self.activation(layer_1_output) # [batch, self.feature_size[0]] -> [batch, self.feature_size[0]]
        layer_2_output = self.layer_2(activated_output_1)    # [batch, self.feature_size[0]] -> [batch, self.feature_size[1]]
        activated_output_2 = self.activation(layer_2_output) # [batch, self.feature_size[1]] -> [batch, self.feature_size[1]]
        layer_3_output = self.layer_3(activated_output_2)    # [batch, self.feature_size[1]] -> [batch, self.num_labels]
        activated_output_3 = self.activation(layer_3_output) # [batch, self.num_labels] -> [batch, self.num_labels]
        
        # Training
        if labels is not None:
            loss_fnc = nn.CrossEntropyLoss()
            logits = activated_output_3
            loss = loss_fnc(logits, labels) # logit: 2-dimension, labels: 1-dimension
            return loss
        # Evaluation
        else:
            output = torch.argmax(activated_output_3, -1) # size: [batch]
            return output

### b) 5-layers ANN

In [7]:
class ANN_Layer5(nn.Module):
    def __init__(self, config):
        super(ANN_Layer5, self).__init__()

        # MNIST data shape: 28x28
        self.width = config["input_width_size"]
        self.height = config["input_height_size"]

        # Hidden layer dimension List: len(self.feature_size) == (number of layers)-1
        self.feature_size = config["feature_size"]

        # The number of labels (classes)
        self.num_labels = config["num_labels"]

        # Activation: sigmoid
        self.activation = nn.Sigmoid()

        # 5-layers ANN
        self.layer_1 = nn.Linear(in_features=self.width*self.height, out_features = self.feature_size[0])
        self.layer_2 = nn.Linear(in_features=self.feature_size[0], out_features = self.feature_size[1])
        self.layer_3 = nn.Linear(in_features=self.feature_size[1], out_features = self.feature_size[2])
        self.layer_4 = nn.Linear(in_features=self.feature_size[2], out_features = self.feature_size[3])
        self.layer_5 = nn.Linear(in_features=self.feature_size[3], out_features = self.num_labels)
    
    
    def forward(self, input_features, labels=None):
        
        layer_1_output = self.layer_1(input_features)        # [batch, width*height] -> [batch, self.feature_size[0]]
        activated_output_1 = self.activation(layer_1_output) # [batch, self.feature_size[0]] -> [batch, self.feature_size[0]]
        layer_2_output = self.layer_2(activated_output_1)    # [batch, self.feature_size[0]] -> [batch, self.feature_size[1]]
        activated_output_2 = self.activation(layer_2_output) # [batch, self.feature_size[1]] -> [batch, self.feature_size[1]]
        layer_3_output = self.layer_3(activated_output_2)    # [batch, self.feature_size[1]] -> [batch, self.feature_size[2]]
        activated_output_3 = self.activation(layer_3_output) # [batch, self.feature_size[2]] -> [batch, self.feature_size[2]]
        layer_4_output = self.layer_4(activated_output_3)    # [batch, self.feature_size[2]] -> [batch, self.feature_size[3]]
        activated_output_4 = self.activation(layer_4_output) # [batch, self.feature_size[3]] -> [batch, self.feature_size[3]]
        layer_5_output = self.layer_5(activated_output_4)    # [batch, self.feature_size[3]] -> [batch, self.num_labels]
        activated_output_5 = self.activation(layer_5_output) # [batch, self.num_labels] -> [batch, self.num_labels]
        
        
        # Training
        if labels is not None:
            loss_fnc = nn.CrossEntropyLoss()
            logits = activated_output_5
            loss = loss_fnc(logits, labels) # logit: 2-dimension, labels: 1-dimension
            return loss
        # Evaluation
        else:
            output = torch.argmax(activated_output_5, -1) # size: [batch]
            return output

### c) 10-layers ANN

In [8]:
class ANN_Layer10(nn.Module):
    def __init__(self, config):
        super(ANN_Layer10, self).__init__()

        # MNIST data shape: 28x28
        self.width = config["input_width_size"]
        self.height = config["input_height_size"]

        # Hidden layer dimension List: len(self.feature_size) == (number of layers)-1
        self.feature_size = config["feature_size"]

        # The number of labels (classes)
        self.num_labels = config["num_labels"]

        # Activation: sigmoid
        self.activation = nn.Sigmoid()

        # 10-layers ANN
        self.layer_1 = nn.Linear(in_features=self.width*self.height, out_features = self.feature_size[0])
        self.layer_2 = nn.Linear(in_features=self.feature_size[0], out_features = self.feature_size[1])
        self.layer_3 = nn.Linear(in_features=self.feature_size[1], out_features = self.feature_size[2])
        self.layer_4 = nn.Linear(in_features=self.feature_size[2], out_features = self.feature_size[3])
        self.layer_5 = nn.Linear(in_features=self.feature_size[3], out_features = self.feature_size[4])
        self.layer_6 = nn.Linear(in_features=self.feature_size[4], out_features = self.feature_size[5])
        self.layer_7 = nn.Linear(in_features=self.feature_size[5], out_features = self.feature_size[6])
        self.layer_8 = nn.Linear(in_features=self.feature_size[6], out_features = self.feature_size[7])
        self.layer_9 = nn.Linear(in_features=self.feature_size[7], out_features = self.feature_size[8])
        self.layer_10 = nn.Linear(in_features=self.feature_size[8], out_features = self.num_labels)
    
    
    def forward(self, input_features, labels=None):
        
        layer_1_output = self.layer_1(input_features)          # [batch, width*height] -> [batch, self.feature_size[0]]
        activated_output_1 = self.activation(layer_1_output)   # [batch, self.feature_size[0]] -> [batch, self.feature_size[0]]
        layer_2_output = self.layer_2(activated_output_1)      # [batch, self.feature_size[0]] -> [batch, self.feature_size[1]]
        activated_output_2 = self.activation(layer_2_output)   # [batch, self.feature_size[1]] -> [batch, self.feature_size[1]]
        layer_3_output = self.layer_3(activated_output_2)      # [batch, self.feature_size[1]] -> [batch, self.feature_size[2]]
        activated_output_3 = self.activation(layer_3_output)   # [batch, self.feature_size[2]] -> [batch, self.feature_size[2]]
        layer_4_output = self.layer_4(activated_output_3)      # [batch, self.feature_size[2]] -> [batch, self.feature_size[3]]
        activated_output_4 = self.activation(layer_4_output)   # [batch, self.feature_size[3]] -> [batch, self.feature_size[3]]
        layer_5_output = self.layer_5(activated_output_4)      # [batch, self.feature_size[3]] -> [batch, self.feature_size[4]]
        activated_output_5 = self.activation(layer_5_output)   # [batch, self.feature_size[4]] -> [batch, self.feature_size[4]]
        
        layer_6_output = self.layer_6(activated_output_5)      # [batch, self.feature_size[4]] -> [batch, self.feature_size[5]]
        activated_output_6 = self.activation(layer_6_output)   # [batch, self.feature_size[5]] -> [batch, self.feature_size[5]]
        layer_7_output = self.layer_7(activated_output_6)      # [batch, self.feature_size[5]] -> [batch, self.feature_size[6]]
        activated_output_7 = self.activation(layer_7_output)   # [batch, self.feature_size[6]] -> [batch, self.feature_size[6]]
        layer_8_output = self.layer_8(activated_output_7)      # [batch, self.feature_size[6]] -> [batch, self.feature_size[7]]
        activated_output_8 = self.activation(layer_8_output)   # [batch, self.feature_size[7]] -> [batch, self.feature_size[7]]
        layer_9_output = self.layer_9(activated_output_8)      # [batch, self.feature_size[7]] -> [batch, self.feature_size[8]]
        activated_output_9 = self.activation(layer_9_output)   # [batch, self.feature_size[8]] -> [batch, self.feature_size[8]]
        layer_10_output = self.layer_10(activated_output_9)    # [batch, self.feature_size[8]] -> [batch, self.num_labels]
        activated_output_10 = self.activation(layer_10_output) # [batch, self.num_labels] -> [batch, self.num_labels]
        
        
        # Training
        if labels is not None:
            loss_fnc = nn.CrossEntropyLoss()
            logits = activated_output_10
            loss = loss_fnc(logits, labels) # logit: 2-dimension, labels: 1-dimension
            return loss
        # Evaluation
        else:
            output = torch.argmax(activated_output_10, -1) # size: [batch]
            return output

## 3. Train & Evaluation

In [9]:
# Evaluation model using test data
def do_test(model, test_dataloader):
    # Set evaluation mode
    model.eval()
    predicts, answers = [], []
    
    for step, batch in enumerate(test_dataloader):
        # Upload data to Memory
        batch = tuple(t.cuda() for t in batch)

        input_features, labels = batch
        output = model(input_features)

        predicts.extend(tensor2list(output))
        answers.extend(tensor2list(labels))
    
    print("Accuracy : {}".format(accuracy_score(answers, predicts)))

In [16]:
def train(config):
    
    # Select ANN Model
    model_param = config["model"]
    model = None
    if model_param == "ANN_Layer3":
        model = ANN_Layer3(config).cuda()
    elif model_param == "ANN_Layer5":
        model = ANN_Layer5(config).cuda()
    elif model_param == "ANN_Layer10":
        model = ANN_Layer10(config).cuda()
    else:
        return

    # Load data
    (train_X, train_y), (test_X, test_y) = load_dataset()

    # Split data in batch size & Shuffle (using TensorDataset/DataLoader)
    train_features = TensorDataset(train_X, train_y)
    train_dataloader = DataLoader(train_features, shuffle=True, batch_size=config["batch_size"])

    test_features = TensorDataset(test_X, test_y)
    test_dataloader = DataLoader(test_features, shuffle=True, batch_size=config["batch_size"])

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    
    # Train
    model.train() # Train mode

    epch = 0
    for epoch in range(config["epoch"]):
        print("Epoch: {}".format(epch+1))
        # Average loss in a epoch
        losses = []

        for step, batch in enumerate(train_dataloader):
            # Upload data to Memory
            batch = tuple(t.cuda() for t in batch)
            
            # batch = (train_X[step], train_y[step])
            input_features, labels = batch 
            
            # Forward (Train mode) -> return loss
            loss = model(input_features, labels) 
            
            # Initialize gradient to zero (before backpropagation)
            optimizer.zero_grad()

            # Calculate gradient from loss
            loss.backward()

            # Update weight using gradient
            optimizer.step()

            if (step+1) % 1000 == 0:
                print("{} step processed.. current loss : {}".format(step+1, loss.data.item()))
            losses.append(loss.data.item())
    
        print("Average Loss : {}".format(np.mean(losses)))
        # Save Model (every epoch)
        torch.save(model.state_dict(), os.path.join(config["output_dir_path"], "epoch_{}.pt".format(epoch + 1)))

        # Test Model (using current model)
        do_test(model, test_dataloader)

        epch += 1
        print()

In [11]:
def test(config):
    
    # Select ANN Model
    model_param = config["model"]
    model = None
    if model_param == "ANN_Layer3":
        model = ANN_Layer3(config).cuda()
    elif model_param == "ANN_Layer5":
        model = ANN_Layer5(config).cuda()
    elif model_param == "ANN_Layer10":
        model = ANN_Layer10(config).cuda()
    else:
        return

    # 저장된 모델 가중치 Load
    model.load_state_dict(torch.load(os.path.join(config["output_dir_path"], config["trained_model_name"])))

    # 데이터 load
    (_, _), (test_X, test_y) = load_dataset()

    test_features = TensorDataset(test_X, test_y)
    test_dataloader = DataLoader(test_features, shuffle=True, batch_size=config["batch_size"])

    do_test(model, test_dataloader)

In [12]:
def Main(config, output_dir):
    # Model directory
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Select train/test
    if(config["mode"] == "train"):
        train(config)
    else:
        test(config)

### a) 3-layers ANN

In [29]:
output_dir3 = os.path.join('/content/drive/My Drive/인공지능(4-2)/wk9.ANN', 'output_layer3')

config3_train = {
            "model": "ANN_Layer3",
            "mode": "train",
            "output_dir_path":output_dir3, # to save (or load) trained weight
            "input_width_size":28, # MNIST
            "input_height_size":28,# MNIST
            "feature_size": [300, 50],
            "num_labels": 10,
            "batch_size":32,
            "epoch":50,
            }

config3_test = {
            "model": "ANN_Layer3",
            "mode": "test",
            "trained_model_name":"epoch_{}.pt".format(50), # trained model name
            "output_dir_path":output_dir3, # to save (or load) trained weight
            "input_width_size":28, # MNIST
            "input_height_size":28,# MNIST
            "feature_size": [300, 50],
            "num_labels": 10,
            "batch_size":32,
            "epoch":50,
            }

In [30]:
# Train
Main(config3_train, output_dir3)

Epoch: 1
1000 step processed.. current loss : 1.97871994972229
Average Loss : 2.0097617909113565
Accuracy : 0.8937

Epoch: 2
1000 step processed.. current loss : 1.7526930570602417
Average Loss : 1.7310325323104858
Accuracy : 0.9192

Epoch: 3
1000 step processed.. current loss : 1.5711044073104858
Average Loss : 1.6296885251363118
Accuracy : 0.9281

Epoch: 4
1000 step processed.. current loss : 1.5667675733566284
Average Loss : 1.5793620758692424
Accuracy : 0.9371

Epoch: 5
1000 step processed.. current loss : 1.5754339694976807
Average Loss : 1.55057044506073
Accuracy : 0.9441

Epoch: 6
1000 step processed.. current loss : 1.5207804441452026
Average Loss : 1.5332377925872802
Accuracy : 0.9477

Epoch: 7
1000 step processed.. current loss : 1.51459801197052
Average Loss : 1.5223919628779092
Accuracy : 0.9489

Epoch: 8
1000 step processed.. current loss : 1.5007967948913574
Average Loss : 1.5149763177235922
Accuracy : 0.9534

Epoch: 9
1000 step processed.. current loss : 1.52935826778411

In [31]:
# Test
Main(config3_test, output_dir3)

Accuracy : 0.9728


### b) 5-layers ANN

In [32]:
output_dir5 = os.path.join('/content/drive/My Drive/인공지능(4-2)/wk9.ANN', 'output_layer5')

config5_train = {
            "model": "ANN_Layer5",
            "mode": "train",
            "output_dir_path":output_dir5, # to save (or load) trained weight
            "input_width_size":28, # MNIST
            "input_height_size":28,# MNIST
            "feature_size": [600, 400, 200, 50],
            "num_labels": 10,
            "batch_size":32,
            "epoch":50,
            }

config5_test = {
            "model": "ANN_Layer5",
            "mode": "test",
            "trained_model_name":"epoch_{}.pt".format(50), # trained model name
            "output_dir_path":output_dir5, # to save (or load) trained weight
            "input_width_size":28, # MNIST
            "input_height_size":28,# MNIST
            "feature_size": [600, 400, 200, 50],
            "num_labels": 10,
            "batch_size":32,
            "epoch":50,
            }

In [33]:
# Train
Main(config5_train, output_dir5)

Epoch: 1
1000 step processed.. current loss : 2.022697687149048
Average Loss : 2.0709655775070193
Accuracy : 0.6097

Epoch: 2
1000 step processed.. current loss : 1.7988651990890503
Average Loss : 1.8479935704549153
Accuracy : 0.6505

Epoch: 3
1000 step processed.. current loss : 1.7271358966827393
Average Loss : 1.7409170670191447
Accuracy : 0.6659

Epoch: 4
1000 step processed.. current loss : 1.6416163444519043
Average Loss : 1.6757575300216674
Accuracy : 0.6735

Epoch: 5
1000 step processed.. current loss : 1.6606508493423462
Average Loss : 1.639350991121928
Accuracy : 0.6738

Epoch: 6
1000 step processed.. current loss : 1.57090425491333
Average Loss : 1.6173273258845011
Accuracy : 0.6903

Epoch: 7
1000 step processed.. current loss : 1.5898807048797607
Average Loss : 1.6043344895044962
Accuracy : 0.7013

Epoch: 8
1000 step processed.. current loss : 1.5734333992004395
Average Loss : 1.5958504837036134
Accuracy : 0.6804

Epoch: 9
1000 step processed.. current loss : 1.531331777572

In [34]:
# Test
Main(config5_test, output_dir5)

Accuracy : 0.9649


### c) 10-layers ANN

In [38]:
output_dir10 = os.path.join('/content/drive/My Drive/인공지능(4-2)/wk9.ANN', 'output_layer10')

config10_train = {
            "model": "ANN_Layer10",
            "mode": "train",
            "output_dir_path":output_dir10, # to save (or load) trained weight
            "input_width_size":28, # MNIST
            "input_height_size":28,# MNIST
            "feature_size": [700, 600, 500, 400, 300, 200, 100, 50, 20],
            "num_labels": 10,
            "batch_size":32,
            "epoch":50,
            }

config10_test = {
            "model": "ANN_Layer10",
            "mode": "test",
            "trained_model_name":"epoch_{}.pt".format(50), # trained model name
            "output_dir_path":output_dir10, # to save (or load) trained weight
            "input_width_size":28, # MNIST
            "input_height_size":28,# MNIST
            "feature_size": [700, 600, 500, 400, 300, 200, 100, 50, 20],
            "num_labels": 10,
            "batch_size":32,
            "epoch":50,
            }

In [36]:
# Train
Main(config10_train, output_dir10)

Epoch: 1
1000 step processed.. current loss : 2.300807237625122
Average Loss : 2.276123397572835
Accuracy : 0.201

Epoch: 2
1000 step processed.. current loss : 2.1767067909240723
Average Loss : 2.122154210027059
Accuracy : 0.2103

Epoch: 3
1000 step processed.. current loss : 2.025160789489746
Average Loss : 2.055441135660807
Accuracy : 0.2101

Epoch: 4
1000 step processed.. current loss : 1.9527311325073242
Average Loss : 2.0301173805236816
Accuracy : 0.2091

Epoch: 5
1000 step processed.. current loss : 2.0192487239837646
Average Loss : 2.024695769818624
Accuracy : 0.2112

Epoch: 6
1000 step processed.. current loss : 2.082991361618042
Average Loss : 2.0155570807774863
Accuracy : 0.2087

Epoch: 7
1000 step processed.. current loss : 2.1398940086364746
Average Loss : 2.0116736794789634
Accuracy : 0.2122

Epoch: 8
1000 step processed.. current loss : 2.1959710121154785
Average Loss : 2.014144316037496
Accuracy : 0.2115

Epoch: 9
1000 step processed.. current loss : 1.9631913900375366


In [39]:
# Test
Main(config10_test, output_dir10)

Accuracy : 0.2111


### d) Result

<h4> Accuracy (test) </h4>
<pre>
     The number of layers   |     Accuracy
---------------------------------------------------
          3-layers          |      0.9728
          5-layers          |      0.9649
          10-layers         |      0.2111

( training 50 epochs, using 50 epoch weight)
</pre>