<a href="https://colab.research.google.com/github/Shj-A/2022_ML_Project/blob/main/RNN_MNIST_HW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

# Mini-batch size (hyperparameter): how many samples each DataLoader batch holds.
batch_size = 32

# Fixed seed so the train/validation split below and PyTorch's global RNG
# start from the same state on every "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

# Same preprocessing for the training and the test images.
to_tensor = transforms.Compose([transforms.ToTensor()])

train_data = torchvision.datasets.MNIST(
    root = './data/MNIST',
    train = True,
    download = True,
    transform = to_tensor
)

test_data = torchvision.datasets.MNIST(
    root = './data/MNIST',
    train = False,
    download = True,
    transform = to_tensor
)

# Hold out 20% of the official training set for validation; random_state pins
# which samples end up in each part.
train_data, valid_data = train_test_split(
    train_data, test_size=0.2, shuffle=True, random_state=SEED
)
print('# of train data : {}'.format(len(train_data)))
print('# of valid data : {}'.format(len(valid_data)))
print('# of test data : {}'.format(len(test_data)))

# Only the training loader reshuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size = batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, shuffle = False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw

# of train data : 48000
# of valid data : 12000
# of test data : 10000


In [None]:
from torch.nn.modules.dropout import Dropout
import torch.nn as nn

class LSTM(nn.Module):
    """Stacked LSTM whose outputs at every time step feed one linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        n_outputs: output width of the final linear layer.
        sequence_len: number of time steps per sample; the linear layer
            expects ``hidden_size * sequence_len`` input features.
    """

    def __init__(self, input_size, hidden_size, num_layers, n_outputs, sequence_len):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_len, n_outputs)

    def forward(self, x):
        """Return the linear layer's output for a batch ``x``.

        ``x`` is batch-first (``batch_first=True``): (batch, sequence_len, input_size).
        The result has shape (batch, n_outputs).
        """
        # new_zeros builds the initial states with x's dtype and device, so the
        # module keeps working after .to(device) / .double() instead of mixing
        # float32 CPU states with the input.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Flatten all time steps into one feature vector per sample.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

class GRU(nn.Module):
    """Stacked GRU whose outputs at every time step feed one linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in the GRU hidden state.
        num_layers: number of stacked GRU layers.
        n_outputs: output width of the final linear layer.
        sequence_len: number of time steps per sample; the linear layer
            expects ``hidden_size * sequence_len`` input features.
    """

    def __init__(self, input_size, hidden_size, num_layers, n_outputs, sequence_len):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_len, n_outputs)

    def forward(self, x):
        """Return the linear layer's output for a batch ``x``.

        ``x`` is batch-first (``batch_first=True``): (batch, sequence_len, input_size).
        The result has shape (batch, n_outputs).
        """
        # new_zeros builds the initial state with x's dtype and device, so the
        # module keeps working after .to(device) / .double().
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.gru(x, h0)
        # Flatten all time steps into one feature vector per sample.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

In [None]:
# Baseline hyperparameters: every sample is consumed as `sequence_len` time
# steps of `input_size` features each.
input_size, sequence_len = 28, 28
num_layers, hidden_size = 2, 64
n_outputs = 10

learning_rate = 0.01

# Baseline model, optimizer and loss used as the starting point of the experiments.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    n_outputs=n_outputs,
    sequence_len=sequence_len,
)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

# 3-2 Change the number of hidden layers

In [None]:
import copy

# Experiment 3-2: train the LSTM classifier with 2, 5 and 10 stacked layers.
hidden_layer = [2,5,10]

for item in hidden_layer:
    num_layers = item
    model = LSTM(input_size, hidden_size, num_layers, n_outputs, sequence_len)
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    loss_function = nn.CrossEntropyLoss()

    print("hiddenlayer :", item,"\n",model)

    # Three consecutive rounds of n_epochs each. The model and optimizer are
    # created once per setting, so each round resumes from the previous one;
    # only the histories and the best-checkpoint tracking start over.
    # NOTE(review): if independent trials are intended, build the model inside this loop.
    for round_idx in range(3):
        n_epochs = 5
        train_loss, valid_loss = [], []  # mean loss per epoch
        train_acc, valid_acc = [], []    # accuracy per epoch

        # Best validation accuracy seen in this round and a snapshot of that model.
        best_acc = 0
        best_model = copy.deepcopy(model)

        for epoch in range(n_epochs):
            # ---- training pass ----
            model.train()
            train_total = train_correct = 0
            epoch_train_loss = 0
            for inputs, labels in train_loader:
                optimizer.zero_grad()  # gradients accumulate unless cleared per batch

                # squeeze(1) drops the singleton dim 1 (presumably the image channel).
                outputs = model(inputs.squeeze(1))
                loss = loss_function(outputs, labels)
                loss.backward()   # compute gradients
                optimizer.step()  # update weights and biases

                predicted = outputs.data.max(1)[1]  # index of the largest score per sample
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                epoch_train_loss += loss.item()

            mean_train_loss = epoch_train_loss/len(train_loader)
            train_accuracy = train_correct/train_total
            train_loss.append(mean_train_loss)
            train_acc.append(train_accuracy)

            # ---- validation pass (no gradients needed) ----
            model.eval()
            valid_total = valid_correct = 0
            epoch_valid_loss = 0
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    outputs = model(inputs.squeeze(1))
                    predicted = outputs.data.max(1)[1]
                    valid_total += labels.size(0)
                    valid_correct += (predicted == labels).sum().item()
                    epoch_valid_loss += loss_function(outputs, labels).item()

            mean_valid_loss = epoch_valid_loss/len(valid_loader)
            valid_accuracy = valid_correct/valid_total
            valid_loss.append(mean_valid_loss)
            valid_acc.append(valid_accuracy)

            print('[{}/{}]'.format(epoch+1, n_epochs))
            print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
            print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

            # Keep a copy of the model whenever validation accuracy improves.
            if valid_accuracy > best_acc:
                print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
                best_acc = valid_accuracy
                best_model = copy.deepcopy(model)

        # ---- test the best checkpoint of this round ----
        best_model.eval()
        test_total = test_correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                predicted = best_model(inputs.squeeze(1)).data.max(1)[1]
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_acc = test_correct / test_total
        print('test accuracy : {:.3f}'.format(test_acc))
        print("----------------------------------------------------------------------------------------------------------------------------------------------------")

hiddenlayer : 2 
 LSTM(
  (lstm): LSTM(28, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 0.171	 training accuracy : 0.947
validation loss : 0.090	 validation accuracy : 0.975
[2/5]
training loss : 0.070	 training accuracy : 0.979
validation loss : 0.064	 validation accuracy : 0.982
[3/5]
training loss : 0.057	 training accuracy : 0.983
validation loss : 0.080	 validation accuracy : 0.979
[4/5]
training loss : 0.050	 training accuracy : 0.984
validation loss : 0.062	 validation accuracy : 0.982
[5/5]
training loss : 0.041	 training accuracy : 0.987
validation loss : 0.075	 validation accuracy : 0.980
test accuracy : 0.981
----------------------------------------------------------------------------------------------------------------------------------------------------
[1/5]
training loss : 0.039	 training accuracy : 0.987
validation loss : 0.079	 validation accuracy : 0.980
[2/5]
training loss : 0.034	 training a

# 3-3 Change optimization

In [None]:
# Experiment 3-3: train the 2-layer LSTM with different optimizers.
#
# The list holds optimizer classes, not instances: an optimizer only updates
# the parameters it was constructed with, so each one must be created AFTER
# the fresh model it is supposed to train.
optimizer_list = [torch.optim.Adagrad, torch.optim.RMSprop, torch.optim.Adadelta]

for item in optimizer_list:
    num_layers = 2
    model = LSTM(input_size, hidden_size, num_layers, n_outputs, sequence_len)
    optimizer = item(model.parameters(), lr = learning_rate)  # bound to this model's parameters
    loss_function = nn.CrossEntropyLoss()

    print("optimizer :", optimizer,"\n",model)

    # Three consecutive rounds of n_epochs each. The model and optimizer are
    # created once per setting, so each round resumes from the previous one;
    # only the histories and the best-checkpoint tracking start over.
    # NOTE(review): if independent trials are intended, build the model inside this loop.
    for round_idx in range(3):
        n_epochs = 5
        train_loss, valid_loss = [], []  # mean loss per epoch
        train_acc, valid_acc = [], []    # accuracy per epoch

        # Best validation accuracy seen in this round and a snapshot of that model.
        best_acc = 0
        best_model = copy.deepcopy(model)

        for epoch in range(n_epochs):
            # ---- training pass ----
            model.train()
            train_total = train_correct = 0
            epoch_train_loss = 0
            for inputs, labels in train_loader:
                optimizer.zero_grad()  # gradients accumulate unless cleared per batch

                # squeeze(1) drops the singleton dim 1 (presumably the image channel).
                outputs = model(inputs.squeeze(1))
                loss = loss_function(outputs, labels)
                loss.backward()   # compute gradients
                optimizer.step()  # update weights and biases

                predicted = outputs.data.max(1)[1]  # index of the largest score per sample
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                epoch_train_loss += loss.item()

            mean_train_loss = epoch_train_loss/len(train_loader)
            train_accuracy = train_correct/train_total
            train_loss.append(mean_train_loss)
            train_acc.append(train_accuracy)

            # ---- validation pass (no gradients needed) ----
            model.eval()
            valid_total = valid_correct = 0
            epoch_valid_loss = 0
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    outputs = model(inputs.squeeze(1))
                    predicted = outputs.data.max(1)[1]
                    valid_total += labels.size(0)
                    valid_correct += (predicted == labels).sum().item()
                    epoch_valid_loss += loss_function(outputs, labels).item()

            mean_valid_loss = epoch_valid_loss/len(valid_loader)
            valid_accuracy = valid_correct/valid_total
            valid_loss.append(mean_valid_loss)
            valid_acc.append(valid_accuracy)

            print('[{}/{}]'.format(epoch+1, n_epochs))
            print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
            print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

            # Keep a copy of the model whenever validation accuracy improves.
            if valid_accuracy > best_acc:
                print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
                best_acc = valid_accuracy
                best_model = copy.deepcopy(model)

        # ---- test the best checkpoint of this round ----
        best_model.eval()
        test_total = test_correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                predicted = best_model(inputs.squeeze(1)).data.max(1)[1]
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_acc = test_correct / test_total
        print('test accuracy : {:.3f}'.format(test_acc))
        print("----------------------------------------------------------------------------------------------------------------------------------------------------")

optimizer : Adagrad (
Parameter Group 0
    eps: 1e-10
    initial_accumulator_value: 0
    lr: 0.01
    lr_decay: 0
    weight_decay: 0
) 
 LSTM(
  (lstm): LSTM(28, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 2.305	 training accuracy : 0.102
validation loss : 2.305	 validation accuracy : 0.105
[2/5]
training loss : 2.305	 training accuracy : 0.102
validation loss : 2.305	 validation accuracy : 0.105
[3/5]
training loss : 2.305	 training accuracy : 0.102
validation loss : 2.305	 validation accuracy : 0.105
[4/5]
training loss : 2.305	 training accuracy : 0.102
validation loss : 2.305	 validation accuracy : 0.105
[5/5]
training loss : 2.305	 training accuracy : 0.102
validation loss : 2.305	 validation accuracy : 0.105
test accuracy : 0.101
----------------------------------------------------------------------------------------------------------------------------------------------------
[1/5]
training loss : 2.

# 3-4 GRU

## 3-4-1 hidden layer size

In [None]:
# Experiment 3-4-1: train the GRU classifier with 2, 5 and 10 stacked layers.
hidden_layer = [2,5,10]

for item in hidden_layer:
    num_layers = item
    model = GRU(input_size, hidden_size, num_layers, n_outputs, sequence_len)
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    loss_function = nn.CrossEntropyLoss()

    print("hiddenlayer :", item,"\n",model)

    # Three consecutive rounds of n_epochs each. The model and optimizer are
    # created once per setting, so each round resumes from the previous one;
    # only the histories and the best-checkpoint tracking start over.
    # NOTE(review): if independent trials are intended, build the model inside this loop.
    for round_idx in range(3):
        n_epochs = 5
        train_loss, valid_loss = [], []  # mean loss per epoch
        train_acc, valid_acc = [], []    # accuracy per epoch

        # Best validation accuracy seen in this round and a snapshot of that model.
        best_acc = 0
        best_model = copy.deepcopy(model)

        for epoch in range(n_epochs):
            # ---- training pass ----
            model.train()
            train_total = train_correct = 0
            epoch_train_loss = 0
            for inputs, labels in train_loader:
                optimizer.zero_grad()  # gradients accumulate unless cleared per batch

                # squeeze(1) drops the singleton dim 1 (presumably the image channel).
                outputs = model(inputs.squeeze(1))
                loss = loss_function(outputs, labels)
                loss.backward()   # compute gradients
                optimizer.step()  # update weights and biases

                predicted = outputs.data.max(1)[1]  # index of the largest score per sample
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                epoch_train_loss += loss.item()

            mean_train_loss = epoch_train_loss/len(train_loader)
            train_accuracy = train_correct/train_total
            train_loss.append(mean_train_loss)
            train_acc.append(train_accuracy)

            # ---- validation pass (no gradients needed) ----
            model.eval()
            valid_total = valid_correct = 0
            epoch_valid_loss = 0
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    outputs = model(inputs.squeeze(1))
                    predicted = outputs.data.max(1)[1]
                    valid_total += labels.size(0)
                    valid_correct += (predicted == labels).sum().item()
                    epoch_valid_loss += loss_function(outputs, labels).item()

            mean_valid_loss = epoch_valid_loss/len(valid_loader)
            valid_accuracy = valid_correct/valid_total
            valid_loss.append(mean_valid_loss)
            valid_acc.append(valid_accuracy)

            print('[{}/{}]'.format(epoch+1, n_epochs))
            print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
            print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

            # Keep a copy of the model whenever validation accuracy improves.
            if valid_accuracy > best_acc:
                print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
                best_acc = valid_accuracy
                best_model = copy.deepcopy(model)

        # ---- test the best checkpoint of this round ----
        best_model.eval()
        test_total = test_correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                predicted = best_model(inputs.squeeze(1)).data.max(1)[1]
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_acc = test_correct / test_total
        print('test accuracy : {:.3f}'.format(test_acc))
        print("----------------------------------------------------------------------------------------------------------------------------------------------------")

hiddenlayer : 2 
 GRU(
  (gru): GRU(28, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 0.265	 training accuracy : 0.930
validation loss : 0.161	 validation accuracy : 0.960
[2/5]
training loss : 0.171	 training accuracy : 0.959
validation loss : 0.146	 validation accuracy : 0.966
[3/5]
training loss : 0.356	 training accuracy : 0.928
validation loss : 0.262	 validation accuracy : 0.939
[4/5]
training loss : 0.206	 training accuracy : 0.951
validation loss : 0.206	 validation accuracy : 0.948
[5/5]
training loss : 0.221	 training accuracy : 0.946
validation loss : 0.195	 validation accuracy : 0.953
test accuracy : 0.966
----------------------------------------------------------------------------------------------------------------------------------------------------
[1/5]
training loss : 0.218	 training accuracy : 0.947
validation loss : 0.403	 validation accuracy : 0.913
[2/5]
training loss : 0.263	 training accu

## 3-4-2 Optimizer

In [None]:
# Experiment 3-4-2: train the 2-layer GRU with different optimizers.
#
# The list holds optimizer classes, not instances: an optimizer only updates
# the parameters it was constructed with, so each one must be created AFTER
# the fresh model it is supposed to train.
optimizer_list = [torch.optim.Adagrad, torch.optim.RMSprop, torch.optim.Adadelta]

for item in optimizer_list:
    num_layers = 2
    model = GRU(input_size, hidden_size, num_layers, n_outputs, sequence_len)
    optimizer = item(model.parameters(), lr = learning_rate)  # bound to this model's parameters
    loss_function = nn.CrossEntropyLoss()

    print("optimizer :", optimizer,"\n",model)

    # Three consecutive rounds of n_epochs each. The model and optimizer are
    # created once per setting, so each round resumes from the previous one;
    # only the histories and the best-checkpoint tracking start over.
    # NOTE(review): if independent trials are intended, build the model inside this loop.
    for round_idx in range(3):
        n_epochs = 5
        train_loss, valid_loss = [], []  # mean loss per epoch
        train_acc, valid_acc = [], []    # accuracy per epoch

        # Best validation accuracy seen in this round and a snapshot of that model.
        best_acc = 0
        best_model = copy.deepcopy(model)

        for epoch in range(n_epochs):
            # ---- training pass ----
            model.train()
            train_total = train_correct = 0
            epoch_train_loss = 0
            for inputs, labels in train_loader:
                optimizer.zero_grad()  # gradients accumulate unless cleared per batch

                # squeeze(1) drops the singleton dim 1 (presumably the image channel).
                outputs = model(inputs.squeeze(1))
                loss = loss_function(outputs, labels)
                loss.backward()   # compute gradients
                optimizer.step()  # update weights and biases

                predicted = outputs.data.max(1)[1]  # index of the largest score per sample
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                epoch_train_loss += loss.item()

            mean_train_loss = epoch_train_loss/len(train_loader)
            train_accuracy = train_correct/train_total
            train_loss.append(mean_train_loss)
            train_acc.append(train_accuracy)

            # ---- validation pass (no gradients needed) ----
            model.eval()
            valid_total = valid_correct = 0
            epoch_valid_loss = 0
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    outputs = model(inputs.squeeze(1))
                    predicted = outputs.data.max(1)[1]
                    valid_total += labels.size(0)
                    valid_correct += (predicted == labels).sum().item()
                    epoch_valid_loss += loss_function(outputs, labels).item()

            mean_valid_loss = epoch_valid_loss/len(valid_loader)
            valid_accuracy = valid_correct/valid_total
            valid_loss.append(mean_valid_loss)
            valid_acc.append(valid_accuracy)

            print('[{}/{}]'.format(epoch+1, n_epochs))
            print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
            print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

            # Keep a copy of the model whenever validation accuracy improves.
            if valid_accuracy > best_acc:
                print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
                best_acc = valid_accuracy
                best_model = copy.deepcopy(model)

        # ---- test the best checkpoint of this round ----
        best_model.eval()
        test_total = test_correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                predicted = best_model(inputs.squeeze(1)).data.max(1)[1]
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_acc = test_correct / test_total
        print('test accuracy : {:.3f}'.format(test_acc))
        print("----------------------------------------------------------------------------------------------------------------------------------------------------")

optimizer : Adagrad (
Parameter Group 0
    eps: 1e-10
    initial_accumulator_value: 0
    lr: 0.01
    lr_decay: 0
    weight_decay: 0
) 
 GRU(
  (gru): GRU(28, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 2.306	 training accuracy : 0.112
validation loss : 2.305	 validation accuracy : 0.111
[2/5]
training loss : 2.306	 training accuracy : 0.112
validation loss : 2.305	 validation accuracy : 0.111
[3/5]
training loss : 2.306	 training accuracy : 0.112
validation loss : 2.305	 validation accuracy : 0.111
[4/5]
training loss : 2.306	 training accuracy : 0.112
validation loss : 2.305	 validation accuracy : 0.111
[5/5]
training loss : 2.306	 training accuracy : 0.112
validation loss : 2.305	 validation accuracy : 0.111
test accuracy : 0.114
----------------------------------------------------------------------------------------------------------------------------------------------------
[1/5]
training loss : 2.306

# 3-5 Change parameter (bias)

In [None]:
class LSTM_bias(nn.Module):
    """Stacked LSTM built with ``bias=False`` whose outputs at every time step feed one linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        n_outputs: output width of the final linear layer.
        sequence_len: number of time steps per sample; the linear layer
            expects ``hidden_size * sequence_len`` input features.
    """

    def __init__(self, input_size, hidden_size, num_layers, n_outputs, sequence_len):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Only the recurrent layers drop their bias terms; the linear layer keeps its bias.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bias=False)
        self.fc = nn.Linear(hidden_size * sequence_len, n_outputs)

    def forward(self, x):
        """Return the linear layer's output for a batch ``x``.

        ``x`` is batch-first (``batch_first=True``): (batch, sequence_len, input_size).
        The result has shape (batch, n_outputs).
        """
        # new_zeros builds the initial states with x's dtype and device, so the
        # module keeps working after .to(device) / .double().
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Flatten all time steps into one feature vector per sample.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

In [None]:
# Experiment 3-5: compare the bias-free LSTM (bias=False) while varying the number of layers.

hidden_layer = [2,5,10]

for item in hidden_layer:
    num_layers = item
    model = LSTM_bias(input_size, hidden_size, num_layers, n_outputs, sequence_len)
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    loss_function = nn.CrossEntropyLoss()

    print("hiddenlayer :", item,"\n",model)

    # Three consecutive rounds of n_epochs each. The model and optimizer are
    # created once per setting, so each round resumes from the previous one;
    # only the histories and the best-checkpoint tracking start over.
    # NOTE(review): if independent trials are intended, build the model inside this loop.
    for round_idx in range(3):
        n_epochs = 5
        train_loss, valid_loss = [], []  # mean loss per epoch
        train_acc, valid_acc = [], []    # accuracy per epoch

        # Best validation accuracy seen in this round and a snapshot of that model.
        best_acc = 0
        best_model = copy.deepcopy(model)

        for epoch in range(n_epochs):
            # ---- training pass ----
            model.train()
            train_total = train_correct = 0
            epoch_train_loss = 0
            for inputs, labels in train_loader:
                optimizer.zero_grad()  # gradients accumulate unless cleared per batch

                # squeeze(1) drops the singleton dim 1 (presumably the image channel).
                outputs = model(inputs.squeeze(1))
                loss = loss_function(outputs, labels)
                loss.backward()   # compute gradients
                optimizer.step()  # update weights and biases

                predicted = outputs.data.max(1)[1]  # index of the largest score per sample
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                epoch_train_loss += loss.item()

            mean_train_loss = epoch_train_loss/len(train_loader)
            train_accuracy = train_correct/train_total
            train_loss.append(mean_train_loss)
            train_acc.append(train_accuracy)

            # ---- validation pass (no gradients needed) ----
            model.eval()
            valid_total = valid_correct = 0
            epoch_valid_loss = 0
            with torch.no_grad():
                for inputs, labels in valid_loader:
                    outputs = model(inputs.squeeze(1))
                    predicted = outputs.data.max(1)[1]
                    valid_total += labels.size(0)
                    valid_correct += (predicted == labels).sum().item()
                    epoch_valid_loss += loss_function(outputs, labels).item()

            mean_valid_loss = epoch_valid_loss/len(valid_loader)
            valid_accuracy = valid_correct/valid_total
            valid_loss.append(mean_valid_loss)
            valid_acc.append(valid_accuracy)

            print('[{}/{}]'.format(epoch+1, n_epochs))
            print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
            print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

            # Keep a copy of the model whenever validation accuracy improves.
            if valid_accuracy > best_acc:
                print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
                best_acc = valid_accuracy
                best_model = copy.deepcopy(model)

        # ---- test the best checkpoint of this round ----
        best_model.eval()
        test_total = test_correct = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                predicted = best_model(inputs.squeeze(1)).data.max(1)[1]
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_acc = test_correct / test_total
        print('test accuracy : {:.3f}'.format(test_acc))
        print("----------------------------------------------------------------------------------------------------------------------------------------------------")

hiddenlayer : 2 
 LSTM_bias(
  (lstm): LSTM(28, 64, num_layers=2, bias=False, batch_first=True)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 0.171	 training accuracy : 0.947
validation loss : 0.087	 validation accuracy : 0.973
[2/5]
training loss : 0.071	 training accuracy : 0.978
validation loss : 0.060	 validation accuracy : 0.980
[3/5]
training loss : 0.055	 training accuracy : 0.983
validation loss : 0.076	 validation accuracy : 0.978
[4/5]
training loss : 0.046	 training accuracy : 0.985
validation loss : 0.067	 validation accuracy : 0.982
[5/5]
training loss : 0.040	 training accuracy : 0.987
validation loss : 0.068	 validation accuracy : 0.982
test accuracy : 0.980
----------------------------------------------------------------------------------------------------------------------------------------------------
[1/5]
training loss : 0.034	 training accuracy : 0.989
validation loss : 0.063	 validation accuracy : 0.984
[2/5]
training loss : 

# 3-6 Compare accuracy of RNN/CNN

# 3-7 L2 (ridge) regularization

In [None]:
# Reset the hyperparameters to the baseline values (earlier sweeps overwrite num_layers).
input_size, sequence_len = 28, 28
num_layers, hidden_size = 2, 64
n_outputs = 10

learning_rate = 0.01

# Same baseline LSTM; the only change is Adam's weight_decay, which is this
# section's L2 regularization term.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    n_outputs=n_outputs,
    sequence_len=sequence_len,
)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
loss_function = nn.CrossEntropyLoss()

In [None]:
# Train the model for n_epochs, validating after every epoch and keeping a
# copy of the model with the best validation accuracy.
n_epochs = 5

train_loss, valid_loss = [], []  # mean loss per epoch
train_acc, valid_acc = [], []    # accuracy per epoch

# Best validation accuracy so far and a snapshot of the model that reached it.
best_acc = 0
best_model = copy.deepcopy(model)

for epoch in range(n_epochs):
    # ---- training pass ----
    model.train()
    train_total = train_correct = 0
    epoch_train_loss = 0
    for inputs, labels in train_loader:
        # PyTorch accumulates gradients, so they must be zeroed explicitly
        # before back-propagating each mini-batch.
        optimizer.zero_grad()

        # squeeze(1) drops the singleton dim 1 (presumably the image channel).
        outputs = model(inputs.squeeze(1))
        loss = loss_function(outputs, labels)
        loss.backward()   # compute gradients
        optimizer.step()  # update weights and biases

        predicted = outputs.data.max(1)[1]  # index of the largest score per sample
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        epoch_train_loss += loss.item()

    mean_train_loss = epoch_train_loss/len(train_loader)
    train_accuracy = train_correct/train_total
    train_loss.append(mean_train_loss)
    train_acc.append(train_accuracy)

    # ---- validation pass (no gradients needed) ----
    model.eval()
    valid_total = valid_correct = 0
    epoch_valid_loss = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            outputs = model(inputs.squeeze(1))
            predicted = outputs.data.max(1)[1]
            valid_total += labels.size(0)
            valid_correct += (predicted == labels).sum().item()
            epoch_valid_loss += loss_function(outputs, labels).item()

    mean_valid_loss = epoch_valid_loss/len(valid_loader)
    valid_accuracy = valid_correct/valid_total
    valid_loss.append(mean_valid_loss)
    valid_acc.append(valid_accuracy)

    print('[{}/{}]'.format(epoch+1, n_epochs))
    print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
    print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

    # Keep a copy of the model whenever validation accuracy improves.
    if valid_accuracy > best_acc:
        print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
        best_acc = valid_accuracy
        best_model = copy.deepcopy(model)

[1/5]
training loss : 0.177	 training accuracy : 0.945
validation loss : 0.075	 validation accuracy : 0.977
[2/5]
training loss : 0.075	 training accuracy : 0.977
validation loss : 0.068	 validation accuracy : 0.979
[3/5]
training loss : 0.064	 training accuracy : 0.980
validation loss : 0.063	 validation accuracy : 0.979
[4/5]
training loss : 0.056	 training accuracy : 0.983
validation loss : 0.058	 validation accuracy : 0.982
[5/5]
training loss : 0.052	 training accuracy : 0.984
validation loss : 0.099	 validation accuracy : 0.970


# 3-8 Add Dropout

In [None]:
class LSTM_DR(nn.Module):
    """Stacked LSTM classifier with dropout, followed by one linear layer.

    The LSTM outputs of all time steps are flattened and fed to the linear
    layer, so every input must contain exactly ``sequence_len`` time steps.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        n_outputs: number of values produced by the final linear layer.
        sequence_len: number of time steps in each input sequence.
        drop_prop: dropout probability passed to ``nn.LSTM(dropout=...)``.
            Per the PyTorch documentation this dropout acts on the outputs of
            every LSTM layer except the last one.
    """

    def __init__(self, input_size, hidden_size, num_layers, n_outputs, sequence_len, drop_prop):
        super(LSTM_DR, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,dropout = drop_prop)
        # The classifier sees the hidden outputs of every time step, flattened.
        self.fc = nn.Linear(hidden_size * sequence_len, n_outputs)

    def forward(self, x):
        """Map ``x`` of shape (batch, sequence_len, input_size) to (batch, n_outputs)."""
        # Zero initial states are created on x's device and dtype so the module
        # keeps working after model.to(device) or model.double().
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        x, _ = self.lstm(x, (h0, c0))
        # Flatten (batch, sequence_len, hidden_size) -> (batch, sequence_len * hidden_size).
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

In [None]:
# Dropout probabilities to compare; a fresh LSTM_DR is trained for each one.
prop = [0.1,0.2,0.3]

# Flat per-epoch histories. Every dropout setting appends its n_epochs
# entries here back to back, in the order of `prop`.
train_loss = [] # train loss per epoch
valid_loss = [] # valid loss per epoch

train_acc = [] # train accuracy per epoch
valid_acc = [] # valid accuracy per epoch

# The same metrics grouped by dropout probability, so one setting's curves
# and final score can be read without slicing the flat lists above.
history = {}              # drop_prop -> {'train_loss': [...], 'valid_loss': [...], ...}
test_acc_by_dropout = {}  # drop_prop -> test accuracy of that setting's best model

n_epochs = 5

for item in prop:
    drop_prop = item
    model = LSTM_DR(input_size, hidden_size, num_layers, n_outputs, sequence_len, drop_prop)
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    loss_function = nn.CrossEntropyLoss()

    print("Dropout property :", item,"\n",model)

    run_history = {'train_loss': [], 'valid_loss': [], 'train_acc': [], 'valid_acc': []}
    history[drop_prop] = run_history

    # Best-so-far tracking starts over for every dropout setting.
    best_acc = 0
    best_model = copy.deepcopy(model)

    for epoch in range(n_epochs):
        # ---- training pass ----
        model.train() # set model as training mode(for compute gradient)
        train_total = 0
        train_correct = 0
        epoch_train_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()

            # squeeze(1) drops the size-1 dimension at index 1 (the single
            # MNIST channel) so the model receives (batch, 28, 28).
            outputs = model(inputs.squeeze(1))
            _, predicted = torch.max(outputs.detach(), 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

            loss = loss_function(outputs, labels)
            epoch_train_loss += loss.item()

            loss.backward() # compute gradient
            optimizer.step() # update weight & bias in the model with computed gradient

        mean_train_loss = epoch_train_loss/len(train_loader)
        train_accuracy = train_correct/train_total
        train_loss.append(mean_train_loss)
        train_acc.append(train_accuracy)
        run_history['train_loss'].append(mean_train_loss)
        run_history['train_acc'].append(train_accuracy)

        # ---- validation pass ----
        model.eval() # set model as evaluation mode
        with torch.no_grad(): # gradients are not needed for evaluation
            valid_total = 0
            valid_correct = 0
            epoch_valid_loss = 0
            for inputs, labels in valid_loader:
                outputs = model(inputs.squeeze(1))

                _, predicted = torch.max(outputs, 1)
                valid_total += labels.size(0)
                valid_correct += (predicted == labels).sum().item()

                loss = loss_function(outputs, labels)
                epoch_valid_loss += loss.item()

        mean_valid_loss = epoch_valid_loss/len(valid_loader)
        valid_accuracy = valid_correct/valid_total
        valid_loss.append(mean_valid_loss)
        valid_acc.append(valid_accuracy)
        run_history['valid_loss'].append(mean_valid_loss)
        run_history['valid_acc'].append(valid_accuracy)

        print('[{}/{}]'.format(epoch+1, n_epochs))
        print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
        print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

        # Keep a snapshot of the weights with the best validation accuracy.
        if valid_accuracy > best_acc:
            print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
            best_acc = valid_accuracy
            best_model = copy.deepcopy(model)

    # ---- test pass, using this setting's best snapshot ----
    best_model.eval()
    with torch.no_grad():
        test_total = 0
        test_correct = 0
        for inputs, labels in test_loader:
            outputs = best_model(inputs.squeeze(1))

            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

    test_acc = test_correct / test_total
    test_acc_by_dropout[drop_prop] = test_acc
    print('test accuracy : {:.3f}'.format(test_acc))

Dropout property : 0.1 
 LSTM_DR(
  (lstm): LSTM(28, 64, num_layers=2, batch_first=True, dropout=0.1)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 0.175	 training accuracy : 0.946
validation loss : 0.096	 validation accuracy : 0.973
[2/5]
training loss : 0.080	 training accuracy : 0.974
validation loss : 0.070	 validation accuracy : 0.978
[3/5]
training loss : 0.065	 training accuracy : 0.980
validation loss : 0.073	 validation accuracy : 0.977
[4/5]
training loss : 0.058	 training accuracy : 0.982
validation loss : 0.062	 validation accuracy : 0.981
[5/5]
training loss : 0.050	 training accuracy : 0.984
validation loss : 0.066	 validation accuracy : 0.980
test accuracy : 0.981
Dropout property : 0.2 
 LSTM_DR(
  (lstm): LSTM(28, 64, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=1792, out_features=10, bias=True)
)
[1/5]
training loss : 0.181	 training accuracy : 0.943
validation loss : 0.070	 validation accuracy : 0.979


# 3-9 Save checkpoint

## Dropout = 0.25인 LSTM을 save

In [None]:
import os

def save_model(path, model, optimizer, epoch, loss, filename='cnn_model.pt'):
    """Write a training checkpoint to ``os.path.join(path, filename)``.

    The checkpoint is a dict with the keys 'epoch', 'model_state_dict',
    'optimizer_state_dict' and 'loss'.

    Args:
        path: directory to write into; it is created if it does not exist.
        model: object whose ``state_dict()`` is stored.
        optimizer: object whose ``state_dict()`` is stored.
        epoch: value stored under 'epoch'.
        loss: value stored under 'loss'.
        filename: name of the checkpoint file. The default 'cnn_model.pt' is
            kept for backward compatibility with code that loads that name.
    """
    # An empty path means the current directory; os.makedirs('') would raise.
    if path:
        os.makedirs(path, exist_ok=True)
    checkpoint_path = os.path.join(path, filename)

    torch.save({
        'epoch' : epoch,
        'model_state_dict' : model.state_dict(),
        'optimizer_state_dict' : optimizer.state_dict(),
        'loss' : loss
    }, checkpoint_path)
    print('model saved, {}'.format(checkpoint_path))

In [None]:
# Mount Google Drive at /content/drive. The google.colab module is only
# available inside a Google Colab runtime, so this cell fails elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder on the mounted Google Drive that receives the checkpoint file.
save_dir = '/content/drive/MyDrive/ColabNotebooks/2022_ML_hw4'

train_loss = [] # train loss per epoch
valid_loss = [] # valid loss per epoch

train_acc = [] # train accuracy per epoch
valid_acc = [] # valid accuracy per epoch


# Train one LSTM_DR with dropout 0.25 and checkpoint it whenever the
# validation accuracy improves.
model = LSTM_DR(input_size, hidden_size, num_layers, n_outputs, sequence_len, 0.25)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
loss_function = nn.CrossEntropyLoss()

n_epochs = 5

best_acc = 0
best_model = copy.deepcopy(model)

for epoch in range(n_epochs):
    # ---- training pass ----
    model.train() # set model as training mode(for compute gradient)
    train_total = 0
    train_correct = 0
    epoch_train_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # squeeze(1) drops the size-1 dimension at index 1 (the single MNIST
        # channel) so the model receives (batch, 28, 28).
        outputs = model(inputs.squeeze(1))
        _, predicted = torch.max(outputs.detach(), 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

        loss = loss_function(outputs, labels)
        epoch_train_loss += loss.item()

        loss.backward() # compute gradient
        optimizer.step() # update weight & bias in the model with computed gradient

    mean_train_loss = epoch_train_loss/len(train_loader)
    train_accuracy = train_correct/train_total
    train_loss.append(mean_train_loss)
    train_acc.append(train_accuracy)

    # ---- validation pass ----
    model.eval() # set model as evaluation mode
    with torch.no_grad(): # gradients are not needed for evaluation
        valid_total = 0
        valid_correct = 0
        epoch_valid_loss = 0
        for inputs, labels in valid_loader:
            outputs = model(inputs.squeeze(1))

            _, predicted = torch.max(outputs, 1)
            valid_total += labels.size(0)
            valid_correct += (predicted == labels).sum().item()

            loss = loss_function(outputs, labels)
            epoch_valid_loss += loss.item()

    mean_valid_loss = epoch_valid_loss/len(valid_loader)
    valid_accuracy = valid_correct/valid_total
    valid_loss.append(mean_valid_loss)
    valid_acc.append(valid_accuracy)

    print('[{}/{}]'.format(epoch+1, n_epochs))
    print('training loss : {:.3f}\t training accuracy : {:.3f}'.format(mean_train_loss, train_accuracy))
    print('validation loss : {:.3f}\t validation accuracy : {:.3f}'.format(mean_valid_loss, valid_accuracy))

    if valid_accuracy > best_acc:
        print('validation accuracy improved {:.5f} ======> {:.5f}'.format(best_acc, valid_accuracy))
        best_acc = valid_accuracy
        best_model = copy.deepcopy(model)
        # Store the per-batch mean training loss, i.e. the same figure that is
        # logged above, rather than the raw sum over all batches.
        save_model(save_dir, model, optimizer, epoch, mean_train_loss)

[1/5]
training loss : 0.186	 training accuracy : 0.942
validation loss : 0.071	 validation accuracy : 0.979
model saved, /content/drive/MyDrive/ColabNotebooks/2022_ML_hw4/cnn_model.pt
[2/5]
training loss : 0.086	 training accuracy : 0.974
validation loss : 0.067	 validation accuracy : 0.981
model saved, /content/drive/MyDrive/ColabNotebooks/2022_ML_hw4/cnn_model.pt
[3/5]
training loss : 0.070	 training accuracy : 0.978
validation loss : 0.059	 validation accuracy : 0.982
model saved, /content/drive/MyDrive/ColabNotebooks/2022_ML_hw4/cnn_model.pt
[4/5]
training loss : 0.063	 training accuracy : 0.980
validation loss : 0.060	 validation accuracy : 0.981
[5/5]
training loss : 0.061	 training accuracy : 0.981
validation loss : 0.048	 validation accuracy : 0.984
model saved, /content/drive/MyDrive/ColabNotebooks/2022_ML_hw4/cnn_model.pt


In [None]:
# Measure test accuracy of best_model, the snapshot the training cell keeps
# whenever validation accuracy improves.
best_model.eval()  # evaluation mode
test_total, test_correct = 0, 0
with torch.no_grad():  # no gradients are needed for scoring
    for images, targets in test_loader:
        scores = best_model(images.squeeze(1))
        predicted = scores.max(dim=1)[1]  # index of the highest score per sample

        test_total += targets.size(0)
        test_correct += (predicted == targets).sum().item()

test_acc = test_correct / test_total
print('test accuracy : {:.3f}'.format(test_acc))

test accuracy : 0.983


In [None]:
# Rebuild the architecture the checkpoint was saved from (dropout 0.25).
loaded_model = LSTM_DR(input_size, hidden_size, num_layers, n_outputs, sequence_len, 0.25)
# The optimizer has to wrap loaded_model's parameters. If it wrapped the old
# `model`, the restored optimizer state would update the wrong network when
# training is resumed.
optimizer = torch.optim.Adam(loaded_model.parameters(), lr = learning_rate)

# Restore weights, optimizer state and bookkeeping values from the checkpoint.
checkpoint = torch.load(os.path.join(save_dir, 'cnn_model.pt'))
loaded_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
print('model successfully loaded!')

# Test accuracy of the restored model.
loaded_model.eval()
with torch.no_grad():
    test_total = 0
    test_correct = 0
    for inputs, labels in test_loader:
        outputs = loaded_model(inputs.squeeze(1))

        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_acc = test_correct / test_total
print('test accuracy : {:.3f}'.format(test_acc))

model successfully loaded!
test accuracy : 0.983
