In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# NOTE(review): X_train, y_train, X_test and y_test are not defined in this cell;
# they are presumably left in the kernel by an earlier cell -- confirm before running.
# Each split is converted to float32 tensors and paired sample-by-sample.
train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)
test_dataset = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.float32),
)

# Mini-batches of 32; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [1]:
import numpy as np
import matplotlib.pyplot as plt

class linear():
    """Fully connected layer: ``output = input . weight + bias``.

    ``weight`` and ``bias`` are NumPy arrays that ``backward`` updates in place
    with plain gradient descent.
    """

    def __init__(self, input_dim, output_dim):
        """Create zero biases and small random weights (standard normal * 0.01)."""
        self.bias = np.zeros(output_dim)
        self.weight = 0.01 * np.random.randn(input_dim, output_dim)

    def forward(self, input):
        """Return the affine projection of ``input`` through this layer."""
        projected = np.dot(input, self.weight)
        return projected + self.bias

    def backward(self, input, output_gradient, learning_rate):
        """Backpropagate through the layer and apply one gradient-descent step.

        Args:
            input: The value that was fed into this layer during the forward pass.
                Presumably a 2-D ``(batch, input_dim)`` array, since it is
                transposed and the bias gradient sums over axis 0 -- verify
                against the caller.
            output_gradient: Gradient of the loss with respect to this layer's
                output, i.e. what the following layer handed back.
            learning_rate: Step size used for the in-place parameter update.

        Returns:
            Gradient of the loss with respect to ``input``, to be passed on to
            the preceding layer.
        """
        # Must be computed first: it needs the weights as they were in the
        # forward pass, before the update below modifies them.
        grad_wrt_input = np.dot(output_gradient, self.weight.T)

        grad_wrt_weight = np.dot(input.T, output_gradient)
        grad_wrt_bias = np.sum(output_gradient, axis=0)

        # In-place updates so any external reference to the arrays stays valid.
        self.weight -= learning_rate * grad_wrt_weight
        self.bias -= learning_rate * grad_wrt_bias

        return grad_wrt_input
    
class ReLU():
    """Rectified linear activation; holds no parameters or state."""

    def forward(self, input):
        """Return the element-wise maximum of 0 and ``input``."""
        return np.maximum(0, input)

    def backward(self, input, output_gradient):
        """Pass ``output_gradient`` through wherever ``input`` was positive.

        Args:
            input: The value that was given to ``forward``.
            output_gradient: Gradient of the loss with respect to the
                activation's output.

        Returns:
            ``output_gradient`` with entries zeroed where ``input <= 0``.
        """
        # Boolean mask; multiplying by it keeps the gradient only on the
        # positions where the activation was active.
        positive_mask = input > 0
        return positive_mask * output_gradient
              
def SoftMax(x):
    """Softmax along the last axis of ``x``.

    The maximum along that axis is subtracted before exponentiating so that
    large values do not overflow; the result is unchanged mathematically.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_x = np.exp(shifted)
    normalizer = np.sum(exp_x, axis=-1, keepdims=True)
    return exp_x / normalizer

def CrossEntropyLoss(y, y_pred):
    """Cross-entropy between targets ``y`` and predicted probabilities ``y_pred``.

    Predictions are clipped to ``[1e-10, 1 - 1e-10]`` so the logarithm never
    sees 0. The result is the sum over every element (no averaging).
    """
    eps = 1e-10
    clipped = np.clip(y_pred, eps, 1 - eps)
    weighted_log = y * np.log(clipped)
    return -np.sum(weighted_log)



In [2]:
class NN_3_layer():
    """Three ``linear`` layers with ReLU between them and a softmax output.

    ``forward`` stores every intermediate value in ``self.inputs`` so that
    ``backward`` can hand each stage the value it received going forward.
    """

    def __init__(self, in_feature, hid_feature, out_feature):
        """Build the layers: in -> hidden -> hidden -> out."""
        # Slot k holds the value fed into stage k; slot 5 is the final
        # linear output that goes into the softmax.
        self.inputs = [[] for _ in range(6)]
        self.layer1 = linear(in_feature, hid_feature)
        self.relu1 = ReLU()
        self.layer2 = linear(hid_feature, hid_feature)
        self.relu2 = ReLU()
        self.layer3 = linear(hid_feature, out_feature)

    def forward(self, input):
        """Run the network on ``input`` and return the softmax of the last layer."""
        stages = (self.layer1, self.relu1, self.layer2, self.relu2, self.layer3)

        self.inputs[0] = input
        # Each stage consumes the previous slot and fills the next one.
        for slot, stage in enumerate(stages, start=1):
            self.inputs[slot] = stage.forward(self.inputs[slot - 1])

        return SoftMax(self.inputs[5])

    def backward(self, output_gradient, learning_rate):
        """Backpropagate ``output_gradient`` and update all three linear layers.

        ``output_gradient`` is handed straight to the last linear layer (there
        is no separate softmax backward step), so it must already be the
        gradient with respect to that layer's output.

        Returns:
            The gradient with respect to the network input.
        """
        cache = self.inputs
        grad = self.layer3.backward(cache[4], output_gradient, learning_rate)
        grad = self.relu2.backward(cache[3], grad)
        grad = self.layer2.backward(cache[2], grad, learning_rate)
        grad = self.relu1.backward(cache[1], grad)
        grad = self.layer1.backward(cache[0], grad, learning_rate)
        return grad
    

In [3]:
from dataset.dataloader import Dataloader

model = NN_3_layer(in_feature=784, hid_feature=128, out_feature=10)

train_loader = Dataloader('dataset/', is_train=True, batch_size=8, shuffle=True)
test_loader = Dataloader('dataset/', is_train=False, batch_size=8, shuffle=False)

epochs = 100
learning_rate = 0.01

# Per-epoch loss history, plotted after training.
train_loss_list = []
test_loss_list = []

# NOTE(review): Dataloader.__len__ is not visible here; it is assumed to return
# the number of batches per pass -- confirm. `.__len__()` is called directly
# (rather than len()) so its return value is used exactly as before.
for i in range(epochs):
    # ---- training pass: forward, accumulate loss, backward + update ----
    running_train_loss = 0
    for images, labels in train_loader:
        # Flatten every sample to one row so it matches the first linear layer.
        input_data = images.reshape(images.shape[0], -1)
        output = model.forward(input_data)
        running_train_loss += CrossEntropyLoss(labels, output)

        # Gradient of softmax + cross-entropy with respect to the last linear
        # output, assuming each label row sums to 1 (e.g. one-hot) -- confirm.
        output_gradient = output - labels
        model.backward(output_gradient, learning_rate)

    # CrossEntropyLoss sums over the batch, so this is the mean summed loss
    # per batch rather than per sample.
    epoch_train_loss = running_train_loss / train_loader.__len__()
    train_loss_list.append(epoch_train_loss)
    print(f"Epoch [{i+1}/{epochs}]\tLoss: {epoch_train_loss}")

    # ---- evaluation pass: forward only, no parameter updates ----
    running_test_loss = 0
    for images, labels in test_loader:
        input_data = images.reshape(images.shape[0], -1)
        output = model.forward(input_data)
        running_test_loss += CrossEntropyLoss(labels, output)

    test_loss_list.append(running_test_loss / test_loader.__len__())


plt.plot(train_loss_list, label='Train')
plt.plot(test_loss_list, label='Test')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train / Test Loss')
plt.legend()
plt.show()

Epoch [1/100]	Loss: 3.085006306205669
Epoch [2/100]	Loss: 0.9506636384887337
Epoch [3/100]	Loss: 0.6514360874251056
Epoch [4/100]	Loss: 0.47858613923250004
Epoch [5/100]	Loss: 0.3939933673027095
Epoch [6/100]	Loss: 0.33288079495430156
Epoch [7/100]	Loss: 0.2787556134139262
Epoch [8/100]	Loss: 0.23216699751107994
Epoch [9/100]	Loss: 0.2257899035116424
Epoch [10/100]	Loss: 0.20341254852986304
Epoch [11/100]	Loss: 0.18879278398143046
Epoch [12/100]	Loss: 0.19603566506110853
Epoch [13/100]	Loss: 0.15470418450253837
Epoch [14/100]	Loss: 0.1285941574429548
Epoch [15/100]	Loss: 0.16253840849741738
Epoch [16/100]	Loss: 0.15238680977130523
Epoch [17/100]	Loss: 0.1583780474296148
Epoch [18/100]	Loss: 0.10091057354279152
Epoch [19/100]	Loss: 0.08987138276690881
Epoch [20/100]	Loss: 0.11706335164553536
Epoch [21/100]	Loss: 0.10442144863758226
Epoch [22/100]	Loss: 0.1781132649758604
Epoch [23/100]	Loss: 0.1551930215061094
Epoch [24/100]	Loss: 0.13159738875685015
Epoch [25/100]	Loss: 0.1056769089079

In [None]:
# Print the value returned by the training loader's __len__ (presumably the
# number of batches per epoch -- verify against Dataloader.__len__).
print(train_loader.__len__())

In [None]:
# Forward every test batch through the trained model.
# NOTE(review): `outputs` is overwritten on each iteration and `labels` is never
# used, so after the loop only the last batch's predictions remain.
for images, labels in test_loader:
    images = images.reshape(images.shape[0], -1)  # (batch size, 784)
    outputs = model.forward(images)

