# In [15]:
import sys
import os
import numpy as np
from collections import OrderedDict
sys.path.append(os.pardir)  # 상위 디렉토리의 파일을 가져올 수 있도록 설정
from dataset.mnist import load_mnist
from common.layers import Affine, Relu, SoftmaxWithLoss
from common.gradient import numerical_gradient

class MultiLayerNet:
    """Fully connected network built from Affine/Relu layers.

    The network is a stack ``Affine1 -> Relu1 -> ... -> AffineN`` (no Relu
    after the last Affine) followed by a ``SoftmaxWithLoss`` output layer
    that is only used when computing the loss.

    Attributes:
        params: dict mapping ``'W1'..'WN'`` / ``'b1'..'bN'`` to the weight
            and bias arrays of the N Affine layers.
        layers: OrderedDict of the hidden/output layers in forward order,
            keyed ``'Affine{i}'`` and ``'Relu{i}'`` (1-based).
        lastLayer: the ``SoftmaxWithLoss`` instance used by ``loss``.
    """

    def __init__(self, input_size, hidden_sizes, output_size, weight_init_std=0.01):
        """Create the parameters and layers.

        Args:
            input_size: number of input features.
            hidden_sizes: iterable with the width of each hidden layer.
            output_size: number of output units.
            weight_init_std: scale applied to the standard-normal initial
                weights.
        """
        self.params = {}
        self.layers = OrderedDict()

        # list() lets callers pass any iterable (e.g. a tuple), not only a list.
        layer_sizes = [input_size] + list(hidden_sizes) + [output_size]
        num_affine = len(layer_sizes) - 1

        size_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
        for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
            self.params[f'W{idx}'] = weight_init_std * np.random.randn(fan_in, fan_out)
            self.params[f'b{idx}'] = np.zeros(fan_out)
            # The layer receives the very same array objects stored in params.
            # NOTE(review): presumably Affine keeps references to them, so
            # in-place updates of params reach the layer -- confirm in
            # common.layers.
            self.layers[f'Affine{idx}'] = Affine(self.params[f'W{idx}'], self.params[f'b{idx}'])
            # Every Affine except the last one is followed by a Relu.
            if idx < num_affine:
                self.layers[f'Relu{idx}'] = Relu()

        self.lastLayer = SoftmaxWithLoss()  # output layer

    def predict(self, x):
        """Run ``x`` forward through every layer in ``self.layers`` and return the result."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return ``lastLayer.forward(predict(x), t)`` for inputs ``x`` and targets ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction equals the label.

        ``t`` may be a 1-D array of class indices or a 2-D array (e.g.
        one-hot), in which case it is reduced with ``argmax`` along axis 1.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        """Compute parameter gradients by backpropagation.

        Args:
            x: input batch.
            t: targets for the batch.

        Returns:
            dict with the same keys as ``self.params`` (``'W{i}'``,
            ``'b{i}'``) holding the ``dW`` / ``db`` attributes of the
            corresponding ``Affine{i}`` layer after the backward pass.
        """
        # Forward pass (the layers record what they need for backward).
        self.loss(x, t)

        # Backward pass: output layer first, then the stack in reverse order.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect gradients. Affine layers are looked up by their own 1-based
        # index so the keys line up with self.params; enumerating self.layers
        # directly would yield key strings (and would also count the
        # interleaved Relu layers).
        grads = {}
        num_affine = len(self.params) // 2  # one W and one b per Affine layer
        for idx in range(1, num_affine + 1):
            affine = self.layers[f'Affine{idx}']
            grads[f'W{idx}'] = affine.dW
            grads[f'b{idx}'] = affine.db
        return grads

if __name__ == '__main__':
    # Train a 784-50-30-10 network on MNIST with plain mini-batch SGD and
    # report train/test accuracy once per epoch.

    # Load the data (normalize=True, labels as one-hot vectors).
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

    # Build the network.
    network = MultiLayerNet(input_size=784, hidden_sizes=[50, 30], output_size=10)

    # Hyperparameters.
    iters_num = 10000
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    # Floor division keeps this an int, so the `i % iter_per_epoch` check
    # below fires once per epoch even when train_size is not an exact
    # multiple of batch_size (true division would give a fractional period).
    iter_per_epoch = max(train_size // batch_size, 1)

    for i in range(iters_num):
        # Sample a random mini-batch (indices drawn by np.random.choice).
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Gradients via backpropagation.
        grad = network.gradient(x_batch, t_batch)

        # SGD step; `-=` updates the parameter arrays in place.
        for key in network.params:
            network.params[key] -= learning_rate * grad[key]

        # Record the loss on the same batch after the update.
        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        # Once per epoch: evaluate accuracy on the full train and test sets.
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print(f"train acc, test acc | {train_acc:.6f}, {test_acc:.6f}")

# Output of the cell above:
# ModuleNotFoundError: No module named 'dataset.mnist'