# ch05/layer_naive.py

# ch05/buy_apple.py

# ch05/buy_apple_orange.py

# ch05/two_layer_net.py

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Weights are drawn from a zero-mean normal distribution with standard
    deviation ``weight_init_std``; biases are initialized to zero.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output classes.
        weight_init_std: Standard deviation used for weight initialization.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        super().__init__()
        # Create the layers
        self.affine1 = nn.Linear(input_size, hidden_size)
        self.affine2 = nn.Linear(hidden_size, output_size)

        # Initialize weights and biases (same order as the layers are applied)
        for layer in (self.affine1, self.affine2):
            nn.init.normal_(layer.weight, mean=0.0, std=weight_init_std)
            nn.init.constant_(layer.bias, 0.0)

    def forward(self, x):
        """Return the raw class scores (logits) for ``x``."""
        hidden = F.relu(self.affine1(x))
        return self.affine2(hidden)

    def predict(self, x):
        """Return softmax class probabilities for ``x`` without tracking gradients.

        The softmax is taken over the last dimension, so both a batch of
        samples and a single unbatched sample are accepted.
        """
        with torch.no_grad():
            logits = self.forward(x)
            return F.softmax(logits, dim=-1)

    # x: input data, t: teacher (label) data
    def loss(self, x, t):
        """Return the cross-entropy loss between the network output and ``t``.

        ``t`` may hold class indices or per-class rows (e.g. one-hot labels).
        """
        logits = self.forward(x)
        # A target with the same rank as the logits is treated by
        # cross_entropy as class probabilities, which must be floating point.
        # Cast so integer one-hot labels work and the dtypes match.
        if t.dim() == logits.dim() and t.dtype != logits.dtype:
            t = t.to(logits.dtype)
        return F.cross_entropy(logits, t)

    def accuracy(self, x, t):
        """Return the fraction of samples in ``x`` classified as ``t``.

        ``t`` may hold class indices or one-hot rows; the result is a
        Python float.
        """
        with torch.no_grad():
            logits = self.forward(x)
            preds = torch.argmax(logits, dim=-1)
            # Same rank as the logits means one-hot rows: reduce to indices.
            if t.dim() == logits.dim():
                t = torch.argmax(t, dim=-1)
            return (preds == t).float().mean().item()

    # numerical_gradient is intentionally omitted (marked "not needed" in the
    # original): gradient() below obtains gradients through loss.backward().

    def gradient(self, x, t):
        """Return the loss gradients for every parameter.

        Returns:
            dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'`` mapping to
            the gradients of the corresponding layer weights and biases.
        """
        self.zero_grad()  # Clear previous gradients
        loss = self.loss(x, t)
        loss.backward()   # Compute gradients
        # Return copies: depending on the PyTorch version, a later zero_grad()
        # may zero the .grad tensors in place, which would silently change a
        # dictionary returned earlier.
        return {
            'W1': self.affine1.weight.grad.detach().clone(),
            'b1': self.affine1.bias.grad.detach().clone(),
            'W2': self.affine2.weight.grad.detach().clone(),
            'b2': self.affine2.bias.grad.detach().clone(),
        }


# ch05/gradient_check.py

# ch05/train_neuralnet.py

In [5]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from dataset.mnist import load_mnist
import numpy as np
# Load the MNIST dataset through the project-local `dataset.mnist.load_mnist`
# helper. The code below treats the four results as NumPy arrays (it indexes
# them with an index array and passes them to `torch.from_numpy`) and treats
# the labels as 2-D rows that are reduced with `argmax(dim=1)`.
# NOTE(review): presumably `normalize=True` rescales pixel values and
# `one_hot_label=True` yields one-hot label rows - confirm in dataset/mnist.py.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)


In [25]:
# Initialize the network
input_size = 784
hidden_size = 50
output_size = 10
learning_rate = 0.1
network = TwoLayerNet(input_size=input_size, hidden_size=hidden_size, output_size=output_size)

# Optimizer: plain stochastic gradient descent over all network parameters
optimizer = torch.optim.SGD(network.parameters(), lr=learning_rate)

# Training parameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100

# Per-iteration loss and per-epoch accuracies collected during training
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations that make up one epoch. Floor division keeps this an
# integer, so the `i % iter_per_epoch == 0` check in the training loop fires
# once per epoch even when train_size is not an exact multiple of batch_size
# (with true division the float remainder is almost never exactly zero).
iter_per_epoch = max(train_size // batch_size, 1)


# The full train/test sets do not change during training, so convert them to
# tensors, flatten them and reduce the label rows to class indices once,
# instead of repeating that work at every evaluation.
x_train_eval = torch.from_numpy(x_train).to(torch.float32)
x_train_eval = x_train_eval.reshape(x_train_eval.size(0), -1)
train_labels = torch.argmax(torch.from_numpy(t_train).to(torch.float32), dim=1)

x_test_eval = torch.from_numpy(x_test).to(torch.float32)
x_test_eval = x_test_eval.reshape(x_test_eval.size(0), -1)
test_labels = torch.argmax(torch.from_numpy(t_test).to(torch.float32), dim=1)

# Training loop
for i in range(iters_num):
    # Draw a random mini-batch of sample indices
    batch_mask = np.random.choice(train_size, batch_size)

    # Convert from NumPy to tensors to work with the torch library
    x_batch = torch.from_numpy(x_train[batch_mask]).to(torch.float32)
    t_batch = torch.from_numpy(t_train[batch_mask]).to(torch.float32)
    # Flatten each sample, matching how the evaluation inputs are prepared
    x_batch = x_batch.reshape(x_batch.size(0), -1)

    loss = network.loss(x_batch, t_batch)

    # Backpropagation and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    train_loss_list.append(loss.item())

    # Accuracy calculation, once every iter_per_epoch iterations
    if i % iter_per_epoch == 0:
        # Training accuracy
        preds = network.predict(x_train_eval)
        train_correct = (torch.argmax(preds, dim=1) == train_labels).sum().item()
        train_total = train_labels.size(0)
        train_acc = train_correct / train_total

        # Test accuracy
        preds = network.predict(x_test_eval)
        test_correct = (torch.argmax(preds, dim=1) == test_labels).sum().item()
        test_total = test_labels.size(0)
        test_acc = test_correct / test_total

        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)


0.09625 0.0962
0.9028 0.907
0.9214666666666667 0.9251
0.9348666666666666 0.9343
0.9441 0.9435
0.9501166666666667 0.9484
0.9544166666666667 0.953
0.9602333333333334 0.957
0.9632833333333334 0.9583
0.9662 0.9603
0.9655333333333334 0.9602
0.971 0.9647
0.9729166666666667 0.9665
0.9740666666666666 0.9668
0.9750333333333333 0.966
0.9778 0.9695
0.9779 0.9696
