## GPUでの学習

### .to(device)

In [1]:
import torch

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
a = torch.randn(10)
b = a.to(device)

In [4]:
# 同じデバイスにあるTensor同士しか演算できない
# a + b

In [5]:
import torch
import time

# CPU上でTensorを作成
tensor_cpu = torch.randn(10000, 10000)

# GPU上でTensorを作成 (もしGPUが利用可能なら)
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor_gpu = tensor_cpu.to(device)

# CPU上での計算の時間を測定
start_time = time.time()
result_cpu = torch.mm(tensor_cpu, tensor_cpu)
end_time = time.time()
print(f"Time taken on CPU: {end_time - start_time:.5f} seconds")

# GPU上での計算の時間を測定 (もしGPUが利用可能なら)
if device == "cuda":
    start_time = time.time()
    result_gpu = torch.mm(tensor_gpu, tensor_gpu)
    end_time = time.time()
    print(f"Time taken on GPU: {end_time - start_time:.5f} seconds")

Time taken on CPU: 35.83416 seconds
Time taken on GPU: 3.53376 seconds


### MNISTのMLP学習ループ

In [6]:
import torch
from sklearn import datasets
import matplotlib.pyplot as plt
from torch.nn import functional as F
from sklearn.model_selection import train_test_split
import numpy as np

In [7]:
device = 'cuda'
# ======モデル======
class Linear():
    def __init__(self, in_features, out_features, n):
        self.W = (torch.randn((out_features, in_features)) * torch.sqrt(torch.tensor(2.0 / n))).to(device)
        self.W.requires_grad = True
        self.b = torch.zeros((1, out_features)).to(device)
        self.b.requires_grad = True

    def forward(self, X):
        self.X = X
        self.Z = X @ self.W.T + self.b
        return self.Z

    def backward(self, Z):
        self.W.grad_ = Z.grad_.T @ self.X
        self.b.grad_ = torch.sum(Z.grad_, dim=0)
        self.X.grad_ = Z.grad_ @ self.W
        return self.X.grad_

class ReLU():
    def forward(self, X):
        self.X = X
        return X.clamp_min(0.)

    def backward(self, A):
        return A.grad_ * (self.X > 0).float()

class SoftmaxCrossEntropy:
    def forward(self, X, y):
        e_x = torch.exp(X - torch.max(X, dim=-1, keepdim=True)[0])
        self.softmax_out = e_x / (torch.sum(e_x, dim=-1, keepdim=True) + 1e-10)

        log_probs = torch.log(self.softmax_out + 1e-10)
        target_log_probs = log_probs * y

        self.loss = -target_log_probs.sum(dim=-1).mean()
        return self.loss

    def backward(self, y):
        return (self.softmax_out - y) / y.shape[0]

class Model:
    def __init__(self, input_features, hidden_units, output_units, data_num):
        self.linear1 = Linear(input_features, hidden_units, data_num)
        self.relu = ReLU()
        self.linear2 = Linear(hidden_units, output_units, data_num)
        self.loss_fn = SoftmaxCrossEntropy()

    def forward(self, X, y):
        self.X = X
        self.Z1 = self.linear1.forward(X)
        self.A1 = self.relu.forward(self.Z1)
        self.Z2 = self.linear2.forward(self.A1)
        self.loss = self.loss_fn.forward(self.Z2, y)
        return self.loss, self.Z2

    def backward(self, y):
        self.Z2.grad_ = self.loss_fn.backward(y)
        self.A1.grad_ = self.linear2.backward(self.Z2)
        self.Z1.grad_ = self.relu.backward(self.A1)
        self.X.grad_ = self.linear1.backward(self.Z1)

    def zero_grad(self):
        # 勾配の初期化
        self.linear1.W.grad_ = None
        self.linear1.b.grad_ = None
        self.linear2.W.grad_ = None
        self.linear2.b.grad_ = None

    def step(self, learning_rate):
        # パラメータの更新
        self.linear1.W -= learning_rate * self.linear1.W.grad_
        self.linear1.b -= learning_rate * self.linear1.b.grad_
        self.linear2.W -= learning_rate * self.linear2.W.grad_
        self.linear2.b -= learning_rate * self.linear2.b.grad_

## Refactoring後の学習ループ(OptimizerやDataset, Dataloaderは後ほどRefactoring)
# ===データの準備====
dataset = datasets.load_digits()
data = dataset['data']
target = dataset['target']
images = dataset['images']
X_train, X_val, y_train, y_val = train_test_split(images, target, test_size=0.2, random_state=42)
X_train_mean = X_train.mean()
X_train_std = X_train.std()
X_train = (X_train - X_train_mean) / X_train_std
X_val = (X_val - X_train_mean) / X_train_std
X_train = torch.tensor(X_train.reshape(-1, 64), dtype=torch.float32).to(device)
X_val = torch.tensor(X_val.reshape(-1, 64), dtype=torch.float32).to(device)
y_train = F.one_hot(torch.tensor(y_train), num_classes=10).to(device) #1437 x 10
y_val = F.one_hot(torch.tensor(y_val), num_classes=10).to(device) # 360 x 10
batch_size = 30
# モデルの初期化
model = Model(input_features=64, hidden_units=1000, output_units=10, data_num=batch_size)

learning_rate = 0.01

# ログ
train_losses = []
val_losses = []
val_accuracies = []
for epoch in range(100):
    # エポック毎にデータをシャッフル
    shuffled_indices = np.random.permutation(len(y_train))
    num_batches = np.ceil(len(y_train)/batch_size).astype(int)
    running_loss = 0.0
    start_time = time.time()

    for i in range(num_batches):

        # mini batch作成
        start = i * batch_size
        end = start + batch_size

        batch_indices = shuffled_indices[start:end]
        y_true_ = y_train[batch_indices, :] # batch_size x 10

        X = X_train[batch_indices] # batch_size x 64
        # 順伝播と逆伝播の計算
        loss, _ = model.forward(X, y_true_)
        model.backward(y_true_)
        running_loss += loss.item()

        # パラメータ更新
        with torch.no_grad():
            model.step(learning_rate)

        model.zero_grad()

    # validation
    with torch.no_grad():
        val_loss, Z2_val = model.forward(X_val, y_val)

        val_accuracy = torch.sum(torch.argmax(Z2_val, dim=-1) == torch.argmax(y_val, dim=-1)) / y_val.shape[0]

    train_losses.append(running_loss/num_batches)
    val_losses.append(val_loss.item())
    val_accuracies.append(val_accuracy)
    end_time = time.time()
    epoch_duration = end_time - start_time
    print(f'epoch: {epoch}: train error: {running_loss/num_batches}, validation error: {val_loss.item()}, validation accuracy: {val_accuracy}, epoch duration {epoch_duration:.2f} sec')

epoch: 0: train error: 3.020025840650002, validation error: 7.395155429840088, validation accuracy: 0.6083333492279053, epoch duration 0.14 sec
epoch: 1: train error: 0.6960858986712992, validation error: 5.725339412689209, validation accuracy: 0.6805555820465088, epoch duration 0.05 sec
epoch: 2: train error: 0.39150557784887496, validation error: 5.293946266174316, validation accuracy: 0.699999988079071, epoch duration 0.06 sec
epoch: 3: train error: 0.2570005700690672, validation error: 5.015928268432617, validation accuracy: 0.7138888835906982, epoch duration 0.05 sec
epoch: 4: train error: 0.18378105639324835, validation error: 5.0969672203063965, validation accuracy: 0.7111111283302307, epoch duration 0.06 sec
epoch: 5: train error: 0.1510870855029983, validation error: 4.842778205871582, validation accuracy: 0.7194444537162781, epoch duration 0.05 sec
epoch: 6: train error: 0.1166465838032309, validation error: 5.3823347091674805, validation accuracy: 0.6916666626930237, epoch d