In [1]:
import torch
# Display the installed PyTorch version so readers know which release produced the outputs.
torch.__version__

'2.0.0+cu118'

In [2]:
# Leaf tensor of ones, flagged so autograd records operations performed on it.
x = torch.ones((2, 2), requires_grad=True)

# Show the tensor and then its grad_fn (None in the recorded output, since x
# was created directly rather than produced by an operation).
print(x, x.grad_fn, sep='\n')

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
None


In [3]:
# Adding a constant to a tracked tensor gives a result that records how it was
# made; the printed grad_fn is AddBackward0.
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [4]:
# Element-wise square of y scaled by 3, then reduced to a scalar by averaging
# over all elements.
z = y * y * 3
out = z.mean()

In [5]:
# Both results carry a grad_fn (MulBackward0 and MeanBackward0 in the recorded output).
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [6]:
# Rebuild the same computation from a fresh leaf tensor so x.grad starts empty.
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()

print(out)

# Ask autograd to keep the gradient of the intermediate tensor y as well.
y.retain_grad()
# retain_graph=True keeps the graph alive so backward() can be called again below.
out.backward(retain_graph=True)

print(x.grad)
print(y.grad)
# z is a non-leaf tensor (is_leaf prints False) and retain_grad() was not called
# on it, so the recorded output shows None for z.grad.
# NOTE(review): the stray `print(z.grad)` fragment after this cell's output looks
# like the remains of a warning raised by this access - confirm.
print(z.grad)
print(z.is_leaf)

# Second backward pass over the retained graph: the recorded gradients go from
# 4.5 to 9, i.e. they are accumulated into .grad rather than overwritten.
out.backward()
print(x.grad)
print(y.grad)

tensor(27., grad_fn=<MeanBackward0>)
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
None
False
tensor([[9., 9.],
        [9., 9.]])
tensor([[9., 9.],
        [9., 9.]])


  print(z.grad)


In [7]:
import pandas as pd

from sklearn.datasets import load_iris

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [8]:
class Net(nn.Module):
  """Fully connected classifier with four hidden layers and a linear output layer.

  Hidden widths are 128 -> 64 -> 32 -> 16. The first three hidden layers are each
  followed by BatchNorm1d and ReLU, the fourth by ReLU only. The output layer
  returns raw, un-normalised class scores (no softmax is applied here).

  Args:
    in_features: number of features per input sample. Defaults to 4, the
      value that was previously hard-coded.
    num_classes: number of class scores produced per sample. Defaults to 3,
      the value that was previously hard-coded.
  """

  def __init__(self, in_features=4, num_classes=3):
    super().__init__()

    # Attribute names and registration order (linear layers, batch norms,
    # activation) are kept unchanged so print(net), parameters() ordering and
    # state_dict keys stay the same.
    self.layer0 = nn.Linear(in_features, 128)
    self.layer1 = nn.Linear(128, 64)
    self.layer2 = nn.Linear(64, 32)
    self.layer3 = nn.Linear(32, 16)
    self.layer4 = nn.Linear(16, num_classes)

    self.bn0 = nn.BatchNorm1d(128)
    self.bn1 = nn.BatchNorm1d(64)
    self.bn2 = nn.BatchNorm1d(32)

    # One stateless ReLU module is shared by every hidden layer.
    self.act = nn.ReLU()

  def forward(self, x):
    """Map a batch of shape (batch, in_features) to scores of shape (batch, num_classes)."""
    # Linear -> BatchNorm -> ReLU for the three widest hidden layers.
    x = self.act(self.bn0(self.layer0(x)))
    x = self.act(self.bn1(self.layer1(x)))
    x = self.act(self.bn2(self.layer2(x)))
    # The last hidden layer has no batch normalisation.
    x = self.act(self.layer3(x))
    # Output layer: raw scores, left un-normalised for the loss function.
    x = self.layer4(x)

    return x

In [9]:
# Loss function used for the classification examples in this notebook.
criterion = nn.CrossEntropyLoss()

# Toy batch: four random samples with four features each, plus integer targets.
ex_X = torch.randn(4, 4)
ex_y = torch.tensor([1, 0, 2, 0])

In [12]:
# Fresh network with randomly initialised weights; run the toy batch through it
# and compute the cross-entropy loss against the toy targets.
net = Net()
output = net(ex_X)
loss = criterion(output, ex_y)
print(loss)

tensor(1.0715, grad_fn=<NllLossBackward0>)


In [13]:
# .item() extracts the scalar loss as a plain Python number.
print(loss.item())

1.0715147256851196


In [14]:
# Clear any gradients stored on the network's parameters before back-propagating.
net.zero_grad()

In [15]:
# Before backward(): the recorded output shows the output layer's bias has no
# gradient yet (None) and that it is a leaf tensor.
print(net.layer4.bias.grad)
print(net.layer4.bias.is_leaf)

None
True


In [16]:
# Back-propagate the loss so the network's parameters receive gradients.
loss.backward()

In [17]:
# After backward(): the output layer's bias now holds a gradient, one value per class score.
print(net.layer4.bias.grad)

tensor([-0.1226,  0.0630,  0.0595])


In [18]:
# Materialise the parameter iterator as a list so it can be counted and indexed.
params = list(net.parameters())

In [20]:
# Number of parameter tensors: weight and bias for each of the five Linear and
# three BatchNorm1d layers (16 in the recorded output).
len(params)

16

In [21]:
# Shape of the first parameter tensor; the recorded (128, 4) matches layer0 = nn.Linear(4, 128).
params[0].size()

torch.Size([128, 4])

In [22]:
import torch.optim as optim

# Plain SGD over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.001)

# One manual optimisation step on the toy batch: clear old gradients, run the
# forward pass, back-propagate the loss, then apply the parameter update.
optimizer.zero_grad()
output = net(ex_X)
loss = criterion(output, ex_y)
loss.backward()
optimizer.step()

In [25]:
# Load the Iris dataset that ships with scikit-learn.
dataset = load_iris()

# Inspect the container type (a scikit-learn Bunch in the recorded output).
type(dataset)

sklearn.utils._bunch.Bunch

In [26]:
# Feature matrix and class targets taken from the loaded dataset.
data = dataset.data
label = dataset.target

# Print the description text bundled with the dataset.
print(dataset.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [28]:
# Check the array shapes: (150, 4) features and (150,) labels in the recorded output.
print(f'shape of data: {data.shape}')
print(f'shape of label: {label.shape}')

shape of data: (150, 4)
shape of label: (150,)


In [29]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for testing; stratify keeps the class proportions
# equal in both parts. random_state pins the shuffle so the split, and
# everything trained on it, is identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    data, label, test_size=0.25, stratify=label, random_state=42
)
print(len(X_train))
print(len(X_test))

112
38


In [30]:
# Build the DataLoader.
# This cell rebinds X_train / y_train / X_test / y_test from NumPy arrays to
# tensors. torch.as_tensor accepts both kinds of input, so re-running the cell
# after the conversion still works; torch.from_numpy would raise a TypeError
# when handed a tensor.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)

X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.long)

# Pair each training feature row with its label.
train_set = TensorDataset(X_train, y_train)

# Mini-batches of 4 samples, reshuffled every epoch.
train_loader = DataLoader(train_set, batch_size=4, shuffle=True)

In [31]:
# Re-create the network so training starts from freshly initialised weights.
net = Net()

In [32]:
# Print the module tree to check the registered layers.
print(net)

Net(
  (layer0): Linear(in_features=4, out_features=128, bias=True)
  (layer1): Linear(in_features=128, out_features=64, bias=True)
  (layer2): Linear(in_features=64, out_features=32, bias=True)
  (layer3): Linear(in_features=32, out_features=16, bias=True)
  (layer4): Linear(in_features=16, out_features=3, bias=True)
  (bn0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act): ReLU()
)


In [33]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [34]:
# Echo the selected device ('cuda' in the recorded run).
device

'cuda'

In [35]:
# Move the network to the selected device; the returned module is echoed as the cell output.
net.to(device)

Net(
  (layer0): Linear(in_features=4, out_features=128, bias=True)
  (layer1): Linear(in_features=128, out_features=64, bias=True)
  (layer2): Linear(in_features=64, out_features=32, bias=True)
  (layer3): Linear(in_features=32, out_features=16, bias=True)
  (layer4): Linear(in_features=16, out_features=3, bias=True)
  (bn0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act): ReLU()
)

In [36]:
# Plain SGD over the parameters of the network created for training.
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
# Cross-entropy loss on the raw class scores returned by the network.
criterion = nn.CrossEntropyLoss()
# Number of passes over the training set.
epochs = 200

In [45]:
# Per-epoch history: mean batch loss and fraction of correctly classified samples.
losses = []
accuracies = []

# Put the network in training mode explicitly so this cell behaves the same
# even if net.eval() was called earlier in the session.
net.train()

# Number of samples behind the loader; used to turn the running count of
# correct predictions into a fraction without relying on a separate global.
num_samples = len(train_loader.dataset)

# Iterate over the configured number of epochs instead of repeating the literal,
# so changing `epochs` in the configuration cell actually takes effect.
for epoch in range(epochs):
  epoch_loss, epoch_accuracy = 0, 0
  for X, y in train_loader:
    # Batches must live on the same device as the network.
    X = X.to(device)
    y = y.to(device)

    # Standard step: clear gradients, forward, loss, backward, update.
    optimizer.zero_grad()
    output = net(X)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    # Predicted class = index of the largest score. argmax avoids unpacking
    # into `_`, which would shadow IPython's last-output variable.
    predicted = output.argmax(dim=1)
    accuracy = (predicted == y).sum().item()
    epoch_loss += loss.item()
    epoch_accuracy += accuracy

  # Loss is averaged over batches; accuracy is correct predictions over samples.
  epoch_loss /= len(train_loader)
  epoch_accuracy /= num_samples
  print("epoch: {}, \tloss: {}, \taccuracy: {}".format(str(epoch+1).zfill(3), round(epoch_loss, 4), round(epoch_accuracy,4)))

  losses.append(epoch_loss)
  accuracies.append(epoch_accuracy)

epoch: 001, 	loss: 1.0723, 	accuracy: 0.3839
epoch: 002, 	loss: 1.0662, 	accuracy: 0.375
epoch: 003, 	loss: 1.0545, 	accuracy: 0.4018
epoch: 004, 	loss: 1.0355, 	accuracy: 0.4464
epoch: 005, 	loss: 1.0331, 	accuracy: 0.5089
epoch: 006, 	loss: 1.0203, 	accuracy: 0.5357
epoch: 007, 	loss: 1.0288, 	accuracy: 0.5357
epoch: 008, 	loss: 1.0107, 	accuracy: 0.5982
epoch: 009, 	loss: 1.0016, 	accuracy: 0.6161
epoch: 010, 	loss: 1.0152, 	accuracy: 0.5268
epoch: 011, 	loss: 1.0127, 	accuracy: 0.5089
epoch: 012, 	loss: 0.9803, 	accuracy: 0.6607
epoch: 013, 	loss: 0.9802, 	accuracy: 0.6518
epoch: 014, 	loss: 0.9726, 	accuracy: 0.6964
epoch: 015, 	loss: 0.9725, 	accuracy: 0.6429
epoch: 016, 	loss: 0.9546, 	accuracy: 0.6696
epoch: 017, 	loss: 0.9559, 	accuracy: 0.6696
epoch: 018, 	loss: 0.9532, 	accuracy: 0.6607
epoch: 019, 	loss: 0.9347, 	accuracy: 0.6518
epoch: 020, 	loss: 0.9322, 	accuracy: 0.75
epoch: 021, 	loss: 0.9239, 	accuracy: 0.7232
epoch: 022, 	loss: 0.9243, 	accuracy: 0.7232
epoch: 023, 	