<a href="https://colab.research.google.com/github/Junha9/AI/blob/master/Self_Learning_ML_DL/Chapter_07_2_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# import statements
import torch
import torch.nn as nn
import torch.optim as optim
from torchinfo import summary
from torchvision.datasets import FashionMNIST
from sklearn.model_selection import train_test_split

In [2]:
# Fetch both FashionMNIST splits (train first, then test) into the current directory.
fm_train, fm_test = (
    FashionMNIST(root='.', train=is_train, download=True)
    for is_train in (True, False)
)

100%|██████████| 26.4M/26.4M [00:02<00:00, 11.2MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 208kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.81MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 29.3MB/s]


In [3]:
# Inspect the raw dataset: the image data is stored as a torch.Tensor.
print(type(fm_train.data))
# Same report for images ("data") and labels ("targets"): train shape, then test shape.
for field in ("data", "targets"):
    print(getattr(fm_train, field).shape, getattr(fm_test, field).shape)

<class 'torch.Tensor'>
torch.Size([60000, 28, 28]) torch.Size([10000, 28, 28])
torch.Size([60000]) torch.Size([10000])


In [4]:
# Tensor arithmetic broadcasts like NumPy: dividing by a scalar rescales every element.
train_input, train_target = fm_train.data, fm_train.targets
# Divide every pixel by 255.0, which also converts the result to floating point.
train_scaled = train_input / 255.0

In [5]:
# Hold out 20% of the training images as a validation set (fixed seed for a repeatable split).
# The inputs come straight from the untouched fm_train tensors rather than from
# train_scaled / train_target, which this cell rebinds: that keeps the cell idempotent,
# so re-running it yields the same split instead of re-splitting an already-reduced set.
train_scaled, val_scaled, train_target, val_target = train_test_split(
    fm_train.data / 255.0, fm_train.targets, test_size=0.2, random_state=42)
# Sanity check: the two parts together must account for every original sample.
assert len(train_scaled) + len(val_scaled) == len(fm_train.data)
print(train_scaled.shape, val_scaled.shape)

torch.Size([48000, 28, 28]) torch.Size([12000, 28, 28])


In [6]:
# Fully connected classifier: flatten each 28x28 image to 784 values,
# one hidden layer of 100 ReLU units, then 10 output scores (one per class).
layers = [
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=10),
]
model = nn.Sequential(*layers)

In [7]:
# Layer-by-layer report for an input of shape (32, 28, 28), i.e. a batch of 32 images.
sample_batch_shape = (32, 28, 28)
summary(model, input_size=sample_batch_shape)

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [32, 10]                  --
├─Flatten: 1-1                           [32, 784]                 --
├─Linear: 1-2                            [32, 100]                 78,500
├─ReLU: 1-3                              [32, 100]                 --
├─Linear: 1-4                            [32, 10]                  1,010
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 2.54
Input size (MB): 0.10
Forward/backward pass size (MB): 0.03
Params size (MB): 0.32
Estimated Total Size (MB): 0.45

In [8]:
# Use the GPU when one is present, otherwise fall back to the CPU.
# is_available must be *called*: the bare function object is always truthy,
# which would select "cuda" even on a CPU-only machine and make model.to() fail.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=100, bias=True)
  (2): ReLU()
  (3): Linear(in_features=100, out_features=10, bias=True)
)

In [11]:
# Training objective and update rule: cross-entropy loss on the model outputs,
# optimized by Adam with its default hyperparameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters())

In [13]:
# PyTorch has no built-in fit(): iterate over epochs and mini-batches by hand.
epochs = 5
batch_size = 32  # single source of truth for the mini-batch size
n_samples = len(train_scaled)
# Ceiling division so a trailing partial batch is trained on instead of silently dropped.
batches = (n_samples + batch_size - 1) // batch_size
for epoch in range(epochs):
  model.train()  # switch the model to training mode
  train_loss = 0
  for i in range(batches):
    start = i * batch_size
    # Slicing past the end is safe: the last batch is simply shorter.
    inputs = train_scaled[start:start + batch_size].to(device)
    targets = train_target[start:start + batch_size].to(device)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    # The criterion (default reduction) returns the batch mean; weight it by the
    # batch length so a shorter final batch is not over-counted in the epoch average.
    train_loss += loss.item() * len(inputs)
  print(f"epoch: {epoch + 1}, loss: {train_loss/n_samples:.4f}")

epoch: 1, loss: 0.5435
epoch: 2, loss: 0.4022
epoch: 3, loss: 0.3613
epoch: 4, loss: 0.3341
epoch: 5, loss: 0.3127


In [15]:
# Evaluation is manual too: switch to eval mode and score the whole validation set at once.
model.eval()
# Put the validation tensors on the same device as the model.
val_scaled = val_scaled.to(device)
val_target = val_target.to(device)
with torch.no_grad():  # no gradient tracking needed for inference
  outputs = model(val_scaled)
  # Predicted class = index of the largest output score in each row.
  predicts = outputs.argmax(dim=1)
  corrects = predicts.eq(val_target).sum().item()
accuracy = corrects / len(val_target)
print(f"accuracy: {accuracy:.4f}")

accuracy: 0.8705
