In [2]:
import torch
from torch import nn

import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

import os, random
import numpy as np 
import pandas as pd 

SEED = 42

# 학습에 사용할 CPU나 GPU 장치를 얻습니다.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
from timeit import default_timer as timer 

def print_train_time(start: float, end: float, device: torch.device = None):
    
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

from tqdm.auto import tqdm # bar
train_time_start_on_cpu = timer()

## 일반 비선형 모델

### data 

In [3]:
train_data = datasets.FashionMNIST(
    root="desktop", # 더이데 다운로드 할지
    train=True, 
    download=True, 
    transform=ToTensor(), 
    target_transform=None 
)
# Setup testing data
test_data = datasets.FashionMNIST(
    root="desktop",
    train=False, 
    download=True,
    transform=ToTensor()
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to desktop\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:21<00:00, 1235140.41it/s]


Extracting desktop\FashionMNIST\raw\train-images-idx3-ubyte.gz to desktop\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to desktop\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 47898.91it/s]


Extracting desktop\FashionMNIST\raw\train-labels-idx1-ubyte.gz to desktop\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to desktop\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:04<00:00, 989433.96it/s] 


Extracting desktop\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to desktop\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to desktop\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<?, ?it/s]

Extracting desktop\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to desktop\FashionMNIST\raw






In [22]:
image , label = train_data[0]
image2, label2 = train_data[1]
image2.shape , label , label2

(torch.Size([1, 28, 28]), 9, 0)

In [27]:
len(train_data.data), len(train_data.targets), len(test_data.data), len(test_data.targets)

(60000, 60000, 10000, 10000)

In [29]:
train_data.data.shape , train_data.targets.shape

(torch.Size([60000, 28, 28]), torch.Size([60000]))

In [30]:
class_names = train_data.classes
class_names 

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

### Dataloader

In [32]:
## 데이터 로더 적용
from torch.utils.data import DataLoader

BATCH_SIZE = 32
train_dataloader = DataLoader(train_data,
    batch_size=BATCH_SIZE,
    shuffle=True 
)
test_dataloader = DataLoader(test_data,
    batch_size=BATCH_SIZE,
    shuffle=False 
)

print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")
# 배치 차원이 추가가 된다 

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x00000218B4FF8E80>, <torch.utils.data.dataloader.DataLoader object at 0x00000218B4FCF7F0>)
Length of train dataloader: 1875 batches of 32
Length of test dataloader: 313 batches of 32


### 설계 및 모델 형성

In [41]:
class NonLinearModel(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(), 
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.ReLU(), 
            nn.Linear(in_features=hidden_units, out_features=output_shape),
            nn.ReLU() 
        )
    
    def forward(self, x: torch.Tensor):
        return self.layer_stack(x)

In [40]:
torch.manual_seed(SEED)

non_linear_model = NonLinearModel(input_shape=784, 
    hidden_units=10,
    output_shape=len(class_names) # class개로가 최종 output
)

In [42]:
non_linear_model

NonLinearModel(
  (layer_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
    (2): ReLU()
    (3): Linear(in_features=10, out_features=10, bias=True)
    (4): ReLU()
  )
)

### 학습

In [45]:
from helper_functions import accuracy_fn

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=non_linear_model.parameters(), 
                            lr=0.1)

ImportError: cannot import name 'accuracy_fn' from 'helper_functions' (c:\Users\User\Documents\boot camp - study\github\hojeong-project\.venv\lib\site-packages\helper_functions.py)

In [None]:
def train_step(model: torch.nn.Module,
                data_loader: torch.utils.data.DataLoader,
                loss_fn: torch.nn.Module,
                optimizer: torch.optim.Optimizer,
                accuracy_fn,
                device: torch.device = device):
    train_loss, train_acc = 0, 0
    for batch, (X, y) in enumerate(data_loader):

        # 1. Forward pass
        y_pred = model(X)
        # 2. Calculate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss
        train_acc += accuracy_fn(y_true=y,
                                y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels


        # 3. Optimizer zero grad
        optimizer.zero_grad()
        # 4. Loss backward
        loss.backward()
        # 5. Optimizer step
        optimizer.step()

    # Calculate loss and accuracy per epoch and print out what's happening
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

### 평가

In [None]:
torch.manual_seed(SEED)

# Measure time
from timeit import default_timer as timer
train_time_start_on_gpu = timer()

epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=train_dataloader, 
        model=non_linear_model, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn
    )
    test_step(data_loader=test_dataloader,
        model=non_linear_model,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn
    )

train_time_end_on_gpu = timer()
total_train_time_non_linear_model = print_train_time(start=train_time_start_on_gpu,
                                            end=train_time_end_on_gpu,
                                            device=device)

In [None]:
torch.manual_seed(SEED)

non_linear_model_results = eval_model(
    model=non_linear_model, 
    data_loader=test_dataloader,
    loss_fn=loss_fn, 
    accuracy_fn=accuracy_fn) 

non_linear_model_results 
# 모델이름 , loss , acc 나옴 

## CNN

In [47]:
# Create a convolutional neural network 
class CNNModel(nn.Module):  # Relu 를 넣는
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int): 
        super().__init__() # 부모님 생성 
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, 
                    out_channels=hidden_units, 
                    kernel_size=3, 
                    stride=1, 
                    padding=1),   # conv layer
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, 
                    out_channels=hidden_units,
                    kernel_size=3,
                    stride=1,
                    padding=1),  # cnov layer 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                        stride=2) #maxpooling
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1), # padding 1 , kernal size = (3,3) cnn
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2) # maxpooling
        )
        self.classifier = nn.Sequential(           # 학습 부분
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*7*7, 
                        out_features=output_shape) # 선형(앞에서 선형 부분이 잘 나왔으므로 relu를 사용하지 않아도 된다)
        )
    
    def forward(self, x: torch.Tensor):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.classifier(x)
        return x

In [49]:
torch.manual_seed(SEED)

cnn_model = CNNModel(input_shape=1,
                     hidden_units=10,
                     output_shape=len(class_names))
cnn_model

CNNModel(
  (block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)

### 평가

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=cnn_model.parameters(), lr=0.1)

In [None]:
torch.manual_seed(SEED) #매우중요 

# Measure time
from timeit import default_timer as timer
train_time_start_model_2 = timer()

# Train and test model 
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=train_dataloader, 
        model=cnn_model, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device
    )
    test_step(data_loader=test_dataloader,
        model=cnn_model,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device
    )

train_time_end_model_2 = timer()
total_train_time_cnn_model = print_train_time(start=train_time_start_model_2,
                                                end=train_time_end_model_2,
                                                device=device)

In [None]:
# Get cnn_model results 
cnn_model_results = eval_model(
    model=cnn_model,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn
)

cnn_model_results
# name , loss , acc

## 모델 뭐가 더 좋은지 비교할떄 사용

In [None]:
compare_results = pd.DataFrame([non_linear_model_results, cnn_model_results])
compare_results["training_time"] = [total_train_time_non_linear_model,
                                    total_train_time_cnn_model]
compare_results # 데이터프레임으로 name , loss , acc , time 나옴 