# Device Environment Check

In [1]:
import torch

print(f"cuda activate : {torch.cuda.is_available()}\n") #gpu
print(f"python_version : {sys.version}\n") #python
print(f"torch_version : {torch.__version__}\n") #pytorch
print(f"cuda_version : {torch.cuda_version}\n") #cuda
print(f"cudnn_version : {torch.backends.cudnn.version()}\n") #cudnn

  from .autonotebook import tqdm as notebook_tqdm


cuda activate : True

python_version : 3.8.13 (default, Mar 28 2022, 06:59:08) [MSC v.1916 64 bit (AMD64)]

torch_version : 1.12.1

cuda_version : 11.6

cudnn_version : 8302



In [2]:
import os
import sys
from datetime import datetime

os.getcwd() #디렉토리 확인
#sys.path.append('') #필요 lib나 링크 추가할 때 쓰는 코드

'c:\\Users\\Administrator\\Desktop\\study\\lecture\\Part 5\\Untitled Folder'

# Data Preprocessing

In [3]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch import utils
from torch.utils.data import DataLoader
from torch.optim import RAdam

import wandb
import torchvision
from torchvision import transforms
from torchvision.datasets import FashionMNIST 

In [11]:
#download data
#prepare to download data
#NLP data는 다름
data_root = os.path.join(os.getcwd(), "data")

transform = torchvision.transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.5],[0.5]),
    ]
)
fashion_mnist_dataset = FashionMNIST(data_root, download=True, transform=transform)

In [12]:
"""Loads the data"""
# Create indices for the split
batch_size=100

dataset_size = len(fashion_mnist_dataset)
val_size = int(0.1 * dataset_size)
train_size = dataset_size - val_size

train_dataset, val_dataset = torch.utils.data.random_split(fashion_mnist_dataset,
                                               [train_size, val_size])

train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True)
val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True)


In [6]:
# Waarmup Scheduler

class WarmupLR(optim.lr_scheduler.LambdaLR) :
    def __init__ (
        self, 
        optimizer:optim.Optimizer,
        warmup_end_steps: int,
        last_epoch : int = -1,    
        ):

        def warmup_fn(step: int):
            if step < warmup_end_steps:
                return float(step) / float(max(warmup_end_steps,1))
            return 1.0

        super().__init__(optimizer, warmup_fn, last_epoch)

# Model Design

In [7]:
# With some modifications, source is from https://github.com/Bjarten/early-stopping-pytorch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.ckpt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.ckpt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        
        filename = self.path.split('/')[-1]
        save_dir = os.path.dirname(self.path)
        print(save_dir, f"val_loss-{val_loss}-{filename}")
        torch.save(model, f = os.path.join(save_dir, f"val_loss-{val_loss}"))
        self.val_loss_min = val_loss

In [8]:
class LeNet(nn.Module):
	def __init__(self, numChannels, classes):
		super(LeNet, self).__init__()
		#Layer1
		self.conv1 = nn.Conv2d(in_channels=numChannels, out_channels=20,kernel_size=(5, 5))
		self.relu1 = nn.ReLU()
		self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		#Layer2
		self.conv2 = nn.Conv2d(in_channels=20, out_channels=50,kernel_size=(5, 5))
		self.relu2 = nn.ReLU()
		self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		self.fc1 = nn.Linear(in_features=800, out_features=500)
		self.relu3 = nn.ReLU()
		self.fc2 = nn.Linear(in_features=500, out_features=classes)
		self.logSoftmax = nn.LogSoftmax(dim=1)
    
	def forward(self, x):
		# pass the input through our first set of CONV => RELU =>
		# POOL layers
		x = self.conv1(x)
		x = self.relu1(x)
		x = self.maxpool1(x)
		# pass the output from the previous layer through the second
		# set of CONV => RELU => POOL layers
		x = self.conv2(x)
		x = self.relu2(x)
		x = self.maxpool2(x)
		# flatten the output from the previous layer and pass it
		# through our only set of FC => RELU layers
		x = torch.flatten(x, 1)
		x = self.fc1(x)
		x = self.relu3(x)
		# pass the output to our softmax classifier to get our output
		# predictions
		x = self.fc2(x)
		output = self.logSoftmax(x)
		# return the output predictions
		return output

## Define Model

In [9]:
#define model.
# crtl + slash => 주석 한번에 달기
#model = MLP(28*28, 128, 64, 10)
#model = MLPWithDropout(28*28, 256,128, 64, 10, dropout_prob=0.3)

model = LeNet(1,len(fashion_mnist_dataset.classes))
model_name = type(model).__name__

#define loss
loss_function = nn.CrossEntropyLoss()

#define optimizer
lr = 1e-3
#optimizer = torch.optim.Adam(model.parameters(), lr=lr)
#optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = RAdam(model.parameters(), lr=lr)
optimizer_name = type(optimizer).__name__

#define scheduler
scheduler = WarmupLR(optimizer, 1500)
scheduler_name = type(scheduler).__name__ if scheduler is not None else "No"

max_epoch = 100

#define tensorboard logger
#log_dir = os.path.join("runs", model_name)

now = datetime.now()
run_name = f"{now.strftime('%Y-%m-%d %H%M')}-{model_name}-{optimizer_name}_optim_{lr}_lr_with_{scheduler_name}_scheduler" 
log_dir = f"runs\{now.strftime('%Y-%m-%d %H%M')}-{model_name}"

log_interval = 100

#define wandb

project_name = "fashion_mnist_tutorials"
run_tags = [project_name]
wandb.init(
    project = project_name,
    name= run_name,
    tags= run_tags,
    config={"lr" : lr, "model_name" : model_name, "optimizer_name" : optimizer_name , "scheduler_name" : scheduler_name},
    reinit=True

)

#save model path+
log_model_path = os.path.join(log_dir, "models")
os.makedirs(log_model_path, exist_ok=True)
print(log_model_path)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmagicturtle[0m. Use [1m`wandb login --relogin`[0m to force relogin


runs\2022-09-25 1539-LeNet\models


In [13]:
#define early stopping
early_stopper = EarlyStopping(
    patience=3, verbose=True, path= os.path.join(log_model_path,"model.ckpt")
)

train_step = 0
for epoch in range(1, max_epoch+1):
    #validation step
    #validation 할 때 optimizer가 train에 관여하지 않도록 해야 함
    with torch.no_grad():
        val_loss = 0.0
        val_corrects =0
        model.eval()
        
        for val_batch_idx, (val_images, val_labels) in enumerate (
            tqdm(val_loader, position=0, leave=True, desc = "validation") #중간중간 output을 예쁘게 보이게
        ):
            # forwards
            val_outputs = model(val_images)
            _, val_preds = torch.max(val_outputs, 1)

            # loss & acc
            val_loss += loss_function(val_outputs, val_labels) / val_outputs.shape[0] #val_outputs.shape는 batchsize
            val_corrects += torch.sum(val_preds == val_labels.data) / val_outputs.shape[0]
     
    #vaild step logging
    val_epoch_loss = val_loss / len(val_loader)
    val_epoch_acc = val_corrects / len(val_loader)

    print(
        f"{epoch} epoch, {train_step} step: val_loss : {val_epoch_loss}, val_acc: {val_epoch_acc}"
        )
    
    #wandb log
    wandb.log({
        "Loss/val": val_epoch_loss,
        "Acc/val" : val_epoch_acc,
        "Images/val" : wandb.Image(val_images),
        "Preds/val" : wandb.Histogram(val_outputs.detach().numpy()),
        "Outputs/val" : wandb.Histogram(val_preds.detach().numpy()),
        "Labels/val" : wandb.Histogram(val_labels.data.detach().numpy()),
        }
        , step=train_step
    )

    #check model early stopping poing & save model if model reached the best performance.
    early_stopper(val_epoch_loss, model)
    if early_stopper.early_stop :
        break

    current_loss = 0
    current_correct= 0 
    model.train()


    #train step    
    for batch_idx, (images, labels) in enumerate(
        tqdm(train_loader, position=0, leave=True, desc = "train") #중간중간 output을 예쁘게 보이게
    ):
        
        current_loss = 0.0
        current_corrects = 0
        
        #forwards
        #get predictions
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
      
        #get loss (Loss 계산)
        loss = loss_function(outputs, labels)
        
        #backpropagation
        #opmizier 초기화
        optimizer.zero_grad()
        
        # Perform backward pass
        loss.backward()
        
        # Perform Optimization
        optimizer.step()

        # Perform Scheduler
        if scheduler is not None :
           scheduler.step()
        
        current_loss += loss.item()
        current_corrects += torch.sum(preds == labels.data)

        if train_step % log_interval == 0:
           train_loss = current_loss / log_interval
           train_acc = current_corrects / log_interval
            
           print(
                    f"{train_step} : train_loss : {train_loss}, train_acc: {train_acc}"
            )

           # wandb log 
           wandb.log({
           "Loss/train": train_loss,
           "Acc/train" : train_acc,
           "Images/train" : wandb.Image(val_images),
           "Preds/train" : wandb.Histogram(outputs.detach().numpy()),
           "Outputs/train" : wandb.Histogram(preds.detach().numpy()),
           "Labels/train" : wandb.Histogram(labels.data.detach().numpy()),
           "Leanring Rate" : scheduler.get_last_lr()[0]
           }
            , step=train_step
           )
           current_loss = 0
           current_correct= 0 
            
        train_step +=1
        


validation: 100%|██████████| 60/60 [00:01<00:00, 31.08it/s]


1 epoch, 0 step: val_loss : 0.023053469136357307, val_acc: 0.10233331471681595
Validation loss decreased (inf --> 0.023053).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.023053469136357307-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:   0%|          | 1/540 [00:00<00:56,  9.61it/s]

0 : train_loss : 0.023037714958190916, train_acc: 0.07000000029802322


train:  19%|█▉        | 103/540 [00:06<00:27, 15.75it/s]

100 : train_loss : 0.022528088092803954, train_acc: 0.3700000047683716


train:  38%|███▊      | 203/540 [00:11<00:19, 17.10it/s]

200 : train_loss : 0.015557587146759033, train_acc: 0.5299999713897705


train:  56%|█████▌    | 303/540 [00:17<00:14, 15.96it/s]

300 : train_loss : 0.008455737829208373, train_acc: 0.7699999809265137


train:  75%|███████▍  | 403/540 [00:23<00:08, 17.05it/s]

400 : train_loss : 0.007424333095550537, train_acc: 0.7099999785423279


train:  93%|█████████▎| 503/540 [00:28<00:02, 16.88it/s]

500 : train_loss : 0.0067047041654586794, train_acc: 0.7400000095367432


train: 100%|██████████| 540/540 [00:30<00:00, 17.46it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 31.51it/s]


2 epoch, 540 step: val_loss : 0.006023855414241552, val_acc: 0.783333420753479
Validation loss decreased (0.023053 --> 0.006024).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.006023855414241552-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:  11%|█▏        | 62/540 [00:03<00:29, 16.43it/s]

600 : train_loss : 0.004999824464321136, train_acc: 0.8299999833106995


train:  30%|███       | 162/540 [00:09<00:22, 16.53it/s]

700 : train_loss : 0.003843148052692413, train_acc: 0.8500000238418579


train:  49%|████▊     | 262/540 [00:14<00:16, 16.78it/s]

800 : train_loss : 0.004501811563968659, train_acc: 0.8199999928474426


train:  67%|██████▋   | 362/540 [00:20<00:10, 16.29it/s]

900 : train_loss : 0.006816683411598206, train_acc: 0.7699999809265137


train:  86%|████████▌ | 462/540 [00:25<00:04, 16.64it/s]

1000 : train_loss : 0.0047094601392745974, train_acc: 0.7599999904632568


train: 100%|██████████| 540/540 [00:29<00:00, 18.02it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 31.91it/s]


3 epoch, 1080 step: val_loss : 0.004098251927644014, val_acc: 0.8528333902359009
Validation loss decreased (0.006024 --> 0.004098).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.004098251927644014-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:   4%|▍         | 24/540 [00:01<00:30, 16.94it/s]

1100 : train_loss : 0.0030907675623893737, train_acc: 0.8899999856948853


train:  23%|██▎       | 122/540 [00:06<00:24, 16.76it/s]

1200 : train_loss : 0.004264865219593048, train_acc: 0.8500000238418579


train:  41%|████      | 222/540 [00:12<00:19, 16.62it/s]

1300 : train_loss : 0.0031796783208847047, train_acc: 0.8899999856948853


train:  60%|█████▉    | 322/540 [00:17<00:13, 16.23it/s]

1400 : train_loss : 0.0039352202415466305, train_acc: 0.8600000143051147


train:  78%|███████▊  | 422/540 [00:23<00:06, 16.94it/s]

1500 : train_loss : 0.003869321346282959, train_acc: 0.8500000238418579


train:  97%|█████████▋| 522/540 [00:29<00:01, 16.49it/s]

1600 : train_loss : 0.003529263436794281, train_acc: 0.8999999761581421


train: 100%|██████████| 540/540 [00:30<00:00, 17.93it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 32.34it/s]


4 epoch, 1620 step: val_loss : 0.003430720651522279, val_acc: 0.880500078201294
Validation loss decreased (0.004098 --> 0.003431).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.003430720651522279-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:  15%|█▌        | 82/540 [00:04<00:27, 16.53it/s]

1700 : train_loss : 0.0033295288681983947, train_acc: 0.8799999952316284


train:  34%|███▎      | 182/540 [00:10<00:21, 16.57it/s]

1800 : train_loss : 0.0037847697734832765, train_acc: 0.8700000047683716


train:  52%|█████▏    | 282/540 [00:15<00:15, 16.54it/s]

1900 : train_loss : 0.002346283197402954, train_acc: 0.9200000166893005


train:  71%|███████   | 382/540 [00:21<00:09, 16.58it/s]

2000 : train_loss : 0.0028591197729110718, train_acc: 0.8700000047683716


train:  89%|████████▉ | 482/540 [00:26<00:03, 16.53it/s]

2100 : train_loss : 0.003328206539154053, train_acc: 0.9100000262260437


train: 100%|██████████| 540/540 [00:30<00:00, 17.95it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 32.27it/s]


5 epoch, 2160 step: val_loss : 0.00316341663710773, val_acc: 0.887666642665863
Validation loss decreased (0.003431 --> 0.003163).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.00316341663710773-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:   8%|▊         | 42/540 [00:02<00:30, 16.37it/s]

2200 : train_loss : 0.0025324925780296327, train_acc: 0.9300000071525574


train:  26%|██▋       | 142/540 [00:07<00:24, 16.48it/s]

2300 : train_loss : 0.002506676912307739, train_acc: 0.9399999976158142


train:  45%|████▍     | 242/540 [00:13<00:17, 16.56it/s]

2400 : train_loss : 0.0019700668752193453, train_acc: 0.9200000166893005


train:  63%|██████▎   | 342/540 [00:19<00:12, 15.80it/s]

2500 : train_loss : 0.0033212104439735413, train_acc: 0.8799999952316284


train:  82%|████████▏ | 442/540 [00:24<00:05, 16.47it/s]

2600 : train_loss : 0.002803739011287689, train_acc: 0.8899999856948853


train: 100%|██████████| 540/540 [00:30<00:00, 17.95it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 31.67it/s]


6 epoch, 2700 step: val_loss : 0.002749453531578183, val_acc: 0.9023332595825195
Validation loss decreased (0.003163 --> 0.002749).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.002749453531578183-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:   0%|          | 2/540 [00:00<00:41, 13.07it/s]

2700 : train_loss : 0.0018758703768253326, train_acc: 0.9399999976158142


train:  19%|█▉        | 102/540 [00:05<00:26, 16.72it/s]

2800 : train_loss : 0.002101212739944458, train_acc: 0.9200000166893005


train:  37%|███▋      | 202/540 [00:11<00:20, 16.82it/s]

2900 : train_loss : 0.0030521559715270996, train_acc: 0.8799999952316284


train:  56%|█████▌    | 302/540 [00:16<00:14, 16.56it/s]

3000 : train_loss : 0.0018886443972587585, train_acc: 0.9200000166893005


train:  74%|███████▍  | 402/540 [00:22<00:08, 16.71it/s]

3100 : train_loss : 0.002500273287296295, train_acc: 0.9100000262260437


train:  93%|█████████▎| 502/540 [00:27<00:02, 16.45it/s]

3200 : train_loss : 0.002037038654088974, train_acc: 0.9200000166893005


train: 100%|██████████| 540/540 [00:30<00:00, 17.96it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 31.87it/s]


7 epoch, 3240 step: val_loss : 0.002697484102100134, val_acc: 0.9058332443237305
Validation loss decreased (0.002749 --> 0.002697).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.002697484102100134-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:  11%|█▏        | 62/540 [00:03<00:28, 16.64it/s]

3300 : train_loss : 0.0019606977701187136, train_acc: 0.9100000262260437


train:  30%|███       | 162/540 [00:09<00:22, 16.46it/s]

3400 : train_loss : 0.0029105913639068605, train_acc: 0.9100000262260437


train:  49%|████▊     | 262/540 [00:14<00:16, 16.73it/s]

3500 : train_loss : 0.0035282608866691587, train_acc: 0.8700000047683716


train:  67%|██████▋   | 362/540 [00:20<00:10, 16.75it/s]

3600 : train_loss : 0.00183489128947258, train_acc: 0.9300000071525574


train:  86%|████████▌ | 462/540 [00:25<00:04, 16.78it/s]

3700 : train_loss : 0.0008021567016839981, train_acc: 0.9800000190734863


train: 100%|██████████| 540/540 [00:30<00:00, 17.94it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 31.77it/s]


8 epoch, 3780 step: val_loss : 0.002689993241801858, val_acc: 0.902166485786438
Validation loss decreased (0.002697 --> 0.002690).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.002689993241801858-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:   4%|▍         | 22/540 [00:01<00:31, 16.61it/s]

3800 : train_loss : 0.0015316396951675416, train_acc: 0.9399999976158142


train:  23%|██▎       | 122/540 [00:06<00:26, 15.78it/s]

3900 : train_loss : 0.0014938491582870484, train_acc: 0.9599999785423279


train:  41%|████      | 222/540 [00:12<00:19, 16.70it/s]

4000 : train_loss : 0.0017315521836280823, train_acc: 0.949999988079071


train:  60%|█████▉    | 322/540 [00:17<00:13, 16.58it/s]

4100 : train_loss : 0.001215102970600128, train_acc: 0.949999988079071


train:  78%|███████▊  | 422/540 [00:23<00:07, 16.61it/s]

4200 : train_loss : 0.0021927472949028015, train_acc: 0.9200000166893005


train:  97%|█████████▋| 522/540 [00:29<00:01, 16.57it/s]

4300 : train_loss : 0.0016269780695438384, train_acc: 0.9399999976158142


train: 100%|██████████| 540/540 [00:30<00:00, 17.96it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 32.01it/s]


9 epoch, 4320 step: val_loss : 0.0027117463760077953, val_acc: 0.9059999585151672
EarlyStopping counter: 1 out of 3


train:  15%|█▌        | 82/540 [00:04<00:28, 16.09it/s]

4400 : train_loss : 0.0014993767440319061, train_acc: 0.9599999785423279


train:  34%|███▎      | 182/540 [00:10<00:21, 16.51it/s]

4500 : train_loss : 0.0012775886058807374, train_acc: 0.949999988079071


train:  52%|█████▏    | 282/540 [00:15<00:15, 16.68it/s]

4600 : train_loss : 0.0009701609611511231, train_acc: 0.9700000286102295


train:  71%|███████   | 382/540 [00:21<00:09, 16.68it/s]

4700 : train_loss : 0.0016563217341899871, train_acc: 0.9599999785423279


train:  89%|████████▉ | 482/540 [00:26<00:03, 16.52it/s]

4800 : train_loss : 0.002324609011411667, train_acc: 0.9399999976158142


train: 100%|██████████| 540/540 [00:30<00:00, 17.95it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 32.51it/s]


10 epoch, 4860 step: val_loss : 0.0025349785573780537, val_acc: 0.9100000262260437
Validation loss decreased (0.002690 --> 0.002535).  Saving model ...
runs\2022-09-25 1539-LeNet\models val_loss-0.0025349785573780537-runs\2022-09-25 1539-LeNet\models\model.ckpt


train:   8%|▊         | 42/540 [00:02<00:30, 16.27it/s]

4900 : train_loss : 0.0020996886491775513, train_acc: 0.9100000262260437


train:  26%|██▋       | 142/540 [00:07<00:24, 16.45it/s]

5000 : train_loss : 0.001535397171974182, train_acc: 0.9300000071525574


train:  45%|████▍     | 242/540 [00:13<00:17, 16.71it/s]

5100 : train_loss : 0.0019870419800281525, train_acc: 0.9200000166893005


train:  63%|██████▎   | 342/540 [00:18<00:12, 16.38it/s]

5200 : train_loss : 0.00149045929312706, train_acc: 0.9399999976158142


train:  82%|████████▏ | 442/540 [00:24<00:05, 16.76it/s]

5300 : train_loss : 0.002000833749771118, train_acc: 0.9399999976158142


train: 100%|██████████| 540/540 [00:29<00:00, 18.01it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 31.92it/s]


11 epoch, 5400 step: val_loss : 0.0026074585039168596, val_acc: 0.9079998731613159
EarlyStopping counter: 1 out of 3


train:   0%|          | 2/540 [00:00<00:39, 13.51it/s]

5400 : train_loss : 0.0014562013745307923, train_acc: 0.949999988079071


train:  19%|█▉        | 102/540 [00:05<00:26, 16.60it/s]

5500 : train_loss : 0.0005703501775860786, train_acc: 0.9800000190734863


train:  37%|███▋      | 202/540 [00:11<00:20, 16.60it/s]

5600 : train_loss : 0.0018944093585014344, train_acc: 0.9300000071525574


train:  56%|█████▌    | 302/540 [00:16<00:14, 16.32it/s]

5700 : train_loss : 0.0017930692434310913, train_acc: 0.9200000166893005


train:  74%|███████▍  | 402/540 [00:22<00:08, 16.58it/s]

5800 : train_loss : 0.0011047585308551788, train_acc: 0.949999988079071


train:  93%|█████████▎| 502/540 [00:28<00:02, 16.35it/s]

5900 : train_loss : 0.0020732849836349486, train_acc: 0.9200000166893005


train: 100%|██████████| 540/540 [00:30<00:00, 17.92it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 32.18it/s]


12 epoch, 5940 step: val_loss : 0.0027158299926668406, val_acc: 0.9111667275428772
EarlyStopping counter: 2 out of 3


train:  11%|█▏        | 62/540 [00:03<00:28, 16.61it/s]

6000 : train_loss : 0.0010173135995864867, train_acc: 0.9700000286102295


train:  30%|███       | 162/540 [00:09<00:22, 16.58it/s]

6100 : train_loss : 0.0006374160945415496, train_acc: 0.9700000286102295


train:  49%|████▊     | 262/540 [00:14<00:16, 16.39it/s]

6200 : train_loss : 0.0013257345557212829, train_acc: 0.9300000071525574


train:  67%|██████▋   | 362/540 [00:20<00:10, 16.37it/s]

6300 : train_loss : 0.0012345672398805618, train_acc: 0.9599999785423279


train:  86%|████████▌ | 462/540 [00:25<00:04, 16.91it/s]

6400 : train_loss : 0.001049957573413849, train_acc: 0.949999988079071


train: 100%|██████████| 540/540 [00:30<00:00, 17.98it/s]
validation: 100%|██████████| 60/60 [00:01<00:00, 32.01it/s]


13 epoch, 6480 step: val_loss : 0.002735863672569394, val_acc: 0.9144998788833618
EarlyStopping counter: 3 out of 3


In [14]:
#save model
os.makedirs("./logs/models", exist_ok=True)
torch.save(model, os.path.join(log_model_path, "model.ckpt"))

In [16]:
#load model
loaded_model = torch.load(os.path.join(log_model_path,"val_loss-0.0025349785573780537"))
loaded_model.eval()
print(loaded_model)

LeNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (logSoftmax): LogSoftmax(dim=1)
)


In [17]:
def softmax(x, axis=0):
    "numpy softmax"
    max = np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(x-max)
    sum = np.sum(e_x, axis=axis, keepdims=True)
    f_x = e_x / sum
    return f_x

In [18]:
#test dataset download
test_batch_size = 100
test_dataset = FashionMNIST(data_root, download=True, train=False, transform=transforms.ToTensor())
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size= test_batch_size, shuffle=False, num_workers=1)


test_labels_list = []
test_preds_list = []
test_outputs_list = []
for i, (test_images, test_labels) in enumerate(tqdm(test_dataloader, position=0, leave=True, desc = "testing")):
    #forward
    test_outputs = loaded_model(test_images)
    _, test_preds = torch.max(test_outputs,1)

    final_outs = softmax(test_outputs.detach().numpy(), axis=1)
    test_outputs_list.extend(final_outs)
    test_preds_list.extend(test_preds.detach().numpy()) #gpu로 연산한 데이터는 넘파이로 사용할 때 반드시 detach로 풀어써줘서 cpu로 옮겨줘야함
    test_labels_list.extend(test_labels.detach().numpy()) #gpu로 연산한 데이터는 넘파이로 사용할 때 반드시 detach로 풀어써줘서 cpu로 옮겨줘야함
    

test_preds_list = np.array(test_preds_list) #acc를 구하기 위해 numpy로 바꿔
test_labels_list = np.array(test_labels_list)

print(f"acc: {np.mean(test_preds_list == test_labels_list)*100}")



testing: 100%|██████████| 100/100 [00:03<00:00, 29.47it/s]

acc: 84.13000000000001



