<a href="https://colab.research.google.com/github/R12942159/NTU_ML/blob/Hw3/Embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import glob
import csv
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn

from PIL import Image
from tqdm.auto import tqdm
from torch.optim import Adam
from torchvision import transforms as tr
from torch.utils.data import DataLoader, Dataset, random_split

In [None]:
from google.colab import files
files.upload()
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c ml2023-fall-hw3
!unzip 'ml2023-fall-hw3.zip'

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using: {device}")

Using: cuda


#### Build Dataset and Dataloader

In [6]:
class Hw3_dataset(torch.utils.data.Dataset):
    def __init__(self, data_path: str, to_tensor, transform, augmentation=True) -> None:
        self.all_data = np.load(data_path)
        self.to_tensor = to_tensor
        self.transform = transform
        self.augmentation = augmentation

        hflip = tr.RandomHorizontalFlip(p=0.5)
        vflip = tr.RandomVerticalFlip(p=0.5)
        rotate = tr.RandomRotation(degrees=15)
        color = tr.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)

        def origine(x): return x
        if augmentation:
            self.augmentation = [hflip, vflip, rotate, origine]
        else:
            self.augmentation = [origine]

    def __len__(self):
        return len(self.all_data)

    def __getitem__(self, idx):
        # read img
        img = self.all_data[idx] # (3, 32, 32)

        # to tensor type
        img = self.to_tensor(img)
        img /= 255.

        # transform/normalize img
        img_aug = self.transform(img)

        # augmentation img
        augment = random.choice(self.augmentation)
        img_aug = augment(img_aug)
        return img_aug, img

In [7]:
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

img_ds = Hw3_dataset(
    '/content/data/trainX.npy',
    to_tensor = tr.Compose([
        tr.ToTensor(),
    ]),
    transform = tr.Compose([
        tr.Normalize(mean=mean, std=std),
    ]),
    augmentation = True,
)

##### Randomly divided into a training and validation dataset.

In [8]:
train_prob = 0.85

train_size = int(len(img_ds) * train_prob)
val_size = int(len(img_ds) - train_size)
train_ds, val_ds = random_split(img_ds, [train_size, val_size])

##### Build Dataloders

In [17]:
BATCH_SIZE = 128
IMG_SIZE = 32

train_loader = DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, BATCH_SIZE, shuffle=False, num_workers=0)

#### Contruct Model

In [18]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding='same'):
        super(ConvBlock, self).__init__()
        self.convblock = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding='same'),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),

            nn.Conv2d(out_channels, out_channels, kernel_size, stride=1, padding='same'),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def __call__(self, x):
        return self.convblock(x)

class Net(nn.Module):
    def __init__(self, in_channels=3, init_channels=16, latent_dim=128, img_size=IMG_SIZE):
        super(Net, self).__init__()
        self.init_channels = init_channels
        self.img_size = img_size

        # Encoder
        self.encoder1 = ConvBlock(in_channels, init_channels) # (3, H, W) -> (16, H, W)
        self.pooling1 = nn.MaxPool2d(2)
        self.encoder2 = ConvBlock(init_channels, init_channels*2) # (16, H/2, W/2) -> (32, H/2, W/2)
        self.pooling2 = nn.MaxPool2d(2)
        self.encoder3 = ConvBlock(init_channels*2, init_channels*4) # (32, H/4, W/4) -> (64, H/4, W/4)

        # Decoder
        self.upconv2 = nn.ConvTranspose2d(init_channels*4, init_channels*2, kernel_size=2, stride=2) # (256, H/4, W/4) -> (128, H/2, W/2)
        self.decoder2 = ConvBlock(init_channels*4, init_channels*2)
        self.upconv1 = nn.ConvTranspose2d(init_channels*2, init_channels, kernel_size=2, stride=2) # (128, H/2, W/2) -> (64, H, W)
        self.decoder1 = ConvBlock(init_channels*2, init_channels)

        # Output
        self.output = nn.Conv2d(init_channels, in_channels, kernel_size=1) # (64, H, W) -> (num_classes, H, W)

    def __call__(self, x):
        # Encoder
        encode1 = self.encoder1(x)
        encode2 = self.encoder2(self.pooling1(encode1))

        # Bottleneck
        bottleneck = self.encoder3(self.pooling2(encode2))

        # Decoder
        x = torch.cat((self.upconv2(bottleneck), encode2), dim=1) # (512, H/2, W/2)
        x = self.decoder2(x) # (128, H/4, W/4)
        x = torch.cat((self.upconv1(x), encode1), dim=1) # (128, H, W)
        x = self.decoder1(x) # (64, H, W)
        x = self.output(x)

        return bottleneck, x

In [19]:
import torchsummary

model = Net().to(device)
torchsummary.summary(model, (3, 32, 32), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
            Conv2d-4           [-1, 16, 32, 32]           2,320
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
         MaxPool2d-7           [-1, 16, 16, 16]               0
            Conv2d-8           [-1, 32, 16, 16]           4,640
       BatchNorm2d-9           [-1, 32, 16, 16]              64
             ReLU-10           [-1, 32, 16, 16]               0
           Conv2d-11           [-1, 32, 16, 16]           9,248
      BatchNorm2d-12           [-1, 32, 16, 16]              64
             ReLU-13           [-1, 32, 16, 16]               0
        MaxPool2d-14             [-1, 3

#### Define training and testing process

In [20]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset) # number of samples
    num_batches = len(dataloader) # batches per epoch

    model.train() # to training mode.
    epoch_loss = 0
    for batch_i, (img_aug, img) in enumerate(tqdm(dataloader, leave=False)):
        img_aug, img = img_aug.to(device, dtype=torch.float), img.to(device,  dtype=torch.float) # move data to GPU

        _, reconsturction = model(img_aug)
        loss = loss_fn(reconsturction, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # write to logs
        epoch_loss += loss.item() # tensor -> python value

    # return avg loss of epoch, acc of epoch
    return epoch_loss/num_batches


def validate(dataloader, model, loss_fn):
    size = len(dataloader.dataset) # number of samples
    num_batches = len(dataloader) # batches per epoch

    model.eval() # model to test mode.
    epoch_loss = 0

    # No gradient for test data
    with torch.no_grad():
        for batch_i, (img_aug, img) in enumerate(dataloader):
            img_aug, img = img_aug.to(device,  dtype=torch.float), img.to(device,  dtype=torch.float)

            _, reconsturction = model(img_aug)
            loss = loss_fn(reconsturction, img)

            # write to logs
            epoch_loss += loss.item()

    return epoch_loss/num_batches

In [21]:
EPOCHS = 100

model = Net().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

logs = {
    'train_loss': [], 'val_loss': []
}

# early stopping
patience = 10
counter = 0
best_loss = np.inf

for epoch in tqdm(range(EPOCHS)):
    train_loss = train(train_loader, model, loss_fn, optimizer)
    val_loss = validate(val_loader, model, loss_fn)

    print(f'EPOCH: {(epoch+1):04d} -> train_loss: {train_loss:.4f} \ val_loss: {val_loss:.4f}')

    logs['train_loss'].append(train_loss)
    logs['val_loss'].append(val_loss)

    # if epoch % 10 == 0:
    #     torch.save(model.state_dict(), f'DL_hw2_epoch{epoch+1}_model.pth')
    # chcek improvement
    if val_loss <  best_loss:
        counter = 0
        best_loss = val_loss
        torch.save(model.state_dict(), f'/content/drive/MyDrive/NTU_ML/Hw3/ckpt/{epoch+1}best_model.pth')
        print('-------------------- Model Save --------------------')
    else:
        counter += 1
    if counter >= patience:
        print('-------------------- Early Stop --------------------')
        break

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0001 -> train_loss: 1.5361 \ val_loss: 1.5256
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0002 -> train_loss: 1.5147 \ val_loss: 1.5223
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0003 -> train_loss: 1.5139 \ val_loss: 1.5207
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0004 -> train_loss: 1.5131 \ val_loss: 1.5203
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0005 -> train_loss: 1.5126 \ val_loss: 1.5196
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0006 -> train_loss: 1.5118 \ val_loss: 1.5191
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0007 -> train_loss: 1.5116 \ val_loss: 1.5197


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0008 -> train_loss: 1.5112 \ val_loss: 1.5199


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0009 -> train_loss: 1.5109 \ val_loss: 1.5189
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0010 -> train_loss: 1.5110 \ val_loss: 1.5186
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0011 -> train_loss: 1.5109 \ val_loss: 1.5190


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0012 -> train_loss: 1.5108 \ val_loss: 1.5182
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0013 -> train_loss: 1.5104 \ val_loss: 1.5191


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0014 -> train_loss: 1.5102 \ val_loss: 1.5186


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0015 -> train_loss: 1.5105 \ val_loss: 1.5186


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0016 -> train_loss: 1.5105 \ val_loss: 1.5184


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0017 -> train_loss: 1.5104 \ val_loss: 1.5183


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0018 -> train_loss: 1.5101 \ val_loss: 1.5182
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0019 -> train_loss: 1.5104 \ val_loss: 1.5183


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0020 -> train_loss: 1.5103 \ val_loss: 1.5182
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0021 -> train_loss: 1.5099 \ val_loss: 1.5184


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0022 -> train_loss: 1.5104 \ val_loss: 1.5186


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0023 -> train_loss: 1.5102 \ val_loss: 1.5182


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0024 -> train_loss: 1.5105 \ val_loss: 1.5182


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0025 -> train_loss: 1.5100 \ val_loss: 1.5181
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0026 -> train_loss: 1.5097 \ val_loss: 1.5183


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0027 -> train_loss: 1.5100 \ val_loss: 1.5184


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0028 -> train_loss: 1.5102 \ val_loss: 1.5183


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0029 -> train_loss: 1.5099 \ val_loss: 1.5178
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0030 -> train_loss: 1.5100 \ val_loss: 1.5180


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0031 -> train_loss: 1.5099 \ val_loss: 1.5181


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0032 -> train_loss: 1.5100 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0033 -> train_loss: 1.5098 \ val_loss: 1.5180


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0034 -> train_loss: 1.5099 \ val_loss: 1.5181


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0035 -> train_loss: 1.5100 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0036 -> train_loss: 1.5098 \ val_loss: 1.5177
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0037 -> train_loss: 1.5101 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0038 -> train_loss: 1.5099 \ val_loss: 1.5181


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0039 -> train_loss: 1.5097 \ val_loss: 1.5180


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0040 -> train_loss: 1.5101 \ val_loss: 1.5176
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0041 -> train_loss: 1.5097 \ val_loss: 1.5180


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0042 -> train_loss: 1.5096 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0043 -> train_loss: 1.5100 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0044 -> train_loss: 1.5096 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0045 -> train_loss: 1.5097 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0046 -> train_loss: 1.5096 \ val_loss: 1.5174
-------------------- Model Save --------------------


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0047 -> train_loss: 1.5098 \ val_loss: 1.5178


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0048 -> train_loss: 1.5096 \ val_loss: 1.5183


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0049 -> train_loss: 1.5098 \ val_loss: 1.5176


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0050 -> train_loss: 1.5094 \ val_loss: 1.5176


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0051 -> train_loss: 1.5096 \ val_loss: 1.5175


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0052 -> train_loss: 1.5099 \ val_loss: 1.5181


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0053 -> train_loss: 1.5094 \ val_loss: 1.5180


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0054 -> train_loss: 1.5095 \ val_loss: 1.5179


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0055 -> train_loss: 1.5095 \ val_loss: 1.5178


  0%|          | 0/60 [00:00<?, ?it/s]

EPOCH: 0056 -> train_loss: 1.5094 \ val_loss: 1.5176
-------------------- Early Stop --------------------
