# Step 1: Import lib

In [9]:
import torch as torch # core lib for deep learning methods
import time as time   # maybe I will use time to test how long a piece of code takes
import numpy as np    # it's likely that we use numpy to perform matrix manipulation
import cv2 as cv      # image I/O
import os as os

# Step 2: Build custom class for the dataset
* ref: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
* Custom Dataset needs at least the following 3 methods:
    1. `__init__`: This function is run once when instantiating the Dataset object.
    2. `__len__`: This function returns the number of samples in our dataset.
        * At least, dataloader will need to call this function.
    3. `__getitem__`: This function loads and returns a sample from the dataset at the given index (i.e., `idx`).

In [2]:
from torch.utils.data import Dataset

class CAT_DOG_DATASET(Dataset):
    
    def __init__(self, img_dir, transform=None):
        pass
    
    def __len__(self):
        pass
    
    def __getitem__(self, idx):
        return image, label

# Step 3: Setup dataloader

In [3]:
from torch.utils.data import DataLoader

dataset_tran = CAT_DOG_DATASET(img_dir="./train")
dataset_test = CAT_DOG_DATASET(img_dir="./test")
tran_loader = DataLoader(dataset_tran, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=64)

TypeError: 'NoneType' object cannot be interpreted as an integer

# Step 4: Get device for training
* ref: https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


# Step 5: Define the class (for model)
* `nn.Conv2d`: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
* `nn.Linear`: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear
* [calculator](https://madebyollin.github.io/convnet-calculator/) for `width`, `height`, `channels`, `stride`, `padding`

In [7]:
from torch import nn

class CNN_v1(nn.Module):
    
    def __init__(self, img_size): # I add one more param here, i.e., img_size, for changing CNN structure auto
        super(CNN_v1, self).__init__()
        
        self.img_size = img_size # assume (B, C=3, H=256, W=256)
        
        self.cspec = [3, 64, 32, 16] # cspec stands for conv spec
        self.fspec = [128, 64, 1] # fspec stands for fully connected layer spec

        self.repeat_conv = nn.Sequential(

            # 換換不同的寫法 v1
            nn.Conv2d(
                in_channels = self.cspec[0],
                out_channels = self.cspec[1],
                kernel_size = (3, 3), # this could be tuple, i.e., (3,3), or just integer i.e., 3.
                stride = 2, # based on the calculator mentioned above, this setting will make spatial size half
                padding = 1 # i.e., 256 x 256 -> 128 x 128 
            ), # (B, C=64, H=128, W=128)
            
            nn.MaxPool2d(kernel_size=2), # (B, C=64, H=64, W=64)

            # 換換不同的寫法 v2
            nn.Conv2d(in_channels=self.cspec[1], out_channels=self.cspec[2], kernel_size=(3, 3), stride=2, padding=1), # (B, C=32, H=32, W=32)
            
            nn.MaxPool2d(kernel_size=2), # (B, C=32, H=16, W=16)
            
            nn.Conv2d(in_channels=self.cspec[2], out_channels=self.cspec[3], kernel_size=(3, 3), stride=2, padding=1), # (B, C=16, H=8, W=8)
        )

        self.flatten = nn.Flatten()

        # 需要優化 #
        C = 16
        H = 8
        W = H # assume square
        self.repeat_dense = nn.Sequential(
            nn.Linear(in_features= C*H*W, out_features=self.fspec[0]),
            nn.Linear(in_features= self.fspec[0], out_features=self.fspec[1]),
            nn.Linear(in_features= self.fspec[1], out_features=self.fspec[2]),
        )
    
    def forward(self, img):
        feature_map = self.repeat_conv(img)
        features = self.flatten(feature_map)
        logits = self.repeat_dense(features)
        return logits
    
def test():
    batch_size = 10
    channel_num = 3
    img_size = 256

    CNN_model = CNN_v1(img_size=img_size)
    
    # Usually use torch.randn to test if model can work as expected
    input_img = torch.randn(batch_size, channel_num, img_size, img_size) # (batch_size, channel, width, height)
    
    output_val= CNN_model(input_img)
    
    assert output_val.detach().numpy().shape == (batch_size, 1)