In [1]:
import cv2
import numpy as np
import torch
from torch import nn
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import albumentations as A
from sklearn.metrics import classification_report
from tqdm import tqdm

from glob import glob
import os

# 1. Dataloader

In [2]:
class OKNGDataset(Dataset):
    def __init__(
        self,
        ok_img_path,
        ng_img_path,
        train_val_split_ratio,
        img_size,
        mode
    ):
        super().__init__()
        ok_img_path_list = sorted(glob(os.path.join(ok_img_path, '*.jpg')))
        ng_img_path_list = sorted(glob(os.path.join(ng_img_path, '*.jpg')))
        
        if mode == 'train':
            ok_img_path_list = ok_img_path_list[
                :int(train_val_split_ratio * len(ok_img_path_list))
            ]
            ng_img_path_list = ng_img_path_list[
                :int(train_val_split_ratio * len(ng_img_path_list))
            ]
        elif mode == 'val':
            ok_img_path_list = ok_img_path_list[
                int(train_val_split_ratio * len(ok_img_path_list)):
            ]
            ng_img_path_list = ng_img_path_list[
                int(train_val_split_ratio * len(ng_img_path_list)):
            ]
        else:
            raise

        ok_label_list = [0] * len(ok_img_path_list)
        ng_label_list = [1] * len(ng_img_path_list)

        self.img_list = ok_img_path_list + ng_img_path_list
        self.label_list = ok_label_list + ng_label_list
        
        self.tensor_transform = T.Compose([
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        self.resize_transform = A.Resize(img_size, img_size)
        self.augmentation_transform = A.Compose([
            A.RandomRotate90(),
            A.HorizontalFlip(),
            A.VerticalFlip(),
            A.RandomBrightnessContrast(p=0.5),
        ])
        self.mode = mode

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, index):
        # print(self.img_list[index])
        img = cv2.imread(self.img_list[index])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img = self.resize_transform(image=img)['image']
        if self.mode == 'train':
            img = self.augmentation_transform(image=img)['image']
        img = self.tensor_transform(img)
        
        label = self.label_list[index]
        return img, label

In [3]:
train_dataset = OKNGDataset(
    ok_img_path='lg_innotek_ok_ng_dataset/ng_ok_image_dataset/ok_img/',
    ng_img_path='lg_innotek_ok_ng_dataset/ng_ok_image_dataset/ng_img/',
    train_val_split_ratio=0.8,
    img_size=256,
    mode='train'
)
len(train_dataset)

1654

In [4]:
train_dataset[0]

(tensor([[[-1.8610, -1.6727, -1.6555,  ..., -0.5596, -1.2788, -1.0219],
          [ 0.1254, -0.1828,  0.0398,  ..., -0.8678, -0.5938, -0.1999],
          [-1.4672, -0.4739,  0.3138,  ...,  0.1939, -1.2788, -0.8507],
          ...,
          [-0.9877,  0.6734, -0.3712,  ...,  1.6495, -0.5424,  0.5878],
          [-1.5014, -1.2959, -1.7925,  ..., -0.3883, -1.3644,  0.4508],
          [-1.8439, -1.9809, -1.9295,  ..., -1.0048, -0.0801, -1.6384]],
 
         [[-1.7731, -1.5805, -1.5630,  ..., -0.4426, -1.1779, -0.9153],
          [ 0.2577, -0.0574,  0.1702,  ..., -0.7577, -0.4776, -0.0749],
          [-1.3704, -0.3550,  0.4503,  ...,  0.3277, -1.1779, -0.7402],
          ...,
          [-0.8803,  0.8179, -0.2500,  ...,  1.8158, -0.4251,  0.7304],
          [-1.4055, -1.1954, -1.7031,  ..., -0.2675, -1.2654,  0.5903],
          [-1.7556, -1.8957, -1.8431,  ..., -0.8978,  0.0476, -1.5455]],
 
         [[-1.5430, -1.3513, -1.3339,  ..., -0.2184, -0.9504, -0.6890],
          [ 0.4788,  0.1651,

In [5]:
val_dataset = OKNGDataset(
    ok_img_path='lg_innotek_ok_ng_dataset/ng_ok_image_dataset/ok_img/',
    ng_img_path='lg_innotek_ok_ng_dataset/ng_ok_image_dataset/ng_img/',
    train_val_split_ratio=0.8,
    img_size=256,
    mode='val'
)
len(val_dataset)

414

In [6]:
val_dataset[0]

(tensor([[[-2.0152, -1.9980, -1.4329,  ..., -0.4226, -0.9363, -1.5357],
          [-2.0152, -1.3473, -0.9192,  ...,  1.4269, -0.7479, -1.9980],
          [-1.3987, -0.9705, -0.3369,  ..., -0.3712, -1.1760, -1.0904],
          ...,
          [ 0.4337,  1.4783,  1.3755,  ..., -0.8164,  0.0741, -0.6452],
          [-1.2445, -1.6727, -0.2342,  ...,  0.2111, -0.5938, -1.2788],
          [-1.2103,  0.9817, -1.4500,  ..., -0.6452, -0.7650, -1.0904]],
 
         [[-1.9307, -1.9132, -1.3354,  ..., -0.3025, -0.8277, -1.4405],
          [-1.9307, -1.2479, -0.8102,  ...,  1.5882, -0.6352, -1.9132],
          [-1.3004, -0.8627, -0.2150,  ..., -0.2500, -1.0728, -0.9853],
          ...,
          [ 0.5728,  1.6408,  1.5357,  ..., -0.7052,  0.2052, -0.5301],
          [-1.1429, -1.5805, -0.1099,  ...,  0.3452, -0.4776, -1.1779],
          [-1.1078,  1.1331, -1.3529,  ..., -0.5301, -0.6527, -0.9853]],
 
         [[-1.6999, -1.6824, -1.1073,  ..., -0.0790, -0.6018, -1.2119],
          [-1.6999, -1.0201,

In [7]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=8, num_workers=0)

In [8]:
for data, label in train_loader:
    print(data, data.shape)
    print(label, label.shape)
    break

tensor([[[[-1.9809, -1.8782, -0.4397,  ..., -1.2103, -1.9809, -1.8953],
          [-1.9124,  0.0741, -0.3369,  ...,  0.2624, -1.2788, -0.9020],
          [-1.8439,  0.2282, -0.5082,  ..., -1.0733, -0.2513, -1.0219],
          ...,
          [-1.2959,  0.1768, -1.0562,  ..., -0.2342,  0.2796, -1.0562],
          [-1.7069, -1.5528, -1.5870,  ...,  0.5707, -0.5596, -1.9467],
          [-1.4672, -1.7754, -1.8097,  ..., -0.3198, -1.1418, -2.0494]],

         [[-1.8957, -1.7906, -0.3200,  ..., -1.1078, -1.8957, -1.8081],
          [-1.8256,  0.2052, -0.2150,  ...,  0.3978, -1.1779, -0.7927],
          [-1.7556,  0.3627, -0.3901,  ..., -0.9678, -0.1275, -0.9153],
          ...,
          [-1.1954,  0.3102, -0.9503,  ..., -0.1099,  0.4153, -0.9503],
          [-1.6155, -1.4580, -1.4930,  ...,  0.7129, -0.4426, -1.8606],
          [-1.3704, -1.6856, -1.7206,  ..., -0.1975, -1.0378, -1.9657]],

         [[-1.6650, -1.5604, -0.0964,  ..., -0.8807, -1.6650, -1.5779],
          [-1.5953,  0.4265,  

# 2. Model

In [9]:
#     Conv2D(filters=8, kernel_size=3, activation='relu', padding='same', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
#     Conv2D(filters=8, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=32, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=32, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
#     Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
#     BatchNormalization(),
#     MaxPooling2D(pool_size=2, strides=2),

#     Flatten(),
#     Dense(units=256, activation='relu'),
#     Dropout(rate=0.3),
#     Dense(units=1, activation='sigmoid')

In [10]:
class ClassificationModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_1_1 = nn.Conv2d(3, 8, 3)
        self.relu_1_1 = nn.ReLU()
        self.conv_1_2 = nn.Conv2d(8, 8, 3)
        self.relu_1_2 = nn.ReLU()
        self.bn = nn.BatchNorm2d(8)
        self.max_pool_1 = nn.MaxPool2d(2, 2)
        
        self.flatten = nn.Flatten()
        self.linear_1 = nn.Linear(127008, 256)
        self.drop_out = nn.Dropout(0.3)
        self.linear_2 = nn.Linear(256, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv_1_1(x)
        x = self.relu_1_1(x)
        x = self.conv_1_2(x)
        x = self.relu_1_2(x)
        x = self.bn(x)
        x = self.max_pool_1(x)

        x = self.flatten(x)
        x = self.linear_1(x)
        x = self.drop_out(x)
        x = self.linear_2(x)
        x = self.sigmoid(x)

        return x

In [11]:
model = ClassificationModel()

In [12]:
for data, label in train_loader:
    print(model(data))
    break

tensor([[0.6190],
        [0.5018],
        [0.5018],
        [0.6458],
        [0.5859],
        [0.6772],
        [0.5615],
        [0.5316]], grad_fn=<SigmoidBackward0>)


# 3. Training pipeline

In [13]:
loss_func = nn.BCELoss()

In [14]:
optimizer = SGD(params=model.parameters(), lr=0.001)

In [15]:
# TRAINING
for epoch in range(10):
    print("Epoch", epoch)
    for data, label in tqdm(train_loader):
        
        # Switch model into training mode
        model.train()
    
        # Free all gradient in model
        # gradient = 0
        optimizer.zero_grad()
    
        # Forward model
        prediction = model(data)
    
        # Calculate loss
        loss_value = loss_func(prediction, label.unsqueeze(1).float())
        # print(loss_value)
    
        # Backward
        # gradient != 0
        loss_value.backward()
        
        # Optimize - Update weights
        optimizer.step()
        
        # break

Epoch 0


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:47<00:00,  4.37it/s]


Epoch 1


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:50<00:00,  4.10it/s]


Epoch 2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:48<00:00,  4.30it/s]


Epoch 3


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:48<00:00,  4.23it/s]


Epoch 4


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:46<00:00,  4.45it/s]


Epoch 5


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:47<00:00,  4.38it/s]


Epoch 6


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:46<00:00,  4.49it/s]


Epoch 7


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:47<00:00,  4.38it/s]


Epoch 8


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:46<00:00,  4.46it/s]


Epoch 9


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 207/207 [00:46<00:00,  4.42it/s]


In [16]:
# VALIDATION

prediction_list, label_list = [], []
with torch.no_grad():
    for data, label in tqdm(val_loader):
        # Switch model into evaluation mode
        model.eval()

        # Forward model
        prediction = model(data)
        
        # Calculate loss
        loss_value = loss_func(prediction, label.unsqueeze(1).float())
        # print(loss_value)
        
        # Prepare predictions and labels for metric
        prediction_np = prediction.numpy()
        prediction_np[prediction_np >= 0.5] = 1
        prediction_np[prediction_np < 0.5] = 0

        prediction_list.append(prediction_np)
        label_list.append(label.numpy())
        
        # break

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:08<00:00,  5.95it/s]


In [17]:
# Calculate metric

all_prediction = np.concatenate(prediction_list)
all_prediction.shape

(414, 1)

In [18]:
all_label = np.concatenate(label_list)
all_label.shape

(414,)

In [19]:
print(classification_report(all_prediction, all_label))

              precision    recall  f1-score   support

         0.0       0.60      0.76      0.67       180
         1.0       0.76      0.61      0.68       234

    accuracy                           0.67       414
   macro avg       0.68      0.68      0.67       414
weighted avg       0.69      0.67      0.67       414

