# 0. Import Module

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np

import cv2
import matplotlib.pyplot as plt

import time
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn

from sklearn.metrics import f1_score, accuracy_score

# 1. Read Dataset

In [None]:
# Base data folder; train_df.csv and the train/ image folder are read from here.
# The path is the Google Drive location used when the notebook runs on Colab.
data_dir = "/content/drive/MyDrive/Colab Notebooks/data/"

In [None]:
# Load the training metadata table, then drop its "index" column.
train_csv_path = data_dir + "train_df.csv"
train_df = pd.read_csv(train_csv_path)
train_df = train_df.drop(columns="index")

train_df.head()

Unnamed: 0,file_name,class,state,label
0,10000.png,transistor,good,transistor-good
1,10001.png,capsule,good,capsule-good
2,10002.png,transistor,good,transistor-good
3,10003.png,wood,good,wood-good
4,10004.png,bottle,good,bottle-good


In [None]:
# Keep only the columns the dataset needs. The explicit .copy() makes df_train
# an independent frame, so modifying it later does not trigger pandas'
# SettingWithCopyWarning.
df_train = train_df[["file_name", "label"]].copy()

df_train.head()

Unnamed: 0,file_name,label
0,10000.png,transistor-good
1,10001.png,capsule-good
2,10002.png,transistor-good
3,10003.png,wood-good
4,10004.png,bottle-good


In [None]:
# Build the label-string -> integer-id mapping; ids follow the sorted label order.
train_labels = df_train["label"]
label_unique = {
    label_name: class_id
    for class_id, label_name in enumerate(sorted(np.unique(train_labels)))
}

In [None]:
# Encode the string labels as integer class ids.
# The frame is rebuilt from train_df rather than modified with an in-place
# replace on a slice: this avoids pandas' SettingWithCopyWarning, touches only
# the "label" column, and keeps the cell safe to run more than once.
df_train = train_df[["file_name", "label"]].assign(
    label=lambda frame: frame["label"].map(label_unique)
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [None]:
# Display the frame to check that the "label" column now holds integer ids.
df_train

Unnamed: 0,file_name,label
0,10000.png,72
1,10001.png,15
2,10002.png,72
3,10003.png,76
4,10004.png,3
...,...,...
4272,14272.png,72
4273,14273.png,72
4274,14274.png,28
4275,14275.png,84


# 3. Define Augmentations

In [None]:
# image = cv2.imread(path + file_name)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# plt.imshow(image)

In [None]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensor

In [None]:
def _tensor_tail():
    """Return fresh Normalize + ToTensor steps, the shared tail of both pipelines."""
    return [
        A.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
        ToTensor(),
    ]


# Exactly one of these geometric transforms is applied to every training image.
_geometric_choices = [
    A.HorizontalFlip(p=1),
    A.RandomRotate90(p=1),
    A.VerticalFlip(p=1),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=1),
]

# Exactly one of these blur / noise transforms is applied to every training image.
_degradation_choices = [
    A.MotionBlur(p=1),
    A.Blur(blur_limit=5, p=1),
    A.GaussNoise(p=1),
]

# Training pipeline: resize -> one geometric op -> one blur/noise op -> normalize -> tensor.
trn_transform = A.Compose(
    [
        A.Resize(256, 256),
        A.OneOf(_geometric_choices, p=1),
        A.OneOf(_degradation_choices, p=1),
    ]
    + _tensor_tail()
)

# Augmentation-free pipeline: resize -> normalize -> tensor
# (presumably intended for validation data; it is not used elsewhere in this notebook).
vid_transform = A.Compose([A.Resize(256, 256)] + _tensor_tail())

In [None]:
# transformed = trn_transform(image=image)
# transformed_image = transformed["image"]

# plt.imshow(transformed_image)

# 4. Define Dataset

### 이미지 처리 및 타 패키지 비교

In [None]:
# Choose one sample image; `path` and `file_name` are the inputs expected by the
# (commented-out) image-loading comparison cells that follow.
idx = 0
path = f"{data_dir}train/"
file_name = df_train["file_name"].iloc[idx]

In [None]:
# # PIL 패키지

# from PIL import Image
# import matplotlib.pyplot as plt
# %matplotlib inline

# before_time = time.time()

# # PIL은 open()으로 image file을 읽어서 imageFile객체로 생성
# pil_image = Image.open(path + file_name)
# image = np.array(pil_image)
# after_time = time.time()

# print(f"PIL image load time: {after_time - before_time}")
# print(type(image))

In [None]:
# # skimage 패키지 

# from skimage import io

# before_time = time.time()

# # skimage는 imread()를 이용하여 image를 numpy 배열로 반환함.
# sk_image = io.imread(path + file_name)
# after_time = time.time()

# print(f"skimage image load time: {after_time - before_time}")
# print(type(sk_image))

In [None]:
# import cv2
# before_time = time.time()

# # opencv는 imread()를 이용하여 원본 RGB 이미지를 BGR 형태의 넘파이 배열로 반환함.
# cv_image = cv2.imread(path + file_name)
# cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
# after_time = time.time()

# print(f"opencv image load time: {after_time - before_time}")
# print(type(cv_image))

### 데이터셋 정의

In [None]:
class Anomaly_Dataset(Dataset):
  """Dataset yielding (image, label) pairs described by a DataFrame.

  Args:
    csv: DataFrame with a "file_name" column (image file names) and a
      "label" column.
    path: Base directory; images are read from ``path + train + '/'``.
    train: Name of the sub-folder of ``path`` that holds the images.
    transform: Optional callable invoked as ``transform(image=img)``; the
      ``"image"`` entry of its result replaces the loaded image.
  """
  def __init__(self, csv, path, train='train', transform=None):
    self.csv = csv
    self.path = path
    self.train = train
    self.transform = transform

  def __len__(self):
    """Return the number of rows in the DataFrame."""
    return len(self.csv)

  def __getitem__(self, idx):
    """Load the image for the idx-th row and return ``(image, label)``.

    Raises:
      FileNotFoundError: If OpenCV cannot read the image file.
    """
    # Positional lookup (.iloc): the DataLoader passes positions 0..len-1, which
    # only coincide with index labels for an untouched RangeIndex. Label-based
    # lookup breaks on any filtered or fold-split frame.
    file_name = self.csv["file_name"].iloc[idx]
    img_path = self.path + self.train + '/' + file_name

    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread signals failure by returning None; fail with a clear message
      # instead of an opaque error from cvtColor.
      raise FileNotFoundError(f"Could not read image file: {img_path}")
    # OpenCV loads images as BGR; convert to RGB channel order.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if self.transform:
      img = self.transform(image=img)["image"]

    label = self.csv["label"].iloc[idx]

    return img, label

# 5. Make DataLoader

In [None]:
# Training dataset: rows of df_train, images read from data_dir + "train/",
# with the augmenting trn_transform pipeline applied to each image.
dataset = Anomaly_Dataset(df_train, data_dir, transform=trn_transform)

In [None]:
# Shuffled training batches of 8 samples each.
trn_loader = DataLoader(dataset, batch_size=8, shuffle=True)

In [None]:
# # dataloader가 잘 돌아가는지 확인
# for images, labels in trn_loader:
#   break

In [None]:
# Fetch one batch here so `images` and `labels` exist on a fresh kernel;
# no other active cell defines them.
images, labels = next(iter(trn_loader))

print("=====image informations=====")
print(images.shape) # [batch_size, channels, height, width]
print(type(images))
print(images.dtype)

print("\n=====labels informations=====")
print(labels.shape) # [batch_size] - one integer class id per sample
print(type(labels))
print(labels.dtype)

=====image informations=====
torch.Size([8, 3, 256, 256])
<class 'torch.Tensor'>
torch.float32

=====labels informations=====
torch.Size([8])
<class 'torch.Tensor'>
torch.int64


# 6. Define Model

In [None]:
class CNN(nn.Module):
  """Small convolutional classifier for (batch, 3, 256, 256) inputs.

  Four conv -> ReLU -> 2x2 max-pool stages reduce the input to a
  (64, 16, 16) feature map, which is flattened and passed through two fully
  connected layers to produce one logit per class. Because ``fc1`` expects
  64 * 16 * 16 features, the spatial input size must be 256 x 256.

  Args:
    num_classes: Number of output logits. Defaults to 88, the value that was
      previously hard-coded.
  """
  def __init__(self, num_classes=88):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 8, 3, 1, 1) # (in_channel, out_channel, kernel_size, stride, padding)
    self.conv2 = nn.Conv2d(8, 16, 3, 1, 1)
    self.conv3 = nn.Conv2d(16, 32, 3, 1, 1)
    self.conv4 = nn.Conv2d(32, 64, 3, 1, 1)
    # NOTE: conv5 is not used in forward(); it is kept only so the module's
    # parameter names (state_dict keys) stay unchanged.
    self.conv5 = nn.Conv2d(64, 64, 3, 1, 1)
    self.maxpool = nn.MaxPool2d(2)

    self.fc1 = nn.Linear(64 * 16 * 16, 3136)

    # Final classification layer; the attribute name is kept as-is so existing
    # references and state_dict keys keep working.
    self.regressor = nn.Linear(3136, num_classes)

  def forward(self, X):
    """Return class logits of shape (batch, num_classes)."""
    # (batch, 3, 256, 256) -> (batch, 8, 128, 128)
    X = F.relu(self.conv1(X))
    X = self.maxpool(X)

    # (batch, 8, 128, 128) -> (batch, 16, 64, 64)
    X = F.relu(self.conv2(X))
    X = self.maxpool(X)

    # (batch, 16, 64, 64) -> (batch, 32, 32, 32)
    X = F.relu(self.conv3(X))
    X = self.maxpool(X)

    # (batch, 32, 32, 32) -> (batch, 64, 16, 16)
    X = F.relu(self.conv4(X))
    X = self.maxpool(X)

    # (batch, 64, 16, 16) -> (batch, 64 * 16 * 16 = 16384)
    X = torch.flatten(X, start_dim=1)

    # (batch, 16384) -> (batch, 3136). The ReLU matters: without a
    # non-linearity, fc1 followed by the output layer collapses into a single
    # linear map.
    X = F.relu(self.fc1(X))

    # (batch, 3136) -> (batch, num_classes)
    out = self.regressor(X)
    return out

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

print(f"using {device} device")

using cuda device


In [None]:
model = CNN().to(device) # create the model instance and move it to the selected device
print(model)

CNN(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16384, out_features=3136, bias=True)
  (regressor): Linear(in_features=3136, out_features=88, bias=True)
)


# Optimization

In [None]:
# 하이퍼파라미터
# Hyperparameters: optimizer step size and number of passes over the training data
learning_rate = 0.001
epochs = 5

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
  """Train `model` for one pass over `dataloader`.

  Prints the loss of every 100th batch together with the number of samples
  processed so far.

  Args:
    dataloader: Iterable of (inputs, targets) batches that exposes a sized
      ``.dataset`` attribute (used for the progress print).
    model: Module to optimize; it is switched to training mode.
    loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
    optimizer: Optimizer holding `model`'s parameters.
  """
  size = len(dataloader.dataset)

  # Send batches to the device the model lives on, rather than relying on a
  # notebook-level `device` global. A parameter-free model falls back to the CPU.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')

  # Make sure training-only layer behavior is active (an evaluation pass may
  # have left the model in eval mode).
  model.train()

  for batch, (X, y) in enumerate(dataloader):
    X = X.to(target_device)
    y = y.to(target_device)

    # Forward pass
    pred = model(X)

    # Loss
    loss = loss_fn(pred, y)

    # Backward pass
    optimizer.zero_grad() # reset accumulated gradients to zero
    loss.backward() # back-propagate the loss
    optimizer.step() # update the parameters using the collected gradients

    if batch % 100 == 0:
      # Example: 4277 samples with batch_size = 8 gives 535 batches per epoch.
      loss_value, current = loss.item(), batch * len(X)
      print(f"loss: {loss_value:>7f}    [{current:>5d} / {size:>5d}]")

In [None]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X) # [bath_size, class_nums]
            print((pred.argmax(1)==y).type(torch.float).sum())
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches # 평균값
    correct /= size # 맞게 예측한 개수 / 전체 개수
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# 7. Train

In [None]:
# 손실함수
loss_fn = nn.CrossEntropyLoss()

# 옵티마이저
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run the configured number of passes over the training loader.
for epoch_number in range(1, epochs + 1):
  header = f"Epoch {epoch_number}\n------------------------------"
  print(header)
  train_loop(trn_loader, model, loss_fn, optimizer)
print("Done!")

Epoch 1
------------------------------
loss: 4.483861    [    0 /  4277]
loss: 0.920115    [  800 /  4277]
loss: 0.598442    [ 1600 /  4277]
loss: 0.850447    [ 2400 /  4277]
loss: 0.625620    [ 3200 /  4277]
loss: 0.550831    [ 4000 /  4277]
Epoch 2
------------------------------
loss: 1.080445    [    0 /  4277]
loss: 2.069396    [  800 /  4277]
loss: 1.879634    [ 1600 /  4277]
loss: 1.453109    [ 2400 /  4277]
loss: 0.768444    [ 3200 /  4277]
loss: 0.282486    [ 4000 /  4277]
Epoch 3
------------------------------
loss: 1.079906    [    0 /  4277]
loss: 0.726559    [  800 /  4277]
loss: 0.268285    [ 1600 /  4277]
loss: 0.337882    [ 2400 /  4277]
loss: 0.554853    [ 3200 /  4277]
loss: 0.607296    [ 4000 /  4277]
Epoch 4
------------------------------
loss: 0.909660    [    0 /  4277]
loss: 0.830180    [  800 /  4277]
loss: 0.120272    [ 1600 /  4277]
loss: 1.116854    [ 2400 /  4277]
loss: 0.745863    [ 3200 /  4277]
loss: 0.133972    [ 4000 /  4277]
Epoch 5
--------------------

In [None]:
# Display the trained model's layer structure.
model

# Split Dataset

In [None]:
# df_train['fold'] = pd.read_csv(data_dir + 'df_folds.csv')['fold']

# df_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,file_name,label,fold
0,10000.png,72,0
1,10001.png,15,0
2,10002.png,72,0
3,10003.png,76,0
4,10004.png,3,0
...,...,...,...
4272,14272.png,72,4
4273,14273.png,72,4
4274,14274.png,28,4
4275,14275.png,84,4


In [None]:
# trn_fold = [i for i in range(5) if i not in [4]]
# vid_fold = [4]

# trn_idx = train_df.loc[df_train["fold"].isin(trn_fold)].index
# vid_idx = train_df.loc[df_train["fold"].isin(vid_fold)].index

In [None]:
# trn_idx

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            3752, 3769, 3819, 3857, 3879, 3907, 3989, 4014, 4125, 4193],
           dtype='int64', length=3422)

In [None]:
# vid_idx

Int64Index([2253, 2384, 2587, 2631, 2783, 2819, 2895, 2909, 2914, 2992,
            ...
            4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276],
           dtype='int64', length=855)