In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import SGD
from torch.optim.lr_scheduler import LinearLR
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.metrics import accuracy_score, recall_score, precision_score

### GPU 사용 설정

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

### 데이터 가져오기

In [3]:
# /content/drive/MyDrive/고모부_머신러닝/dogncat
paths = []
dataset_type = []
labels = []

def make_dataframe(dirpath):
    for dirname, _, filenames in os.walk(dirpath):
        for filename in filenames:
            file_path = dirname+'/'+filename
            paths.append(file_path)

            if '/training_set' in file_path:
                dataset_type.append('train')
            elif '/test_set' in file_path:
                dataset_type.append('test')
            else:
                dataset_type.append('N/A')
            
            if 'dogs' in file_path:
                labels.append('DOG')
            elif 'cats' in file_path:
                labels.append('CAT')
            else:
                labels.append('N/A')

    df = pd.DataFrame({'path' : paths, 'type' : dataset_type, 'label' : labels})

    return df

In [4]:
cnd_df = make_dataframe('/content/drive/MyDrive/고모부_머신러닝/dogncat')
cnd_df.head()

Unnamed: 0,path,type,label
0,/content/drive/MyDrive/고모부_머신러닝/dognc...,test,CAT
1,/content/drive/MyDrive/고모부_머신러닝/dognc...,test,CAT
2,/content/drive/MyDrive/고모부_머신러닝/dognc...,test,CAT
3,/content/drive/MyDrive/고모부_머신러닝/dognc...,test,CAT
4,/content/drive/MyDrive/고모부_머신러닝/dognc...,test,CAT


In [5]:
cnd_df = cnd_df[cnd_df['path'].str.contains('.jpg')].copy() # '.jpg'파일만 저장
cnd_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 10028 entries, 0 to 10031
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   path    10028 non-null  object
 1   type    10028 non-null  object
 2   label   10028 non-null  object
dtypes: object(3)
memory usage: 313.4+ KB


In [6]:
# train, valid dataset 만들기

from sklearn.model_selection import train_test_split

# train, validation 분리
train_df, valid_df = train_test_split(cnd_df[cnd_df['type']=='train'], test_size = 0.25)

# train dataset
train_data = train_df['path'].values
train_label = train_df['label'].values
train_label_indices = train_df['label'].replace(['CAT', 'DOG'], [0, 1]).values

# validation dataset
valid_data = valid_df['path'].values
valid_label = valid_df['label'].values
valid_label_indices = valid_df['label'].replace(['CAT', 'DOG'], [0,1]).values


In [7]:
train_df.head()

Unnamed: 0,path,type,label
4140,/content/drive/MyDrive/고모부_머신러닝/dognc...,train,CAT
7251,/content/drive/MyDrive/고모부_머신러닝/dognc...,train,DOG
4798,/content/drive/MyDrive/고모부_머신러닝/dognc...,train,CAT
7654,/content/drive/MyDrive/고모부_머신러닝/dognc...,train,DOG
3622,/content/drive/MyDrive/고모부_머신러닝/dognc...,train,CAT


In [8]:
count_c = 0
count_d = 0
for file_name in valid_data:
    if 'cats' in file_name:
        count_c += 1
    elif 'dogs' in file_name:
        count_d += 1

print(count_c, count_d)

983 1019


### 샘플용 데이터 만들기

In [9]:
sample_dog = train_df[train_df['label'] == 'DOG'].sample(30)
sample_cat = train_df[train_df['label'] == 'CAT'].sample(30)

sample_df = pd.concat([sample_dog, sample_cat])
# sample_df = sample_df.sample(frac=1).reset_index()
sample_tr, sample_val = train_test_split(sample_df, test_size=0.2)

In [10]:
sample_tr['label'].value_counts()

CAT    26
DOG    22
Name: label, dtype: int64

In [11]:
sample_tr_data = sample_tr['path'].values
sample_tr_label = sample_tr['label'].replace(['CAT', 'DOG'], [0,1]).values
sample_val_data = sample_val['path'].values
sample_val_label = sample_val['label'].replace(['CAT', 'DOG'], [0,1]).values

### 테스트 데이터 만들기

In [12]:
# test 데이터 만들기
test_data = cnd_df['path'][cnd_df['type']=='test']
test_data

0       /content/drive/MyDrive/고모부_머신러닝/dognc...
1       /content/drive/MyDrive/고모부_머신러닝/dognc...
2       /content/drive/MyDrive/고모부_머신러닝/dognc...
3       /content/drive/MyDrive/고모부_머신러닝/dognc...
4       /content/drive/MyDrive/고모부_머신러닝/dognc...
                              ...                        
2020    /content/drive/MyDrive/고모부_머신러닝/dognc...
2021    /content/drive/MyDrive/고모부_머신러닝/dognc...
2022    /content/drive/MyDrive/고모부_머신러닝/dognc...
2023    /content/drive/MyDrive/고모부_머신러닝/dognc...
2024    /content/drive/MyDrive/고모부_머신러닝/dognc...
Name: path, Length: 2023, dtype: object

### Custom dataset 만들기

In [13]:
class MyDataset(Dataset):
    def __init__(self, datapath, label=None):
        super(MyDataset, self).__init__()

        self.path = datapath
        self.label = label

    def __len__(self):
        return len(self.path)

    def __getitem__(self,idx):

        image = cv2.cvtColor(cv2.imread(self.path[idx]), cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (244,244))
        image = np.asarray(image, dtype=np.float32).transpose(2,0,1)
        normal_image = (image - np.amin(image)) / (np.amax(image) - np.amin(image))
        
        if self.label is not None:
            label = self.label[idx]

        return normal_image, label


In [14]:
train_dataset = MyDataset(train_data, train_label_indices)
valid_dataset = MyDataset(valid_data, valid_label_indices)
test_dataset = MyDataset(test_data)

### 샘플용 dataset만들기

In [15]:
sample_tr_dataset = MyDataset(sample_tr_data, sample_tr_label)
sample_val_dataset = MyDataset(sample_val_data, sample_val_label)

In [17]:
print(len(sample_tr_dataset))
print(len(sample_val_dataset))

48
12


### Loader 만들기

In [16]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False) # 왜 valid loader에서는 shuffle을 false로 하지?

### 샘플용 loader 만들기

In [18]:
sample_tr_loader = DataLoader(sample_tr_dataset, batch_size=4, shuffle=True)
sample_val_loader = DataLoader(sample_val_dataset, batch_size=2, shuffle=False)

In [19]:
len(sample_val_loader)

6

### 모델 만들기

In [20]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 5, kernel_size=3, padding='same')
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(5*244*244, 2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # print('input size :', x.shape)
        # print('input max :', torch.amax(x, dim=(1,2,3)))
        # print('input min :', torch.amin(x, dim=(1,2,3)))
        # print('input l2norm :', torch.linalg.vector_norm(x, dim=(1,2,3)))

        conv = self.conv1(x)
        # print('conv size :', conv.shape)
        # print('conv max :', torch.amax(conv, dim=(1,2,3)))
        # print('conv min :', torch.amin(conv, dim=(1,2,3)))
        # print('conv l2norm :', torch.linalg.vector_norm(conv, dim=(1,2,3)))

        conv_out = self.relu(conv)
        # print('conv_out size :', conv_out.shape)
        # print('conv_out max :', torch.amax(conv_out, dim=(1,2,3)))
        # print('conv_out min :', torch.amin(conv_out, dim=(1,2,3)))
        # print('conv_out l2norm :', torch.linalg.vector_norm(conv_out, dim=(1,2,3)))

        fc_input = conv_out.view(conv_out.size(0), -1)
        # print('fc_input size :', fc_input.shape)
        # print('fc_input max :', torch.amax(fc_input, dim=(1)))
        # print('fc_input min :', torch.amin(fc_input, dim=(1)))
        # print('fc_input l2norm :', torch.linalg.vector_norm(fc_input, dim=(1)))

        fc_logit = self.fc1(fc_input)
        # print('fc_logit size :', fc_logit.shape)
        # print('fc_logit max :', torch.amax(fc_logit, dim=(1)))
        # print('fc_logit min :', torch.amin(fc_logit, dim=(1)))
        # print('fc_logit l2norm :', torch.linalg.vector_norm(fc_logit, dim=(1)))
        # print()

        return fc_logit

In [21]:
model = CNN().to(device)
model

CNN(
  (conv1): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (relu): ReLU()
  (fc1): Linear(in_features=297680, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
)

In [35]:

ex_image, ex_label = next(iter(sample_tr_loader))
ex_image = ex_image.to(device)
ex_label = ex_label.to(device)
print(ex_image.shape)
print(ex_label)
model_logit, model_output = model(ex_image)
print(model_logit)
print(model_output)

torch.Size([4, 3, 244, 244])
tensor([1, 1, 1, 0], device='cuda:0')
tensor([[-0.5536,  0.6319],
        [ 0.0263, -0.0585],
        [-0.4439,  0.3296],
        [ 0.1677, -0.2683]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[0.2341, 0.7659],
        [0.5212, 0.4788],
        [0.3157, 0.6843],
        [0.6073, 0.3927]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


In [38]:
print(type(ex_label))

<class 'torch.Tensor'>


In [36]:
print(type(model_logit))
print(type(model_output))

<class 'torch.Tensor'>
<class 'torch.Tensor'>


In [37]:
print(torch.argmax(model_output, dim=1))

tensor([1, 0, 1, 0], device='cuda:0')


In [43]:
print(type(model_output.cpu().detach().numpy()))

<class 'numpy.ndarray'>


### loss function

In [22]:
loss_fn = nn.CrossEntropyLoss().to(device)

### optimzer, lr_scheduler 설정

In [23]:
optimizer = SGD(model.parameters(), lr=0.001)
scheduler = LinearLR(optimizer)

### 모델 training

In [None]:
# accuracy, recall, precision 구하는 함수 만들기!!

def confusion_matrix(predicted, label, class_num):
    TP_num = 0
    TN_num = 0
    FP_num = 0
    FN_num = 0
    for i in range(label):
        pred_value = predicted[i]
        label_value = label[i]

        for class_idx in range(class_num):
            if (pred_value == class_idx) and (label_value) == class_idx:
                TP_num += 1
                
                

In [24]:
def train_one_epoch(loader):
    running_loss = 0.
    last_loss = 0.

    for i, data in enumerate(loader):
        image_data, label_data = data
        image_data = image_data.to(device)
        label_data = label_data.to(device)

        optimizer.zero_grad()

        hyphothesis = model(image_data)

        loss = loss_fn(hyphothesis, label_data)

        loss.backward()

        # print(f'batch{i+1} loss : {loss}')
        # print()
        # print(f'gradient max :', torch.amax(torch.tensor([torch.amax(param.grad) for param in model.parameters()])))
        # print(f'gradient min :', torch.amin(torch.tensor([torch.amax(param.grad) for param in model.parameters()])))
        # print(f'gradient l2norm :', torch.amax(torch.tensor([torch.linalg.vector_norm(param.grad) for param in model.parameters()])))
        # print()
        optimizer.step()

        running_loss += loss.item()

        # last_loss = running_loss / len(loader)
        # print(f'Average batch loss :', last_loss)
        # print()
        # last_loss = 0


        if i+1 == len(loader):
            last_loss = running_loss / len(loader)
            print(f'Average batch loss :', last_loss)
            print()
            running_loss = 0

    return last_loss


### Training, Evaluation

In [25]:
epoch = 30
running_loss = 0.
running_vloss = 0.
for i in range(epoch):
    print(f'========= Epoch{i+1} =========')
    print()
    # train
    model.train()
    for batch_idx, data in enumerate(sample_tr_loader):
        image_data, label_data = data
        image_data = image_data.to(device)
        label_data = label_data.to(device)
        optimizer.zero_grad()
        tr_output= model(image_data)
        loss = loss_fn(tr_output, label_data)
        print(f'batch{batch_idx+1} loss :', loss)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

        if batch_idx + 1 == len(sample_tr_loader):
            avg_loss = running_loss / len(sample_tr_loader)
            print('Loss/train :', avg_loss)
            running_loss = 0.

    #evaluate
    model.eval()
    for batch_idx, val_data in enumerate(sample_tr_loader): # training data로 우선 evaluation해보기
        valid_image, valid_label = val_data
        valid_image = valid_image.to(device)
        valid_label = valid_label.to(device)
        val_output = model(valid_image)
        val_output_idx = torch.argmax(val_output, dim=1)
        vloss = loss_fn(val_output, valid_label)
        running_vloss += vloss.item()
        
        if batch_idx + 1 == len(sample_tr_loader):
            avg_vloss = running_vloss / len(sample_tr_loader)
            print('Loss/valid :', avg_vloss)
            print('Accuracy :', accuracy_score(valid_label.cpu().detach().numpy(), val_output_idx.cpu().detach().numpy()))
            print('Recall :', recall_score(valid_label.cpu().detach().numpy(), val_output_idx.cpu().detach().numpy()))
            print('Precision :', precision_score(valid_label.cpu().detach().numpy(), val_output_idx.cpu().detach().numpy()))
            running_vloss = 0.



batch1 loss : tensor(0.7340, grad_fn=<NllLossBackward0>)
batch2 loss : tensor(2.3638, grad_fn=<NllLossBackward0>)
batch3 loss : tensor(2.1353, grad_fn=<NllLossBackward0>)
batch4 loss : tensor(1.4521, grad_fn=<NllLossBackward0>)
batch5 loss : tensor(0.6541, grad_fn=<NllLossBackward0>)
batch6 loss : tensor(0.8021, grad_fn=<NllLossBackward0>)
batch7 loss : tensor(1.5517, grad_fn=<NllLossBackward0>)
batch8 loss : tensor(3.2341, grad_fn=<NllLossBackward0>)
batch9 loss : tensor(1.7181, grad_fn=<NllLossBackward0>)
batch10 loss : tensor(0.6439, grad_fn=<NllLossBackward0>)
batch11 loss : tensor(0.6215, grad_fn=<NllLossBackward0>)
batch12 loss : tensor(0.9571, grad_fn=<NllLossBackward0>)
Loss/train : 1.405660738547643
Loss/valid : 1.4467197880148888
Accuracy : 0.25
Recall : 1.0
Precision : 0.25

batch1 loss : tensor(1.9417, grad_fn=<NllLossBackward0>)
batch2 loss : tensor(2.1332, grad_fn=<NllLossBackward0>)
batch3 loss : tensor(0.0997, grad_fn=<NllLossBackward0>)
batch4 loss : tensor(0.8663, gr

  _warn_prf(average, modifier, msg_start, len(result))


batch5 loss : tensor(0.4804, grad_fn=<NllLossBackward0>)
batch6 loss : tensor(0.6912, grad_fn=<NllLossBackward0>)
batch7 loss : tensor(0.1402, grad_fn=<NllLossBackward0>)
batch8 loss : tensor(0.6898, grad_fn=<NllLossBackward0>)
batch9 loss : tensor(0.7798, grad_fn=<NllLossBackward0>)
batch10 loss : tensor(0.7124, grad_fn=<NllLossBackward0>)
batch11 loss : tensor(0.4660, grad_fn=<NllLossBackward0>)
batch12 loss : tensor(0.3798, grad_fn=<NllLossBackward0>)
Loss/train : 0.5482522062957287
Loss/valid : 0.3298368714749813
Accuracy : 0.5
Recall : 0.3333333333333333
Precision : 1.0

batch1 loss : tensor(0.3123, grad_fn=<NllLossBackward0>)
batch2 loss : tensor(0.8508, grad_fn=<NllLossBackward0>)
batch3 loss : tensor(0.2500, grad_fn=<NllLossBackward0>)
batch4 loss : tensor(0.7139, grad_fn=<NllLossBackward0>)
batch5 loss : tensor(0.3102, grad_fn=<NllLossBackward0>)
batch6 loss : tensor(0.4948, grad_fn=<NllLossBackward0>)
batch7 loss : tensor(0.1225, grad_fn=<NllLossBackward0>)
batch8 loss : tens

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


batch5 loss : tensor(0.1538, grad_fn=<NllLossBackward0>)
batch6 loss : tensor(0.1841, grad_fn=<NllLossBackward0>)
batch7 loss : tensor(0.2077, grad_fn=<NllLossBackward0>)
batch8 loss : tensor(0.1903, grad_fn=<NllLossBackward0>)
batch9 loss : tensor(0.1700, grad_fn=<NllLossBackward0>)
batch10 loss : tensor(0.1240, grad_fn=<NllLossBackward0>)
batch11 loss : tensor(0.1935, grad_fn=<NllLossBackward0>)
batch12 loss : tensor(0.2533, grad_fn=<NllLossBackward0>)
Loss/train : 0.1858921218663454
Loss/valid : 0.15569936111569405
Accuracy : 1.0
Recall : 1.0
Precision : 1.0

batch1 loss : tensor(0.2211, grad_fn=<NllLossBackward0>)
batch2 loss : tensor(0.2042, grad_fn=<NllLossBackward0>)
batch3 loss : tensor(0.0802, grad_fn=<NllLossBackward0>)
batch4 loss : tensor(0.1089, grad_fn=<NllLossBackward0>)
batch5 loss : tensor(0.2014, grad_fn=<NllLossBackward0>)
batch6 loss : tensor(0.2454, grad_fn=<NllLossBackward0>)
batch7 loss : tensor(0.1916, grad_fn=<NllLossBackward0>)
batch8 loss : tensor(0.1903, gra

### avg_loss, avg_vloss가 계속 같은 값으로 나오는 현상
- 학습이 진행되도 avg_loss와 avg_vloss가 계속 같은 값으로 나오는 현상이 발생했는데 처음에는 데이터 셋이나 모델에 문제가 있나 해서 살펴보았는데 딱히 문제는 없었다. 어디가 문제인가 살펴보니 loss가 변하지 않는다는 것은 파라미터 업데이트가 안된다는 것을 의미했고 다시 살펴보니 모델을 변경한 후에 optimizer를 새롭게 설정해주지 않아서 생긴 문제였다.

### 전체 데이터로 모델을 돌리기 전에 샘플 데이터로 우선적으로 돌려보기
- 모델 training은 많은 비용이 들어가는 작업이다. 특히, 데이터가 많고 클수록. 그렇기 때문에 비용을 절약하기 위해서는 전체 학습 데이터로 모델을 학습하기 전에 샘플 데이터로 우선적으로 학습을 진행하여 모델이 정상적으로 작동하는지 확인하는 작업이 필요하다.

### accuracy, recall, precision을 구해봤는데 0.75, 0, 0이 나왔다. 왜지..?
- recall이랑 precision이 0이 나왔다는 건 TP가 0이라는 건데 그렇게 되면 accuracy가 0.75가 나올 수가 없다. 데이터셋이 불균형한 경우가 아닌 이상 나와봐야 0.5인데..(현재 -> 개 : 30, 고양이 : 30)
- 일단 모든 결과값을 출력해서 어디가 잘못된 것인지 확인해보자...