In [1]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torchvision.datasets as dsets
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import os
import cv2

### GPU 사용 설정

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


### 데이터셋 불러오기

In [3]:
def make_dataframe_catndog():    
    paths = []
    dataset_type = []
    label = []

    for dirname, _, filenames in os.walk('/content/drive/MyDrive/고모부_머신러닝/dogncat'):
        for filename in filenames:
            if '.jpg' in filename:
                file_path = dirname+'/'+filename
                paths.append(file_path)

            if '/training_set' in file_path:
                dataset_type.append('train')

            elif '/test_set' in file_path:
                dataset_type.append('test')

            else:
                dataset_type.append('N/A') 

            if 'dogs' in file_path:
                label.append('DOG')

            elif 'cats' in file_path:
                label.append('CAT')

            else:
                label.append('N/A')
        
    df = pd.DataFrame({'path' : file_path, 'dataset' : dataset_type, 'label' : label})
    return df

In [4]:
cnd_df = make_dataframe_catndog()

train_df = cnd_df[cnd_df['dataset'] == 'train']
test_df = cnd_df[cnd_df['dataset'] == 'test']

train_img_path = train_df['path'].values
train_label = train_df['label'].values

test_img_path = test_df['path'].values
test_label_path = test_df['label'].values

In [5]:
class CndDataset(Dataset):
    def __init__(self, image_paths, labels):
        self.image_paths = image_paths
        self.labels = labels

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_file = self.image_paths[idx]

        if self.labels is not None:
            label = self.labels[idx]

        # float 형태의 데이터여야 모델에 들어갈 때 작동한다.
        image = np.asarray(cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB), dtype=np.float32).transpose(2,0,1)

        return image, label


In [6]:
cnd_dataset = CndDataset(train_img_path, train_label)
loader = DataLoader(cnd_dataset, batch_size=100, shuffle=True)

In [7]:
batch_data, batch_label = next(iter(loader))
print(batch_data[0].shape)


torch.Size([3, 230, 308])


In [8]:
len(loader)

81

### Build Model

In [9]:
# 뉴런수 5개, 커널사이즈 3x3

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__() 
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 5, kernel_size=3, stride=1, padding=0),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(5, 10, kernel_size=3, stride=1, padding=0),
            nn.ReLU()
        )
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x



In [10]:
model = CNN()
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
)

In [11]:
data, label = next(iter(loader))
output = model(data)
print(output.shape)

torch.Size([100, 10, 226, 304])


### 매 step마다 shape을 확인하는 것이 중요하다!!
- 매 step마다 shape을 확인할 수 있는 모델을 만들어 보기

In [12]:

class CNN2(nn.Module):
    def __init__(self):
        super(CNN2, self).__init__() 
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 5, kernel_size=3, stride=1, padding=0),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(5, 10, kernel_size=3, stride=1, padding=0),
            nn.ReLU()
        )
    
    def forward(self, x):
        print('input shape :', x.shape)
        x = self.layer1(x)
        print('layer1 output :', x.shape)
        x = self.layer2(x)
        print('layer2 output :', x.shape)
        # return x



In [13]:
model2 = CNN2()
model2

CNN2(
  (layer1): Sequential(
    (0): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
)

In [14]:
data, label = next(iter(loader))
model2(data)

input shape : torch.Size([100, 3, 230, 308])
layer1 output : torch.Size([100, 5, 228, 306])
layer2 output : torch.Size([100, 10, 226, 304])


### 두 개의 CNN을 통과한 후 결과값을 merge하여 fc 레이어를 통과시켜볼 것
- layer1 -> kernel_size : 3x3, neuron : 5, relu
- layer2 -> kernel_size : 7x7, neuron : 7, relu
- merge 방법은 addition
- fc1 -> neuron : 512, linear

In [31]:

class CNN3(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 5, kernel_size=3, stride=1, padding=0),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(3, 7, kernel_size=3, stride=1, padding=0),
            nn.Conv2d(7, 5, kernel_size=1, stride=1, padding=0), # 1x1 convolution연산을 통해 layer1과 layer2의 차원을 맞춰주기
            nn.ReLU()
        )
        self.fc1 = nn.Linear(5 * 228 * 306, 512)

    
    def forward(self, x):
        print(x.shape)
        x1 = self.layer1(x)
        x2 = self.layer2(x)
        print('x1 shape :', x1.shape)
        print('x2 shape :', x2.shape)
        x3 = x1 + x2
        print('x3 :', x3.shape)
        print('x3.size(0) :', x3.size(0))
        fc_input = x3.view(x3.size(0), -1) # view를 통해 x3의 shape을 fc뉴런에 들어갈 수 있는 형태로 바꿔준다. x3.size(0) x3의 shape 중 0번째 차원의 shape을 반환
        print('fc_input :', fc_input.shape)
        output = self.fc1(fc_input)
        print('output :', output.shape)



In [32]:
model3 = CNN3()
model3

CNN3(
  (layer1): Sequential(
    (0): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(3, 7, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(7, 5, kernel_size=(1, 1), stride=(1, 1))
    (2): ReLU()
  )
  (fc1): Linear(in_features=348840, out_features=512, bias=True)
)

In [33]:
data, label  = next(iter(loader))
model3(data)


torch.Size([100, 3, 230, 308])
x1 shape : torch.Size([100, 5, 228, 306])
x2 shape : torch.Size([100, 5, 228, 306])
x3 : torch.Size([100, 5, 228, 306])
x3.size(0) : 100
fc_input : torch.Size([100, 348840])
output : torch.Size([100, 512])
