### 모듈 임포트하기

In [1]:
import os

import numpy as np
import pandas as pd

from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from tqdm.autonotebook import tqdm as notebook_tqdm

# Pick the compute device once: use the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

  from .autonotebook import tqdm as notebook_tqdm


### 데이터셋 가져오기

In [2]:
# Load every file under the training directory as a float32 tensor and read
# the binary labels (covid -> 1, anything else -> 0) from labels.csv.
images = []

for dirname, dirnames, filenames in os.walk('./COVID_19_XRAY/train'):
    # os.walk yields entries in arbitrary, filesystem-dependent order; sorting
    # makes the order of `images` reproducible across machines and runs.
    # NOTE(review): labels are taken in CSV row order, so they only line up
    # with `images` if labels.csv lists the files in sorted-filename order —
    # confirm against the dataset.
    dirnames.sort()
    for filename in sorted(filenames):
        if filename == 'labels.csv':
            labels = pd.read_csv('./COVID_19_XRAY/train/labels.csv')
            labels = labels.label.tolist()
            labels = [1 if label == 'covid' else 0 for label in labels]
            continue

        path = os.path.join(dirname, filename)
        # The context manager closes the file handle as soon as the pixel data
        # has been copied into the array, instead of leaking one per image.
        with Image.open(path) as image:
            pixels = np.array(image)

        images.append(torch.tensor(pixels, dtype=torch.float32))

### 데이터셋 분류하기

- train(8):valid(2) 

In [3]:
# Hold out 20% of the samples for validation; the fixed random_state makes the
# split the same on every run.
train_img, valid_img, train_labels, valid_labels = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=33,
)

### CNN 모델 만들기

In [4]:
class CNN(nn.Module):
    """Small two-convolution classifier for single-channel square images.

    Layers, in order: Conv2d(1 -> 3) -> ReLU -> Conv2d(3 -> 10) -> ReLU ->
    flatten -> Linear(in_size -> hid_size) -> ReLU -> Dropout ->
    Linear(hid_size -> out_size). Both convolutions use stride 1 and no
    padding, so each one shrinks the spatial side by ``kernel_size - 1``.

    Args:
        hid_size: Width of the hidden fully connected layer.
        kernel_size: Side length of the square kernel of both convolutions.
        out_size: Number of output logits.
        batch_size: Stored on the instance for backward compatibility only;
            ``forward`` takes the batch dimension from its input.
        dropout: Drop probability of the dropout layer after the hidden layer.
        img_size: Side length of the square input images. Defaults to 299,
            the value that used to be hard-coded.

    Raises:
        ValueError: If two convolutions with ``kernel_size`` would leave no
            spatial extent for an ``img_size`` x ``img_size`` input.
    """

    def __init__(self, hid_size, kernel_size, out_size, batch_size, dropout=0.3, img_size=299):
        super(CNN, self).__init__()
        self.batch_size = batch_size
        self.k_size = kernel_size
        self.h_size = hid_size
        self.out_size = out_size

        # Spatial side remaining after two unpadded, stride-1 convolutions.
        conv_side = img_size - 2 * (self.k_size - 1)
        if conv_side <= 0:
            raise ValueError(
                'kernel_size={} is too large for {}x{} inputs'.format(
                    kernel_size, img_size, img_size))
        # conv2 emits 10 channels of conv_side x conv_side features.
        self.in_size = 10 * conv_side ** 2

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=self.k_size, stride=1)
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=self.k_size, stride=1)
        self.fc1 = nn.Linear(self.in_size, self.h_size)
        self.fc2 = nn.Linear(self.h_size, self.out_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs):
        """Return the logits computed from ``inputs``.

        ``inputs`` is fed straight into a 1-input-channel Conv2d; the result
        has one row per input sample and ``out_size`` columns.
        """
        x = F.relu(self.conv1(inputs))
        x = F.relu(self.conv2(x))
        # Flatten per sample using the actual batch dimension, so a final
        # partial batch (or single-sample inference) works too.
        x = x.flatten(1)
        # Dropout is only active in train() mode; eval() output is unaffected.
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)

### Dataset 준비

In [5]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each image tensor with its label."""

    def __init__(self, images, labels):
        """Keep references to the image and label sequences (no copying)."""
        self.images, self.labels = images, labels

    def __len__(self):
        """Number of samples, taken from the length of the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict with 'images' and 'labels' entries.

        The image gets a new leading dimension via ``unsqueeze(0)``.
        """
        return {
            'images': self.images[idx].unsqueeze(0),
            'labels': self.labels[idx],
        }

### Train, Test

In [9]:
def train(dataloader, model, optimizer, loss_fn):
    """Run one training epoch and report the mean per-sample loss.

    Args:
        dataloader: Iterable of batches, each a dict with 'images' and
            'labels' tensors; must expose ``.dataset`` with a length.
        model: Module being trained; it is switched to train mode.
        optimizer: Optimizer stepped once per batch.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            loss tensor.

    Returns:
        The mean loss per sample over the epoch (``nan`` for an empty
        dataset). The same value is printed with three decimals.
    """
    model.train()

    size = len(dataloader.dataset)
    total_loss = 0.0

    for batch in dataloader:
        images = batch['images'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # Assumes loss_fn returns the batch mean (the default reduction of the
        # torch.nn losses). Weighting by the batch size makes the final
        # division by the dataset size a true per-sample mean, instead of a
        # value that is too small by roughly the batch size.
        total_loss += loss.item() * labels.size(0)

    mean_loss = total_loss / size if size else float('nan')
    print('Training Loss: {:.3f}'.format(mean_loss))
    return mean_loss
    
    
def test(dataloader, model, loss_fn):
    model.eval()
    
    acc = 0
    test_loss = 0
    size = len(dataloader.dataset)
    
    with torch.no_grad():
        for batch in dataloader:
            images = batch['images'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(images)
            
            test_loss += loss_fn(outputs, labels).item()
            acc += (outputs.softmax(1).argmax(1) == labels).type(torch.float).sum().item()
            
        test_loss /= size
        acc /= size
        
        print("Test Loss: {:.3f}, Accuracy: {:.3f}".format(test_loss, acc*100))

### 모델 생성

In [7]:
# Mini-batch size; the DataLoaders built further down reuse this value.
batch_size = 32

# Hidden width 128, 128x128 convolution kernels, 2 output logits.
model = CNN(
    hid_size=128,
    kernel_size=128,
    out_size=2,
    batch_size=batch_size,
).to(device)
model

CNN(
  (conv1): Conv2d(1, 3, kernel_size=(128, 128), stride=(1, 1))
  (conv2): Conv2d(3, 10, kernel_size=(128, 128), stride=(1, 1))
  (fc1): Linear(in_features=20250, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

- 하이퍼파라미터 설정
- train, test 돌리기

In [None]:
# Optimiser, loss and epoch count for the run.
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
loss_fn = nn.CrossEntropyLoss()
num_epochs = 3

train_dataset = Dataset(train_img, train_labels)
# Pair the validation images with their labels; passing the images a second
# time as labels would hand image tensors to the loss as targets.
valid_dataset = Dataset(valid_img, valid_labels)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation metrics do not depend on sample order, so no shuffling is needed.
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# One training pass followed by one validation pass per epoch.
for i in notebook_tqdm(range(num_epochs)):
    print("Epoch {:}".format(i+1))
    train(train_dataloader, model, optimizer, loss_fn)
    test(valid_dataloader, model, loss_fn)

  0%|                                                                                            | 0/3 [00:00<?, ?it/s]

Epoch 1
