In [52]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import os
import cv2

In [53]:
# Pick the compute backend: CUDA first, then Apple MPS, otherwise fall back to the CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [54]:
# Side length, in pixels, of the square images that load_image() resizes to.
img_size = 224

In [55]:
# Load an image file.
def load_image(path):
    """Read an image from disk as an RGB array resized to ``img_size`` x ``img_size``.

    Args:
        path: Path of the image file to read.

    Returns:
        The decoded image after BGR -> RGB conversion and resizing to
        ``(img_size, img_size)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path`` (missing file or
            undecodable image data).
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_size, img_size))
    return img

In [56]:
# Dataset class definition.
class XRayDataset(Dataset):
    """In-memory image dataset driven by a labels DataFrame.

    Every image listed in ``df`` is read once at construction time and then
    served from memory by ``__getitem__``, so iterating over the dataset does
    not touch the disk again.

    Args:
        data_dir: Directory that contains the image files.
        df: DataFrame with a ``filename`` column (joined onto ``data_dir``) and
            an integer ``label`` column holding class indices 0 or 1.
        transform: Optional callable applied to an image each time it is fetched.

    Attributes:
        images: List of loaded images, in ``df`` row order.
        targets: 1-D ``torch.long`` tensor of class indices.
        labels: ``(N, 2)`` float one-hot encoding of ``targets`` (kept for
            callers that want one-hot labels; ``__getitem__`` returns indices).
    """

    def __init__(self, data_dir, df, transform=None):
        self.data_dir = data_dir
        self.df = df
        self.transform = transform

        # Read every image exactly once; column access avoids building a
        # Series per row the way df.iloc[i][...] does.
        self.images = [
            load_image(os.path.join(self.data_dir, filename))
            for filename in self.df["filename"].tolist()
        ]

        # torch.tensor copies, so later edits to df cannot change the targets.
        self.targets = torch.tensor(
            self.df["label"].to_numpy(dtype=np.int64), dtype=torch.long
        )

        # One-hot view of the same labels.
        self.labels = torch.nn.functional.one_hot(self.targets, num_classes=2).float()

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is the cached array passed through ``transform`` when one is
        set; ``label`` is a 0-dim ``torch.long`` class index.
        """
        # NOTE: without a transform the cached array itself is returned, so
        # callers must not modify it in place.
        image = self.images[idx]
        label = self.targets[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [57]:
# Prepare the training data.
data_dir = "/Users/inho/KDT_AI/COVID_19_XRAY/train"
# Join a *relative* file name: joining an absolute path makes os.path.join
# silently discard data_dir. The resulting path is the same as before.
train_csv = pd.read_csv(os.path.join(data_dir, "labels.csv"))
# Encode the string labels as class indices: 'covid' -> 1, anything else -> 0.
train_csv['label'] = (train_csv['label'] == 'covid').astype('int64')
# The dataset is built in the next cell, once the transform exists; building a
# transform-less copy here only loaded every image a second time.

In [58]:
# Training-time preprocessing: convert the image array to a tensor, then
# standardise each channel with the given per-channel mean and std.
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Training dataset with preprocessing attached, served in shuffled batches of 32.
train_dataset = XRayDataset(data_dir=data_dir, df=train_csv, transform=train_transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

In [59]:
# Model definition
import torchvision.models as models

# `pretrained=True` is deprecated in torchvision >= 0.13; IMAGENET1K_V1 is the
# exact checkpoint that flag used to load, so the starting weights are unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the classification head with a 2-way linear layer.
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)

model = model.to(device)

# Loss and optimizer. CrossEntropyLoss is fed the raw model outputs together
# with the integer class labels yielded by the training loader.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)



In [60]:
# Train the model
num_epochs = 10

# Explicitly (re-)enter training mode: if this cell is re-run after the
# inference cell has called model.eval(), BatchNorm would otherwise stay frozen.
model.train()

for epoch in range(num_epochs):
    train_loss = 0.0   # sum of per-sample losses over the epoch
    train_acc = 0.0    # number of correct predictions over the epoch
    train_total = 0    # number of samples seen over the epoch

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit; detach() keeps the
        # metric computation out of the autograd graph.
        predicted = outputs.detach().argmax(dim=1)
        train_total += labels.size(0)
        train_acc += (predicted == labels).sum().item()
        # loss is the batch mean, so weight it by the batch size before summing.
        train_loss += loss.item() * images.size(0)

    # Convert the running sums into per-sample averages.
    train_loss /= train_total
    train_acc /= train_total

    print("Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}".format(epoch+1, num_epochs, train_loss, train_acc))

Epoch [1/10], Train Loss: 0.1695, Train Acc: 0.9290
Epoch [2/10], Train Loss: 0.0405, Train Acc: 0.9855
Epoch [3/10], Train Loss: 0.0387, Train Acc: 0.9880
Epoch [4/10], Train Loss: 0.0155, Train Acc: 0.9940
Epoch [5/10], Train Loss: 0.0165, Train Acc: 0.9945
Epoch [6/10], Train Loss: 0.0106, Train Acc: 0.9975
Epoch [7/10], Train Loss: 0.0017, Train Acc: 0.9995
Epoch [8/10], Train Loss: 0.0007, Train Acc: 1.0000
Epoch [9/10], Train Loss: 0.0004, Train Acc: 1.0000
Epoch [10/10], Train Loss: 0.0010, Train Acc: 1.0000


In [61]:
# Prepare the test data
test_dir = "/Users/inho/KDT_AI/COVID_19_XRAY/test"
# Skip hidden entries (e.g. macOS's .DS_Store): they are not images and would
# make load_image fail. Sorting fixes the order predictions are produced in.
test_images = sorted(
    name for name in os.listdir(test_dir) if not name.startswith('.')
)
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

class XRayTestDataset(Dataset):
    """Unlabelled image dataset used for inference.

    Args:
        images: Sequence of image file names.
        transform: Optional callable applied to each loaded image.
        data_dir: Directory the file names are joined onto. When omitted, the
            module-level ``test_dir`` is used at access time.
    """

    def __init__(self, images, transform=None, data_dir=None):
        self.images = images
        self.transform = transform
        self.data_dir = data_dir

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` from disk and return it, transformed if configured."""
        # Fall back to the global only when no directory was supplied.
        base_dir = test_dir if self.data_dir is None else self.data_dir
        path = os.path.join(base_dir, self.images[idx])
        image = load_image(path)

        if self.transform:
            image = self.transform(image)

        return image

test_dataset = XRayTestDataset(test_images, transform=test_transform, data_dir=test_dir)
# shuffle=False keeps predictions in the same order as test_images.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [62]:
# Predict on the test set
model.eval()
result = []

# Gradients are not needed for inference.
with torch.no_grad():
    for batch in test_loader:
        logits = model(batch.to(device))
        # Predicted class = index of the largest output along the class dimension.
        predicted = torch.argmax(logits, dim=1)
        result += list(predicted.cpu().numpy())

In [63]:
# Build submission.csv.
# Pair each prediction with the file it was computed for instead of a
# synthesised 'image_NNN.png' name, which is only right when the test files
# happen to follow exactly that naming and ordering.
if len(test_images) != len(result):
    raise ValueError(
        f"Got {len(result)} predictions for {len(test_images)} test images; "
        "re-run the prediction cell before building the submission."
    )

df = pd.DataFrame({'filename': test_images, 'label': result})
# Map class indices back to the label strings: 1 -> 'covid', otherwise 'normal'.
df['label'] = df['label'].apply(lambda x: 'covid' if x == 1 else 'normal')
df.to_csv('submission.csv', index=False)