# Data Load

In [3]:
!pip install kaggle --upgrade



In [4]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"ldw2006","key":"26e4aa213f5af15a6f8c3199c36f276f"}'}

In [5]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [6]:
!ls -1ha kaggle.json

kaggle.json


In [7]:
!kaggle datasets download -d andyczhao/covidx-cxr2

Dataset URL: https://www.kaggle.com/datasets/andyczhao/covidx-cxr2
License(s): other
Downloading covidx-cxr2.zip to /content
100% 28.9G/28.9G [05:40<00:00, 149MB/s]
100% 28.9G/28.9G [05:42<00:00, 90.6MB/s]


In [8]:
import multiprocessing

# 시스템의 CPU 코어 수 확인
num_cpu_cores = multiprocessing.cpu_count()
print(f"CPU 코어 수: {num_cpu_cores}")


CPU 코어 수: 2


In [10]:
import zipfile
import io
import pandas as pd
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

class CovidXZipDataset(Dataset):
    def __init__(self, zip_file, txt_file, Type, transform=None):
        self.zip_path = zip_file
        self.zip_file = zipfile.ZipFile(zip_file, 'r')
        self.Type = Type
        self.annotations = self.MakeDataFrame(txt_file)
        self.transform = transform

    def MakeDataFrame (self, file_path:str) -> list :
        '''
        데이터 프레임 생성 함수
        '''
        data = []
        with open(file_path, 'r') as file:
            for line in file:
                parts = line.strip().split()
                image_path = parts[1]
                label = 1 if parts[2] == 'positive' else 0
                data.append((image_path, label))

        df = pd.DataFrame(data, columns=['image_path', 'label'])
        return df

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_name = self.annotations.iloc[idx, 0]
        label = int(self.annotations.iloc[idx, 1])
        with zipfile.ZipFile(self.zip_path, 'r') as zip_ref:
          with zip_ref.open(self.Type+img_name) as img_file:
            image = Image.open(io.BytesIO(img_file.read())).convert("L")

        if self.transform:
            image = self.transform(image)

        return image, label

# 경로 설정
zip_path = '/content/covidx-cxr2.zip'
train_txt = '/content/train.txt'
test_txt = '/content/test.txt'
valid_txt = '/content/val.txt'

# 이미지 전처리
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# 데이터셋 및 데이터로더 생성
train_dataset = CovidXZipDataset(zip_file=zip_path, txt_file=train_txt, Type = 'train/', transform=transform)
test_dataset = CovidXZipDataset(zip_file=zip_path, txt_file=test_txt, Type = 'test/', transform=transform)
valid_dataset = CovidXZipDataset(zip_file=zip_path, txt_file=valid_txt, Type = 'val/', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=1)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=1)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False, num_workers=1)


# CNN Model Def

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

class SCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(SCNN, self).__init__()
        self.num_classes = num_classes
        self.feature = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),  # 32x32

            nn.Conv2d(32, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),  # 16x16

            nn.Conv2d(64, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2)   # 8x8
        )

        self.flatten = Flatten()
        self.fc_layer = nn.Linear(128 * 8 * 8, num_classes)  # 128 * 8 * 8 = 8192

    def forward(self, x):
        feature = self.feature(x)
        feature = self.flatten(feature)
        out = self.fc_layer(feature)
        return [feature, out]

transform = transforms.Compose([
    transforms.Resize((64, 64)),  # 이미지 크기 조정
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])



# Learn

In [None]:
import os
import time

In [None]:
file_path = '/content/MNIST/valid/train.txt'

batch = 512

dataloader = test_loader

Target = SCNN(2)

Target.cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(Target.parameters(), lr=0.001)


epochs = 100
step = 0

In [None]:
%%time

for epoch in range(epochs):
    start = time.time()
    total_loss = 0

    for i, imgs in enumerate(dataloader):
        #xb, yb = torch.autograd.Variable(xb), torch.autograd.Variable(yb)
        step += 1

        pred = [Target(imgs[0].cuda())[-1]]
        loss = criterion(pred[0].cuda(), imgs[1].cuda())

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()

        if step % 100 == 0 :
          print("Learning...", step)
          print(loss.item())
          print("--------------------")

    torch.save({'state_dict':Target.state_dict()}, os.path.join('/content/target/', "SCNN.tar"))


# Test

In [18]:
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [20]:
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [28]:
def load_model(model_path):
    model = SCNN(num_classes=2)
    model.load_state_dict(torch.load(model_path, map_location=device)['state_dict'])
    return model.eval()

def evaluate(model, dataloader, criterion):
    global outputs
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    n = 0
    with torch.no_grad():
        for imgs, labels in dataloader:
            if n > 100 :
              break
            else :
              n+=1
            print(n)
            outputs = model(imgs)[-1]

            _, predicted = torch.min(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            print(f'Predicted: {predicted.cpu().numpy()}, Actual: {labels.cpu().numpy()}')

    accuracy = 100 * correct / total
    avg_loss = total_loss / len(dataloader)
    return avg_loss, accuracy

model_path = '/content/target/SCNN.tar'

model = load_model(model_path)

criterion = torch.nn.CrossEntropyLoss()

valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=4)

avg_loss, accuracy = evaluate(model, valid_loader, criterion)

print(f'Validation Loss: {avg_loss:.4f}, Validation Accuracy: {accuracy:.2f}%')



1
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], Actual: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
2
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], Actual: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
3
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], Actual: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
4
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], Actual: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
5
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], Actual: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
6
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], Actual: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
7
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

KeyboardInterrupt: 

In [27]:
  outputs

tensor([[ 93.6556, -91.3751],
        [ 62.6164, -61.2585],
        [ 61.0631, -59.8304],
        [ 76.0602, -75.1697],
        [ 75.4276, -74.6813],
        [ 52.1948, -51.5274],
        [ 90.6880, -89.5543],
        [ 81.2938, -79.6214],
        [ 75.8083, -74.0372],
        [ 72.5466, -71.5282],
        [ 83.5501, -81.7211],
        [ 74.5293, -73.8056],
        [ 64.0816, -64.0219],
        [ 72.5226, -71.0544],
        [ 62.0638, -61.3653],
        [ 67.8788, -67.4772],
        [ 55.4676, -55.1210],
        [ 69.7264, -68.9437],
        [ 63.6331, -63.0986],
        [ 71.4281, -70.5384],
        [ 95.9360, -94.7494],
        [ 96.9344, -95.7546],
        [ 75.7416, -74.8372],
        [ 92.6511, -91.1014],
        [ 60.2262, -59.7785],
        [ 77.2727, -76.9338],
        [ 64.8411, -64.4841],
        [ 83.0260, -82.3278],
        [ 80.2875, -79.9342],
        [ 83.3400, -81.7932],
        [ 77.5720, -76.8391],
        [ 92.3887, -90.2456]])