In [2]:
!pip install efficientnet_pytorch

[0m

In [47]:
import os
import torch
import random
import urllib3
import numpy as np
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from torchvision import transforms
from torch.nn import functional as F
from pytorch_lightning import Trainer
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from efficientnet_pytorch import EfficientNet
from pytorch_lightning.callbacks import ModelCheckpoint
from transformers import get_cosine_schedule_with_warmup

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# 데이터 경로
data_path = '/kaggle/input/chest-xray-pneumonia/chest_xray/'
# 훈련, 검증, 테스트 경로 설정
train_path = data_path + 'train/'
valid_path = data_path + 'val/'
test_path = data_path + 'test/'

In [6]:
# 훈련 데이터용 변환기
transform_train = transforms.Compose([
    transforms.Resize((250, 250)), # 이미지 크기 조정
    transforms.CenterCrop(180), # 중앙 이미지 확대
    transforms.RandomHorizontalFlip(0.5), # 좌우 대칭
    transforms.RandomVerticalFlip(0.2), # 상하 대칭
    transforms.RandomRotation(20), # 이미지 회전
    transforms.ToTensor(), # 텐서 객체로 변환
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225)) # 정규화
])

# 테스트 데이터용 변환기
transform_test = transforms.Compose([
    transforms.Resize((250, 250)),
    transforms.CenterCrop(180),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),
                         (0.229, 0.224, 0.225))
])

In [7]:
datasets_train = ImageFolder(root=train_path, transform=transform_train)
datasets_valid = ImageFolder(root=valid_path, transform=transform_test)

In [8]:
batch_size = 8

loader_train = DataLoader(dataset=datasets_train, batch_size=batch_size, shuffle=True)
loader_valid = DataLoader(dataset=datasets_valid, batch_size=batch_size, shuffle=False)

In [81]:
epochs= 20

# train 시킬떄 순서 training_step -> validataion_step
class Improved_EfficientNet(pl.LightningModule):
    def __init__(self, num_classes, model_name):
        super().__init__()
        self.model = EfficientNet.from_pretrained(model_name, num_classes=2).to(device) 
        
    def forward(self, x):
        return self.model(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        self.log('val_loss', loss)  # Log the validation loss
        return loss
    
    def configure_optimizers(self):    
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=0.0006, weight_decay=0.001)
        scheduler = get_cosine_schedule_with_warmup(
            optimizer, 
            num_warmup_steps=len(loader_train)*3,
            num_training_steps=len(loader_train)*epochs
        )

        return {
            'optimizer': optimizer,
            'lr_scheduler': scheduler,
        }
    
    def train_dataloader(self):
        return loader_train

    def val_dataloader(self):
        return loader_valid

In [82]:
model_b1 = Improved_EfficientNet(num_classes=2, model_name='efficientnet-b1')
model_b2 = Improved_EfficientNet(num_classes=2, model_name='efficientnet-b2')
model_b3 = Improved_EfficientNet(num_classes=2, model_name='efficientnet-b3')

Loaded pretrained weights for efficientnet-b1


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b2-8bb594d6.pth
100%|██████████| 35.1M/35.1M [00:00<00:00, 50.7MB/s]


Loaded pretrained weights for efficientnet-b2


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b3-5fb5a3c3.pth
100%|██████████| 47.1M/47.1M [00:01<00:00, 48.5MB/s]


Loaded pretrained weights for efficientnet-b3


In [85]:
checkpoint_callback_b1 = ModelCheckpoint(monitor="val_loss", mode="min", 
    dirpath="/kaggle/working/models",filename="efficientnet-b1")
checkpoint_callback_b2 = ModelCheckpoint(monitor="val_loss", mode="min", 
    dirpath="/kaggle/working/models",filename="efficientnet-b2")
checkpoint_callback_b3 = ModelCheckpoint(monitor="val_loss", mode="min", 
    dirpath="/kaggle/working/models",filename="efficientnet-b3")

In [86]:
trainer_b1 = Trainer(devices=1, accelerator="gpu", max_epochs=epochs,callbacks=[checkpoint_callback_b1])
trainer_b1.fit(model_b1)

trainer_b2 = Trainer(devices=1, accelerator="gpu", max_epochs=epochs,callbacks=[checkpoint_callback_b2])
trainer_b2.fit(model_b2)

trainer_b3 = Trainer(devices=1, accelerator="gpu", max_epochs=epochs,callbacks=[checkpoint_callback_b3])
trainer_b3.fit(model_b3)

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [90]:
model_b1 = Improved_EfficientNet.load_from_checkpoint("/kaggle/working/models/efficientnet-b1.ckpt",
num_classes=2, model_name='efficientnet-b1')
model_b2 = Improved_EfficientNet.load_from_checkpoint("/kaggle/working/models/efficientnet-b2.ckpt",
num_classes=2, model_name='efficientnet-b2')
model_b3 = Improved_EfficientNet.load_from_checkpoint("/kaggle/working/models/efficientnet-b3.ckpt",
num_classes=2, model_name='efficientnet-b3')

Loaded pretrained weights for efficientnet-b1
Loaded pretrained weights for efficientnet-b2
Loaded pretrained weights for efficientnet-b2


In [91]:
datasets_test = ImageFolder(root=test_path, transform=transform_test)
loader_test = DataLoader(dataset=datasets_test, batch_size=batch_size, shuffle=False)

In [92]:
model_b1.to(device)
model_b2.to(device)
model_b3.to(device)

Improved_EfficientNet(
  (model): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d((0, 1, 0, 1))
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d((1, 1, 1, 1))
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_project_conv): Conv2dStaticSam

In [93]:
from sklearn.metrics import accuracy_score # 정확도 계산 함수
from sklearn.metrics import recall_score # 재현율 계산 함수
from sklearn.metrics import f1_score # F1 점수 계산 함수

In [94]:
def evaluate_model(model, name):
    trues_list = []
    preds_list = []
    
    model.eval()
    for images, labels in loader_test:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)

        preds = torch.max(outputs.cpu(), dim=1)[1].numpy() # 에측값
        trues = labels.cpu().numpy() # 실젯값

        preds_list.extend(preds)
        trues_list.extend(trues)
    print(f'##### {name} 최종 예측 결과 평가 점수 #####')
    print(f'정확도 : {accuracy_score(trues_list, preds_list):.4f}')
    print(f'재현율 : {recall_score(trues_list, preds_list):.4f}')
    print(f'F1 점수 :{f1_score(trues_list, preds_list):.4f}')
    
    return preds_list

In [95]:
model_b1_preds = evaluate_model(model_b1, 'EfficientNet-b1')

##### EfficientNet-b1 최종 예측 결과 평가 점수 #####
정확도 : 0.8510
재현율 : 0.9949
F1 점수 :0.8930


In [96]:
 model_b2_preds = evaluate_model(model_b2, 'EfficientNet-b2')

##### EfficientNet-b2 최종 예측 결과 평가 점수 #####
정확도 : 0.9151
재현율 : 0.9821
F1 점수 :0.9353


In [97]:
model_b3_preds = evaluate_model(model_b3, 'EfficientNet-b3')

##### EfficientNet-b3 최종 예측 결과 평가 점수 #####
정확도 : 0.9151
재현율 : 0.9821
F1 점수 :0.9353


In [100]:
ensemble_preds = []
true_list = []
for i in range(len(model_b1_preds)):
    pred_element = np.round((model_b1_preds[i] + model_b2_preds[i] + model_b3_preds[i])/3)
    ensemble_preds.append(pred_element)


for _, labels in loader_test:
    labels = labels.to(device)
    trues = labels.cpu().numpy() # 실젯값
    true_list.extend(trues)
    
print(f'##### 앙상블 최종 예측 결과 평가 점수 #####')
print(f'정확도 : {accuracy_score(true_list, ensemble_preds):.4f}')
print(f'재현율 : {recall_score(true_list, ensemble_preds):.4f}')
print(f'F1 점수 :{f1_score(true_list, ensemble_preds):.4f}')

##### 앙상블 최종 예측 결과 평가 점수 #####
정확도 : 0.9151
재현율 : 0.9821
F1 점수 :0.9353
