In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
'''
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

"\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))\n"

## Preprocess data using ImageFolder and DataLoader

In [27]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch
import torchvision

# Number of target classes; passed to the model constructor in a later cell.
num_classes = 2

# Shared data-pipeline settings, kept in one place so the three splits
# cannot drift apart.
DATA_ROOT = '../input/chest-xray-pneumonia/chest_xray'
BATCH_SIZE = 16
NUM_WORKERS = 2

# Resize every image to 224x224, convert it to a tensor, then apply
# (x - 0.5) / 0.5 on each of the three channels.
# NOTE(review): the model built later loads pretrained weights; such weights
# are usually trained with ImageNet mean/std - confirm 0.5/0.5 is intended.
transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


def make_loader(split, shuffle):
    """Build a DataLoader over the ImageFolder found at DATA_ROOT/<split>.

    Args:
        split: Sub-directory name under DATA_ROOT ('train', 'test' or 'val').
        shuffle: Whether the loader reshuffles the samples every epoch.

    Returns:
        A DataLoader yielding (image_batch, label_batch) pairs.
    """
    images = ImageFolder(root = DATA_ROOT + '/' + split, transform = transform)
    return DataLoader(images, batch_size = BATCH_SIZE, shuffle = shuffle, num_workers = NUM_WORKERS)


# NOTE: despite the `_dataset` suffix, these names hold DataLoaders; the names
# are kept because later cells iterate over them and call len() on them.
# Only the training split is shuffled: shuffling evaluation data adds nothing
# and makes evaluation order non-deterministic.
train_dataset = make_loader('train', shuffle = True)
test_dataset = make_loader('test', shuffle = False)
val_dataset = make_loader('val', shuffle = False)

## Construct the model using EfficientNet-B7 from efficientnet_pytorch

In [40]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25ldone
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=77c02f370bd4f73c7ea012edb731dee17f853c82ad2c33e619d2a2b2e59e454b
  Stored in directory: /root/.cache/pip/wheels/0e/cc/b2/49e74588263573ff778da58cc99b9c6349b496636a7e165be6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [53]:
import torch.nn as nn
from torchsummary import summary
from efficientnet_pytorch import EfficientNet
from torch.optim import Adam, lr_scheduler

# Pick the GPU when one is usable, otherwise fall back to the CPU.
# `is_available` must be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Pretrained EfficientNet-B7 with its classifier sized for `num_classes` outputs.
model = EfficientNet.from_pretrained('efficientnet-b7', num_classes = num_classes).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = Adam(model.parameters(), lr = 0.001)
# Multiply the learning rate by 0.1 after every 7 calls to scheduler.step().
scheduler = lr_scheduler.StepLR(optimizer, step_size = 7, gamma = 0.1)

cuda
Loaded pretrained weights for efficientnet-b7


## train

In [57]:
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Upper bound on the number of epochs; the early-stop check below may end sooner.
n_epoch = 20

for epoch in range(n_epoch) :
    # ---- train ----
    # Running totals are plain Python numbers (via .item()). Accumulating the
    # loss tensor itself would keep every batch's autograd graph alive and
    # steadily grow memory over the epoch.
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0
    model.train()
    for x, y in tqdm(train_dataset) :
        x = x.to(device)
        y = y.to(device)

        predict = model(x)
        loss = criterion(predict, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a smaller final batch does not
        # distort the epoch averages.
        n_batch = y.size(0)
        train_loss_sum += loss.item() * n_batch
        train_correct += (torch.argmax(predict, 1) == y).sum().item()
        train_seen += n_batch

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    epoch_acc = train_correct / max(train_seen, 1)
    epoch_loss = train_loss_sum / max(train_seen, 1)

    # ---- evaluate on the loader named `test_dataset` ----
    # NOTE(review): this split drives the early-stop decision below, so it acts
    # as a validation set; confirm that using the test split this way is intended.
    eval_loss_sum = 0.0
    eval_correct = 0
    eval_seen = 0
    model.eval()
    with torch.no_grad() :
        for x, y in tqdm(test_dataset) :
            x = x.to(device)
            y = y.to(device)

            predict = model(x)
            loss = criterion(predict, y)

            n_batch = y.size(0)
            eval_loss_sum += loss.item() * n_batch
            eval_correct += (torch.argmax(predict, 1) == y).sum().item()
            eval_seen += n_batch

    epoch_val_acc = eval_correct / max(eval_seen, 1)
    epoch_val_loss = eval_loss_sum / max(eval_seen, 1)

    print('{0} / {1} : train_loss : {2:.4f}, train_acc : {3:.4f}, val_loss : {4:.4f}, val_acc : {5:.4f}'.format(epoch+1, n_epoch, epoch_loss, epoch_acc, epoch_val_loss, epoch_val_acc))

    # Stop once either accuracy crosses its threshold.
    # NOTE(review): stopping on *training* accuracy alone can end the run while
    # evaluation accuracy is still low - confirm this criterion is intended.
    if (epoch_val_acc > 0.90 or epoch_acc > 0.98) :
        print('early stop')
        break
    scheduler.step()

100%|██████████| 326/326 [03:23<00:00,  1.60it/s]
100%|██████████| 39/39 [00:10<00:00,  3.58it/s]

1 / 20 : train_loss : 0.0334, train_acc : 0.9889, val_loss : 0.8310, val_acc : 0.8446
early stop





## Check the performance using val_dataset

In [62]:
# Evaluate the trained model on the loader named `val_dataset`.
model.eval()

# Accumulate sample-weighted totals as plain Python numbers, so the reported
# loss and accuracy are exact per-sample averages even when the final batch
# is smaller than the others.
valid_loss_sum = 0.0
valid_correct = 0
valid_seen = 0
with torch.no_grad() :
    for x, y in tqdm(val_dataset) :
        x = x.to(device)
        y = y.to(device)

        predict = model(x)
        loss = criterion(predict, y)

        n_batch = y.size(0)
        valid_loss_sum += loss.item() * n_batch
        valid_correct += (torch.argmax(predict, 1) == y).sum().item()
        valid_seen += n_batch

# max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
valid_acc = valid_correct / max(valid_seen, 1)
valid_loss = valid_loss_sum / max(valid_seen, 1)

print('valid_loss : {0:.4f}, valid_acc : {1:.4f}'.format(valid_loss, valid_acc))

100%|██████████| 1/1 [00:00<00:00,  1.88it/s]

valid_loss : 0.0318, valid_acc : 1.0000



