In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
from glob import glob
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torchvision
from sklearn.model_selection import train_test_split
import pandas as pd

#### Download dataset

In [None]:
# download from google drive
!pip install --upgrade gdown==v4.6.3
!gdown --id '1K7gVVGhxJNLBRoUV9XgGn9gai6GkpTeP' --output pneumonia-kaggle.zip

Downloading...
From: https://drive.google.com/uc?id=1w0ldWmLkbaypadIDiFexve3bW1IJuosc
To: /content/pneumonia-kaggle.zip
1.22GB [00:20, 58.3MB/s]


In [None]:
# unzip dataset file
!unzip -q pneumonia-kaggle.zip

#### Prepare dataset


In [None]:
# Notebook-wide constants.
# NOTE(review): IMG_SIZE is not used in the visible cells; presumably the side
# length images are resized to — confirm against the transform definition.
BATCH_SIZE = 64
IMG_SIZE = 200

# Class names in label order: a name's position in the list is its integer label.
all_class = ['normal', 'bacteria', 'virus']
class_map = dict(zip(all_class, range(len(all_class))))  # {'normal': 0, 'bacteria': 1, 'virus': 2}
class_map

In [None]:
# Collect image paths.
# glob() returns matches in arbitrary, filesystem-dependent order, so the
# training paths are sorted before splitting; combined with a fixed
# random_state this makes the train/validation membership reproducible
# across kernel restarts and machines.
SPLIT_SEED = 42
img_paths_all = sorted(glob('pneumonia-kaggle/train/*/*.jpeg'))
img_paths_test = sorted(glob('pneumonia-kaggle/test/*.jpeg'))
# Hold out 20% of the training images for validation.
img_paths_train, img_paths_val = train_test_split(
    img_paths_all, test_size=0.2, random_state=SPLIT_SEED
)

In [None]:
# Sizes of the train / validation / test path lists, in that order.
tuple(map(len, (img_paths_train, img_paths_val, img_paths_test)))

In [None]:
class PneumoniaDataset(torch.utils.data.Dataset):
    """Labelled image dataset backed by a list of image file paths.

    The label is derived from the path itself: an image whose parent folder
    is ``NORMAL`` gets class ``normal``; for any other folder the class name
    is the second underscore-separated token of the file name.

    Args:
        paths: Sequence of image file paths.
        transform: Callable applied to the loaded RGB image; its return value
            is the image part of each sample.
    """

    # NOTE(review): `Image` is not imported in the visible import cell;
    # presumably `from PIL import Image` is needed — confirm.

    def __init__(self, paths, transform):
        self.paths = paths
        self.transform = transform
        # Class name -> integer label.
        self.class_map = {
            'normal': 0,
            'bacteria': 1,
            'virus': 2
        }

    def __len__(self):
        """Return the number of samples (one per path)."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        Returns:
            The transformed image and a scalar ``torch.long`` label tensor.

        Raises:
            IndexError: if a non-``NORMAL`` file name has no underscore.
            KeyError: if the parsed class name is not in ``self.class_map``.
        """
        path = self.paths[idx]
        img = Image.open(path).convert("RGB")
        img = self.transform(img)

        # Read the label from the path. os.path is used instead of a literal
        # '/' split so the platform's path separators are handled as well.
        folder = os.path.basename(os.path.dirname(path))
        if folder == 'NORMAL':
            class_name = folder.lower()
        else:
            # Fixed: the original referenced the undefined name `img_path`
            # here, raising NameError for every non-NORMAL sample.
            class_name = os.path.basename(path).split('_')[1]
        label = torch.tensor(self.class_map[class_name], dtype=torch.long)

        return img, label

#### Build model

#### Training

#### Make Kaggle Submission

https://www.kaggle.com/c/pneumonia-sai2

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled image dataset: each item is a transformed RGB image only.

    Args:
        paths: Sequence of image file paths.
        transform: Callable applied to the loaded RGB image.
    """

    # NOTE(review): `Image` is not imported in the visible import cell;
    # presumably `from PIL import Image` is needed — confirm.

    def __init__(self, paths, transform):
        self.transform = transform
        self.paths = paths

    def __len__(self):
        """Return the number of images (one per path)."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and return it transformed."""
        rgb = Image.open(self.paths[idx]).convert("RGB")
        return self.transform(rgb)

# Build the test loader. Paths are sorted so the prediction order is deterministic.
# NOTE(review): `transforms` is not defined in the visible cells; assumed to be
# the evaluation transform callable created in an earlier cell — confirm.
test_paths = sorted(glob('pneumonia-kaggle/test/*.jpeg'))
test_ds = TestDataset(test_paths, transforms)
# Fixed: `BS` is not defined in any visible cell; the batch-size constant from
# the config cell is BATCH_SIZE. shuffle=False (the default) is spelled out
# because predictions are later paired with file ids purely by position.
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Load the weights stored in 'best.pth'. map_location remaps the saved tensors
# onto `device`, so a checkpoint written on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load('best.pth', map_location=device))
# Switch to eval mode and move to `device`; binding to `_` suppresses the
# module repr that would otherwise be shown as the cell output.
_ = model.eval().to(device)

In [None]:
# Inference: collect the arg-max class index for every test image.
y_pred = []
with torch.no_grad():  # no gradients are needed for prediction
    # Fixed: TestDataset yields images only, so each batch is a single tensor.
    # The original `for x, y in ...` tried to unpack a label that does not
    # exist and fails for any batch size other than 2.
    for x in tqdm(test_loader):
        x = x.to(device)
        pred = model(x)
        y_pred.append(pred.argmax(dim=1))

# Concatenate the per-batch predictions into one NumPy array on the host.
y_pred = torch.cat(y_pred, dim=0).cpu().numpy()

In [None]:
# Assemble the submission table: one zero-padded '<index>.jpeg' id per test
# path, paired positionally with the predicted class index, then write it to
# CSV without the index column.
df = pd.DataFrame({
    'Id': [f'{i:05d}.jpeg' for i in range(len(img_paths_test))],
    'Category': y_pred.astype(int),
})
df.to_csv('submission.csv', index=False)
df