In [None]:
# Root folder of the chest X-ray dataset, plus one folder per split
data_path = '/kaggle/input/chest-xray-pneumonia/chest_xray/'

train_path = f'{data_path}train/'
valid_path = f'{data_path}val/'
test_path = f'{data_path}test/'

In [None]:
from glob import glob

# Count every entry found one level below each split's sub-folders
train_count = len(glob(train_path + "*/*"))
valid_count = len(glob(valid_path + "*/*"))
test_count = len(glob(test_path + "*/*"))

print(f'Number of Training Data: {train_count}')
print(f'Number of Validation Data: {valid_count}')
print(f'Number of Test Data: {test_count}')

In [None]:
# Gather the image paths of every split into one list per class.
all_normal_imgs = []
all_pneumonia_imgs = []

for cat in ['train/', 'val/', 'test/']:
    data_cat_path = data_path + cat
    # glob() yields matches in arbitrary, filesystem-dependent order; sort them
    # so positional slices of these lists (e.g. "the last 6 images") are
    # reproducible from run to run.
    normal_imgs = sorted(glob(data_cat_path + 'NORMAL/*'))
    pneumonia_imgs = sorted(glob(data_cat_path + 'PNEUMONIA/*'))
    # Add to the corresponding list
    all_normal_imgs.extend(normal_imgs)
    all_pneumonia_imgs.extend(pneumonia_imgs)

print(f'Number of Normal Chest X-ray Images: {len(all_normal_imgs)}')
print(f'Number of Pneumonia Chest X-ray Images: {len(all_pneumonia_imgs)}')

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

# Pie chart of the Normal vs. Pneumonia image counts gathered above
mpl.rc('font', size=15)
plt.figure(figsize=(7, 7))

label = ['Normal', 'Pneumonia']
class_counts = [len(all_normal_imgs), len(all_pneumonia_imgs)]

plt.pie(class_counts, labels=label, autopct='%.1f%%')

In [None]:
import matplotlib.gridspec as gridspec
import cv2

def show_image(img_paths, rows = 2, cols = 3):
    """Display the images at ``img_paths`` on a ``rows`` x ``cols`` grid.

    Args:
        img_paths: Sequence of image file paths to read with ``cv2.imread``.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.

    Raises:
        AssertionError: If more paths are given than the grid has cells.
        FileNotFoundError: If any path cannot be read as an image.

    Note:
        Sets the global matplotlib font size to 9 as a side effect.
    """
    # Error if more images exist than the rows * cols
    assert len(img_paths) <= rows * cols, (
        f'{len(img_paths)} images do not fit in a {rows}x{cols} grid')

    # Load everything up front so an unreadable file fails before any
    # figure is created.
    images = []
    for img_path in img_paths:
        image = cv2.imread(img_path)
        # cv2.imread reports failure by returning None rather than raising
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV loads channels in BGR order; matplotlib's imshow expects RGB
        images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    mpl.rc('font', size = 9)
    plt.figure(figsize = (15, 8))
    grid = gridspec.GridSpec(rows, cols)

    for idx, image in enumerate(images):
        ax = plt.subplot(grid[idx])
        ax.imshow(image)

In [None]:
# Preview the final `num_of_imgs` entries of the Normal image list
num_of_imgs = 6

normal_img_paths = all_normal_imgs[-num_of_imgs:]
show_image(img_paths=normal_img_paths)

In [None]:
# Preview the final `num_of_imgs` entries of the Pneumonia image list
pneumonia_img_paths = all_pneumonia_imgs[-num_of_imgs:]
show_image(img_paths=pneumonia_img_paths)