# **Pneumonia Detection**
**A machine learning project for detecting pneumonia from chest X-ray images. It includes data preprocessing, feature extraction, and performance evaluation to aid early diagnosis.**

## Data Preprocessing

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

### Prepare Data Table

In [2]:
# Build the raw-pixel table for each split: one row per image holding the
# flattened IMAGE_SIDE x IMAGE_SIDE grayscale pixels followed by the label
# (1 = pneumonia, 0 = otherwise).
#
# The CSV doubles as a cache: an existing file is never regenerated, so delete
# it to rebuild the table after changing this cell.
IMAGE_SIDE = 224
N_PIXELS = IMAGE_SIDE * IMAGE_SIDE  # 50176 pixel columns; the label is column 50176

for phase in ["train", "test"]:
  if not Path(f"./data/pneumonia_{phase}_raw.csv").is_file():
    # Imported only when the table actually has to be rebuilt.
    from skimage import io, color, transform
    from tqdm import tqdm

    image_paths = [str(posix) for posix in Path(f"./data/{phase}/").rglob("*.jpeg")]

    image_pixels = []
    for image_path in tqdm(image_paths):
      image_file = io.imread(image_path)
      if image_file.ndim == 3:
        # rgb2gray returns floats in [0, 1], whereas images that are already
        # grayscale keep their 8-bit [0, 255] range. Rescale so every row of
        # the table uses the same intensity scale (assumes 8-bit JPEG input).
        image_file = color.rgb2gray(image_file) * 255.0
      image_file = transform.resize(
        image_file, (IMAGE_SIDE, IMAGE_SIDE), preserve_range=True
      )
      # The label is taken from the path: any path containing "PNEUMONIA".
      is_pneumonia = "PNEUMONIA" in image_path
      image_pixels.append(np.append(image_file.flatten(), int(is_pneumonia)))
    df_pneumonia = pd.DataFrame(image_pixels, columns=range(N_PIXELS + 1))
    df_pneumonia.to_csv(f"./data/pneumonia_{phase}_raw.csv")

100%|██████████| 624/624 [00:15<00:00, 41.44it/s]


### Prepare HOG Table

In [3]:
# Build the HOG feature table for each split: one row per image holding the
# HOG descriptor of the resized grayscale image followed by the label
# (1 = pneumonia, 0 = otherwise).
#
# The CSV doubles as a cache: an existing file is never regenerated, so delete
# it to rebuild the table after changing this cell.
IMAGE_SIDE = 224
HOG_ORIENTATIONS = 9
HOG_CELL = 8   # pixels per cell side
HOG_BLOCK = 2  # cells per block side
# Blocks slide one cell at a time, so there are (cells - block + 1) per side.
HOG_BLOCKS_PER_SIDE = IMAGE_SIDE // HOG_CELL - HOG_BLOCK + 1
N_HOG_FEATURES = HOG_BLOCKS_PER_SIDE ** 2 * HOG_BLOCK ** 2 * HOG_ORIENTATIONS  # 26244

for phase in ["train", "test"]:
  if not Path(f"./data/pneumonia_{phase}_hog.csv").is_file():
    # Imported only when the table actually has to be rebuilt.
    from skimage import io, color, transform, feature
    from tqdm import tqdm

    image_paths = [str(posix) for posix in Path(f"./data/{phase}/").rglob("*.jpeg")]

    image_hogs = []
    for image_path in tqdm(image_paths):
      image_file = io.imread(image_path)
      if image_file.ndim == 3:
        # rgb2gray returns floats in [0, 1], whereas images that are already
        # grayscale keep their 8-bit [0, 255] range. Rescale so every image
        # reaches the descriptor on the same intensity scale (assumes 8-bit
        # JPEG input).
        image_file = color.rgb2gray(image_file) * 255.0
      image_file = transform.resize(
        image_file, (IMAGE_SIDE, IMAGE_SIDE), preserve_range=True
      )
      image_hog = feature.hog(
        image_file,
        orientations=HOG_ORIENTATIONS,
        pixels_per_cell=(HOG_CELL, HOG_CELL),
        cells_per_block=(HOG_BLOCK, HOG_BLOCK),
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
        visualize=False
      )
      # The label is taken from the path: any path containing "PNEUMONIA".
      is_pneumonia = "PNEUMONIA" in image_path
      image_hogs.append(np.append(image_hog, int(is_pneumonia)))
    # Feature columns 0..N_HOG_FEATURES-1, label in the last column.
    df_pneumonia_hog = pd.DataFrame(image_hogs, columns=range(N_HOG_FEATURES + 1))
    df_pneumonia_hog.to_csv(f"./data/pneumonia_{phase}_hog.csv")

100%|██████████| 624/624 [00:19<00:00, 32.18it/s]


### Prepare LBP Table

In [4]:
# Build the LBP feature table for each split: one row per image holding the
# normalized histogram of uniform local binary patterns followed by the label
# (1 = pneumonia, 0 = otherwise).
#
# The CSV doubles as a cache: an existing file is never regenerated, so delete
# it to rebuild the table after changing this cell.
IMAGE_SIDE = 224
LBP_POINTS = 24
LBP_RADIUS = 3
# The "uniform" method yields LBP_POINTS + 2 distinct codes (0 .. LBP_POINTS + 1).
N_LBP_BINS = LBP_POINTS + 2

for phase in ["train", "test"]:
  if not Path(f"./data/pneumonia_{phase}_lbp.csv").is_file():
    # Imported only when the table actually has to be rebuilt.
    from skimage import io, color, transform, feature
    from tqdm import tqdm

    image_paths = [str(posix) for posix in Path(f"./data/{phase}/").rglob("*.jpeg")]

    image_lbps = []
    for image_path in tqdm(image_paths):
      image_file = io.imread(image_path)
      if image_file.ndim == 3:
        # rgb2gray returns floats in [0, 1]; without rescaling, the uint8 cast
        # below would truncate almost every pixel to 0 and the LBP histogram
        # would be meaningless for RGB inputs (assumes 8-bit JPEG input).
        image_file = color.rgb2gray(image_file) * 255.0
      image_file = transform.resize(
        image_file, (IMAGE_SIDE, IMAGE_SIDE), preserve_range=True
      ).astype(np.uint8)
      image_lbp = feature.local_binary_pattern(
        image_file, P=LBP_POINTS, R=LBP_RADIUS, method="uniform"
      )
      image_hist, _ = np.histogram(
        image_lbp.ravel(), bins=N_LBP_BINS, range=(0, N_LBP_BINS), density=True
      )
      # density=True already normalizes; the extra division is kept so values
      # for grayscale inputs match previously generated tables.
      image_hist /= (image_hist.sum() + 1e-6)
      # The label is taken from the path: any path containing "PNEUMONIA".
      is_pneumonia = "PNEUMONIA" in image_path
      image_lbps.append(np.append(image_hist, int(is_pneumonia)))
    # Histogram columns 0..N_LBP_BINS-1, label in the last column.
    df_pneumonia_lbp = pd.DataFrame(image_lbps, columns=range(N_LBP_BINS + 1))
    df_pneumonia_lbp.to_csv(f"./data/pneumonia_{phase}_lbp.csv")

100%|██████████| 624/624 [00:17<00:00, 35.38it/s]
