# **Pneumonia Detection**
**A machine learning project for detecting pneumonia from chest X-ray images. It includes data preprocessing, feature extraction, and performance evaluation to aid early diagnosis.**

## **Deep Model Tuning**

In [None]:
import numpy as np
import pandas as pd
from sklearn.base import ClassifierMixin
from matplotlib import pyplot as plt
from matplotlib.axes import Axes

# Number of cross-validation folds; passed as `cv` to `learning_curve` in
# `plot_learning_curve`.
# NOTE(review): scikit-learn's k-fold splitters appear to require at least 2
# folds, so a value of 1 would likely be rejected — confirm before re-enabling
# the learning-curve plot.
KFOLD = 1

### **③ Learning Curve**
Illustrates how the classifier's performance evolves as the volume of training data increases. The red line tracks the training score, whereas the green line represents the cross-validation (test) score. Shaded regions surrounding each line highlight the variability in these scores. A high training score paired with a low validation score may suggest overfitting, while low scores on both curves can indicate underfitting.

In [None]:
from sklearn.model_selection import learning_curve


def plot_learning_curve(
  estimator: ClassifierMixin,
  scorer: callable,
  X_train: np.ndarray,
  y_train: np.ndarray,
  ax: Axes,
):
  """Plot training and cross-validation scores against the training-set fraction.

  Runs `learning_curve` with `KFOLD` folds and the given scorer, then draws the
  mean score of each curve (red: training, green: cross-validation) with a
  shaded band of one standard deviation across folds.

  Args:
    estimator: Classifier to evaluate; its class name is used in the title.
    scorer: Scoring callable forwarded to `learning_curve` as `scoring`.
    X_train: Training features.
    y_train: Training labels.
    ax: Matplotlib axes to draw on.

  Note:
    The y-axis label is hard-coded to "F1 Score" regardless of `scorer`.
  """
  # Request the subset fractions explicitly (these are scikit-learn's defaults)
  # instead of relying on an implicit default that the plot has to guess.
  requested_fractions = np.linspace(0.1, 1.0, 5)
  train_sizes_abs, train_scores, test_scores = learning_curve(
    estimator,
    X_train,
    y_train,
    train_sizes=requested_fractions,
    cv=KFOLD,
    scoring=scorer,
  )

  # Derive the x-coordinates from the sizes that were actually evaluated, so the
  # points stay aligned with the scores even if duplicate sizes were removed.
  # The largest subset corresponds to the fraction 1.0.
  train_sizes = train_sizes_abs / train_sizes_abs.max()

  train_scores_mean = np.mean(train_scores, axis=1)
  train_scores_std = np.std(train_scores, axis=1)
  test_scores_mean = np.mean(test_scores, axis=1)
  test_scores_std = np.std(test_scores, axis=1)

  # Plot learning curve
  ax.grid()
  ax.set_xlim(0.0, 1.05)
  ax.set_xlabel("Data Percentage")
  ax.set_ylim(0.0, 1.05)
  ax.set_ylabel("F1 Score")
  ax.fill_between(
    train_sizes,
    train_scores_mean - train_scores_std,
    train_scores_mean + train_scores_std,
    alpha=0.1,
    color="r",
  )
  ax.fill_between(
    train_sizes,
    test_scores_mean - test_scores_std,
    test_scores_mean + test_scores_std,
    alpha=0.1,
    color="g",
  )
  ax.plot(train_sizes, train_scores_mean, "o-", color="r", label="Training score")
  ax.plot(
    train_sizes, test_scores_mean, "o-", color="g", label="Cross-validation score"
  )
  ax.legend(loc="best")
  plot_title = f"{type(estimator).__name__} Learning Curve"
  ax.set_title(plot_title)


### **③ Feature Importance**
Illustrates the average permutation importance of each Local Binary Pattern (LBP) feature for the selected classifier, computed over 30 random shuffles. Taller bars denote features whose perturbation causes the greatest drop in accuracy, highlighting their stronger contribution to the model’s predictive performance.

In [None]:
from sklearn.inspection import permutation_importance


def plot_feature_importance(
  estimator: ClassifierMixin,
  scorer: callable,
  X_test: np.ndarray,
  y_test: np.ndarray,
  ax: Axes,
):
  """Draw a bar chart of mean permutation importance per feature.

  Args:
    estimator: Fitted classifier to inspect; its class name is used in the title.
    scorer: Scoring callable forwarded to `permutation_importance` as `scoring`.
    X_test: Features on which the importances are computed.
    y_test: Labels matching `X_test`.
    ax: Matplotlib axes to draw on.
  """
  # 30 repeats with a fixed seed keep the averaged importances reproducible.
  importances = permutation_importance(
    estimator, X_test, y_test, scoring=scorer, n_repeats=30, random_state=42
  )
  mean_importance = pd.DataFrame(importances.importances_mean)

  bar_options = {
    "y": 0,  # the single unnamed column holding the mean importances
    "use_index": True,
    "xlabel": "LBP Feature",
    "ylabel": "Mean Importance",
    "title": f"{type(estimator).__name__} LBP Feature Importance",
    "rot": 45,
    "align": "center",
    "legend": False,
    "grid": True,
    "ax": ax,
  }
  mean_importance.plot.bar(**bar_options)

### **③ Confusion Matrix**
Presents the confusion matrix for the classifier, comparing the actual test labels with the predicted results. The varying shades of blue indicate the frequency of predictions, making it easy to identify areas where the model excels or needs improvement.

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


def plot_confusion_matrix(
  estimator: ClassifierMixin, y_test: np.ndarray, y_guess: np.ndarray, ax: Axes
):
  """Render the confusion matrix of predictions against the true labels.

  Args:
    estimator: Classifier that produced `y_guess`; only its class name is used,
      for the plot title.
    y_test: True labels.
    y_guess: Predicted labels.
    ax: Matplotlib axes to draw on.
  """
  matrix = confusion_matrix(y_test, y_guess)
  cm_display = ConfusionMatrixDisplay(matrix)
  cm_display.plot(ax=ax, cmap=plt.cm.Blues)

  # Label through the display's own axes handle, exactly where it drew.
  drawn_ax = cm_display.ax_
  drawn_ax.set_title(f"{type(estimator).__name__} Confusion Matrix")
  drawn_ax.set_xlabel("Predicted Label")
  drawn_ax.set_ylabel("True Label")

### **③ AUC-ROC Curve**
Displays the trade-off between the True Positive Rate (TPR) and the False Positive Rate (FPR) across different classification thresholds. The area under the ROC curve (AUC) quantifies the model's ability to distinguish between the classes (the closer the AUC is to 1, the better the performance). The gray dashed line represents a baseline where predictions are equivalent to random guessing.

In [None]:
from sklearn.metrics import roc_curve, auc


def plot_aucroc_curve(
  estimator: ClassifierMixin, X_test: np.ndarray, y_test: np.ndarray, ax: Axes
):
  """Draw the ROC curve of a fitted classifier with its AUC in the legend.

  Args:
    estimator: Fitted classifier exposing `predict_proba`; its class name is
      used in the title.
    X_test: Features to score.
    y_test: True labels matching `X_test`.
    ax: Matplotlib axes to draw on.
  """
  # Column 1 of predict_proba is used as the score of the positive class.
  positive_scores = estimator.predict_proba(X_test)[:, 1]
  fpr, tpr, _ = roc_curve(y_test, positive_scores)
  area = auc(fpr, tpr)

  roc_frame = pd.DataFrame({"fpr": fpr, "tpr": tpr})
  curve_options = {
    "xlabel": "False Positive Rate",
    "ylabel": "True Positive Rate",
    "label": f"AUC = {area:.3f}",
    "xlim": [0.0, 1.05],
    "ylim": [0.0, 1.05],
    "grid": True,
    "title": f"{type(estimator).__name__} AUC-ROC Curve",
    "ax": ax,
  }
  roc_frame.plot(x="fpr", y="tpr", **curve_options)

  # Diagonal reference line: performance of random guessing.
  ax.plot([0, 1], [0, 1], color="gray", linestyle="--")
  ax.legend(loc="lower right")

### **③ Precision-Recall Curve**
Shows the trade-off between precision and recall as the decision threshold shifts. The curve's area, expressed as the Average Precision (AP) score, encapsulates the model's overall performance. A higher AP score suggests an excellent balance between precision and recall, a critical measure when dealing with imbalanced datasets. The gray dashed line serves as a baseline for comparison.

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score


def plot_prcrcl_curve(
  estimator: ClassifierMixin, X_test: np.ndarray, y_test: np.ndarray, ax: Axes
):
  """Draw the precision-recall curve of a fitted classifier.

  The legend shows the Average Precision (AP) score. The gray dashed line is
  the no-skill baseline: a horizontal line at the positive-class prevalence.

  Args:
    estimator: Fitted classifier exposing `predict_proba`; its class name is
      used in the title.
    X_test: Features to score.
    y_test: True labels matching `X_test`. The positive class is assumed to be
      labelled 1 — confirm against the dataset loader.
    ax: Matplotlib axes to draw on.
  """
  # Column 1 of predict_proba is used as the score of the positive class.
  y_pred_proba = estimator.predict_proba(X_test)[:, 1]
  precision, recall, _ = precision_recall_curve(y_test, y_pred_proba)
  avg_precision = average_precision_score(y_test, y_pred_proba)
  pd.DataFrame({"recall": recall, "precision": precision}).plot(
    "recall",
    "precision",
    xlabel="Recall",
    ylabel="Precision",
    label=f"AP = {avg_precision:.3f}",
    xlim=[0.0, 1.05],
    ylim=[0.0, 1.05],
    grid=True,
    title=f"{type(estimator).__name__} Precision-Recall Curve",
    ax=ax,
  )

  # A classifier with no skill has a precision equal to the fraction of
  # positive samples at every recall level, so the chance baseline is a
  # horizontal line at that prevalence (not a diagonal as on a ROC plot).
  prevalence = float(np.mean(np.asarray(y_test) == 1))
  ax.plot([0, 1], [prevalence, prevalence], color="gray", linestyle="--")
  ax.legend(loc="lower left")


### **② Pretrained Model Finetuning**
Fits the given pretrained estimator on the training set; hyperparameter search with GridSearchCV and stratified k-fold cross-validation is currently disabled, so the estimator is trained once with its configured settings. Once the estimator is fitted, multiple plots are generated to diagnose model performance, including the confusion matrix, AUC-ROC curve, and precision-recall curve (the learning curve is currently disabled as well). Finally, the performance metrics (F1, precision, and recall) on the test set are returned.

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score
# from sklearn.model_selection import StratifiedKFold


def finetune_pretrained(
  estimator: ClassifierMixin,
  X_train: np.ndarray,
  y_train: np.ndarray,
  X_test: np.ndarray,
  y_test: np.ndarray,
  axes: np.ndarray,
) -> list:
  """Fit an estimator, plot its test-set diagnostics, and score it.

  Args:
    estimator: Classifier to fit; `fit` is expected to return the fitted
      estimator, which is used for all later calls.
    X_train: Training features.
    y_train: Training labels.
    X_test: Test features.
    y_test: Test labels.
    axes: Indexable collection of at least three axes; positions 0, 1 and 2
      receive the confusion matrix, ROC curve and precision-recall curve.

  Returns:
    `[f1, precision, recall]` computed on the test set.
  """
  # Cross-validated tuning and the learning-curve plot are intentionally not
  # run here; the estimator is trained once with its configured settings.
  model = estimator.fit(X_train, y_train)
  predictions = model.predict(X_test)

  plot_confusion_matrix(model, y_test, predictions, ax=axes[0])
  plot_aucroc_curve(model, X_test, y_test, ax=axes[1])
  plot_prcrcl_curve(model, X_test, y_test, ax=axes[2])

  metrics = (f1_score, precision_score, recall_score)
  return [metric(y_test, predictions) for metric in metrics]

### **① Models for Finetuning**
Defines a collection of model recipes to fine-tune.

In [None]:
from _pretrained_model import (
  ConvNeXTPretrainedClassifier,
  EfficientNetPretrainedClassifier,
  ResNetPretrainedClassifier,
  SwinPretrainedClassifier,
  ViTPretrainedClassifier,
)

# Hyperparameters shared by every pretrained classifier recipe below.
_SHARED_MODEL_PARAMS = dict(
  binary_classification=True,
  class_weight="balanced",
  freeze_pretrained=True,
  random_state=42,
  batch_size=32,
  max_iter=10,
  early_stopping=True,
  n_iter_no_change=3,
  learning_rate=0.003,
  lr_scheduler="reduce_on_plateau",
  lr_scheduler_patience=2,
  lr_scheduler_factor=0.5,
  verbose=True,
)

# One recipe per backbone, all configured identically. The order matters:
# later cells select recipes from this list by position.
models = [
  backbone(**_SHARED_MODEL_PARAMS)
  for backbone in (
    ConvNeXTPretrainedClassifier,
    EfficientNetPretrainedClassifier,
    ResNetPretrainedClassifier,
    SwinPretrainedClassifier,
    ViTPretrainedClassifier,
  )
]

### **① Pneumonia Classification Pipeline**
Loads a pneumonia dataset and splits the data into training and testing sets. Then creates a multi-panel figure that displays several diagnostic plots for each classifier in the recipes.

In [None]:
from _data import load_pneumonia_1d


def prepare_pneumonia_dataset_1d():
  """Load the flattened pneumonia data and rebuild 3-channel image arrays.

  Each loaded table is treated as rows of flattened 224x224 pixels followed by
  one label column.

  Returns:
    `(X_train, y_train, X_test, y_test)`, where each `X` has shape
    (N, 224, 224, 3) and each `y` is the squeezed label column.
  """

  def to_images_and_labels(table):
    # The last column is the label; everything before it is pixel data.
    pixels, labels = np.split(table, [-1], axis=1)
    # Restore the 2D grayscale images from the flattened rows.
    grayscale = pixels.reshape(-1, 224, 224)
    # Copy the single channel three times: (N, 3, H, W).
    channels_first = np.repeat(grayscale[:, np.newaxis, :, :], 3, axis=1)
    # Move the channel axis last: (N, H, W, C).
    channels_last = np.transpose(channels_first, (0, 2, 3, 1))
    return channels_last, labels.squeeze()

  train_data, test_data = load_pneumonia_1d(
    include_raw=True, include_hog=False, include_lbp=False, pca_mode="none"
  )
  X_train_final, y_train = to_images_and_labels(train_data)
  X_test_final, y_test = to_images_and_labels(test_data)
  return X_train_final, y_train, X_test_final, y_test


# Only a subset of the recipes is trained here (currently the third entry of
# `models`); widen the slice to evaluate more of them.
selected_models = models[2:3]

# Size the grid to the models that are actually run, so no empty rows are
# rendered. squeeze=False keeps `axs` two-dimensional even with a single row,
# which the `axs[index, :]` indexing below relies on.
fig, axs = plt.subplots(
  len(selected_models),
  3,
  figsize=(19.5, 6.5 * len(selected_models)),
  squeeze=False,
)
X_train, y_train, X_test, y_test = prepare_pneumonia_dataset_1d()

# One row of test-set metrics per model, indexed by the model's class name.
test_score = pd.DataFrame(columns=["F1", "Precision", "Recall"])
for index, model in enumerate(selected_models):
  model_name = type(model).__name__
  print(model_name)
  test_score.loc[model_name] = finetune_pretrained(
    model, X_train, y_train, X_test, y_test, axs[index, :]
  )
plt.show()

### **④ Model Performance Comparison**
Compares the performance of the various classifiers on the testing set using bar charts, one per metric. Model names are displayed along the x-axis, while their corresponding scores appear on the y-axis.

In [None]:
# One bar chart per metric column of `test_score`, laid out side by side.
_, axs = plt.subplots(1, 3, figsize=(19.5, 6.5))
for ax, col in zip(axs, test_score.columns):
  bar_options = {
    "y": col,
    "use_index": True,  # model names along the x-axis
    "ylim": [0.0, 1.0],
    "legend": False,
    "grid": True,
    "title": f"Testing Set {col} Score by Model",
    "ax": ax,
  }
  test_score.plot.bar(**bar_options)
  # Slant the model names so long class names do not overlap.
  ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")