In [None]:
import os
import pickle
import zipfile

import random
import cv2
import numpy as np
import pandas as pd
import keras
import tensorflow as tf

import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.decomposition import PCA
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

from keras.utils import to_categorical
from keras.src.legacy.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras import Sequential
from keras.models import Model
from keras.layers import InputLayer, Conv2D, MaxPool2D
from keras.layers import Flatten, GlobalAveragePooling2D, GlobalMaxPooling2D, Concatenate
from keras.layers import Dense, Dropout, BatchNormalization
from keras.regularizers import l1, l2
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.saving import load_model

In [None]:
def random_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator,
    TensorFlow and Keras with the same value, and exports that value as
    ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    keras.utils.set_random_seed(seed)

    # Export the seed that was actually requested (previously hard-coded to
    # "42" regardless of the argument).
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this only
    # influences processes launched after this call, not the running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)

In [None]:
# Seed all random number generators once, before any data handling or model
# construction, so later stochastic steps start from a fixed state.
random_seed(42)

In [None]:
def fdr(y_true, y_pred):
    """Compute the false discovery rate, FP / (FP + TP), for binary labels.

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_pred: Array-like of predicted labels (0 or 1).

    Returns:
        The fraction of positive predictions that are wrong, or 0 when there
        are no positive predictions at all.
    """
    # Coerce to flat arrays: with plain Python lists `y_true == 0` would be
    # the scalar False (silently yielding 0), and a (n,) vs (n, 1) pair would
    # broadcast to an (n, n) comparison.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    FP = np.sum((y_true == 0) & (y_pred == 1))
    TP = np.sum((y_true == 1) & (y_pred == 1))

    predicted_positive = FP + TP
    return FP / predicted_positive if predicted_positive > 0 else 0

In [None]:
def fnr(y_true, y_pred):
    """Compute the false negative rate, FN / (FN + TP), for binary labels.

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_pred: Array-like of predicted labels (0 or 1).

    Returns:
        The fraction of actual positives that were predicted negative, or 0
        when there are no actual positives.
    """
    # Coerce to flat arrays: with plain Python lists `y_true == 1` would be
    # the scalar False (silently yielding 0), and a (n,) vs (n, 1) pair would
    # broadcast to an (n, n) comparison.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    FN = np.sum((y_true == 1) & (y_pred == 0))
    TP = np.sum((y_true == 1) & (y_pred == 1))

    actual_positive = FN + TP
    return FN / actual_positive if actual_positive > 0 else 0

In [None]:
def specificity(y_true, y_pred):
    """Compute the specificity (true negative rate), TN / (TN + FP).

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_pred: Array-like of predicted labels (0 or 1).

    Returns:
        The fraction of actual negatives that were predicted negative, or 0
        when there are no actual negatives.
    """
    # Coerce to flat arrays: with plain Python lists `y_true == 0` would be
    # the scalar False (silently yielding 0), and a (n,) vs (n, 1) pair would
    # broadcast to an (n, n) comparison.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))

    actual_negative = TN + FP
    return TN / actual_negative if actual_negative > 0 else 0

In [None]:
def npv(y_true, y_pred):
    """Compute the negative predictive value, TN / (TN + FN).

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_pred: Array-like of predicted labels (0 or 1).

    Returns:
        The fraction of negative predictions that are correct, or 0 when
        there are no negative predictions.
    """
    # Coerce to flat arrays: with plain Python lists `y_true == 0` would be
    # the scalar False (silently yielding 0), and a (n,) vs (n, 1) pair would
    # broadcast to an (n, n) comparison.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    TN = np.sum((y_true == 0) & (y_pred == 0))
    FN = np.sum((y_true == 1) & (y_pred == 0))

    predicted_negative = TN + FN
    return TN / predicted_negative if predicted_negative > 0 else 0

In [None]:
# Root of the precomputed feature tables. Kept in one place so the notebook
# can be pointed at another copy of the dataset by editing a single line.
DATA_DIR = '/kaggle/input/pneumonia-detection-features-datasets'


def _read_features(split, name):
    """Read the CSV feature table ``<DATA_DIR>/<split>/<name>.csv``."""
    return pd.read_csv(f'{DATA_DIR}/{split}/{name}.csv')


# Three feature tables per split. Judging by the file names these come from
# the raw chest X-ray, the segmented image, and the segmented image with a
# convex hull applied -- TODO confirm against the feature-extraction notebook.
train_cxr = _read_features('train', 'cxr')
train_ch0 = _read_features('train', 'segment')
train_ch1 = _read_features('train', 'segment_with_convexhull')

test_cxr = _read_features('test', 'cxr')
test_ch0 = _read_features('test', 'segment')
test_ch1 = _read_features('test', 'segment_with_convexhull')

In [None]:
# Split every table into a feature matrix (all columns except 'class') and
# take the label vector from the 'class' column of the cxr table.
# NOTE(review): labels are read from the cxr tables only, which presumes the
# three tables of a split list the same samples in the same order -- confirm.
y_train = train_cxr['class'].to_numpy()
x_train_cxr = train_cxr.drop(columns=['class']).to_numpy()
x_train_ch0 = train_ch0.drop(columns=['class']).to_numpy()
x_train_ch1 = train_ch1.drop(columns=['class']).to_numpy()

y_test = test_cxr['class'].to_numpy()
x_test_cxr = test_cxr.drop(columns=['class']).to_numpy()
x_test_ch0 = test_ch0.drop(columns=['class']).to_numpy()
x_test_ch1 = test_ch1.drop(columns=['class']).to_numpy()

# Sanity check: print one shape per line, train arrays first, then test.
print(
    *(
        np.shape(array)
        for array in (
            x_train_cxr, x_train_ch0, x_train_ch1, y_train,
            x_test_cxr, x_test_ch0, x_test_ch1, y_test,
        )
    ),
    sep='\n',
)

In [None]:
# Carve a validation set out of the training data: 250 samples labelled 0 and
# 350 samples labelled 1, drawn without replacement after re-seeding.
indices_0 = np.flatnonzero(y_train == 0)
indices_1 = np.flatnonzero(y_train == 1)

random_seed(42)
random_indices_0 = np.random.choice(indices_0, size=250, replace=False)
random_indices_1 = np.random.choice(indices_1, size=350, replace=False)
random_indices = np.concatenate([random_indices_0, random_indices_1])

# The same row indices are applied to every feature set and to the labels so
# the validation arrays stay aligned with each other.
x_val_cxr, x_val_ch0, x_val_ch1, y_val = (
    array[random_indices]
    for array in (x_train_cxr, x_train_ch0, x_train_ch1, y_train)
)

# Remove the validation rows from the training arrays.
# NOTE: this overwrites the training arrays, so running the cell a second
# time without re-running the previous cell removes a further 600 rows.
x_train_cxr, x_train_ch0, x_train_ch1, y_train = (
    np.delete(array, random_indices, axis=0)
    for array in (x_train_cxr, x_train_ch0, x_train_ch1, y_train)
)

# Sanity check: print one shape per line (train, validation, test).
print(
    *(
        np.shape(array)
        for array in (
            x_train_cxr, x_train_ch0, x_train_ch1, y_train,
            x_val_cxr, x_val_ch0, x_val_ch1, y_val,
            x_test_cxr, x_test_ch0, x_test_ch1, y_test,
        )
    ),
    sep='\n',
)

In [None]:
# Per-class weights from scikit-learn's "balanced" mode, computed on the
# training labels that remain after the validation split. They are passed to
# `model.fit(class_weight=...)` in the training cells.
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=y_train,
)

# Map each class label to its weight.
class_weight_dict = dict(zip(classes, class_weights))

print(class_weight_dict)

In [None]:
# One-hot encode the training and validation labels for the 2-unit softmax
# output trained with categorical cross-entropy. `y_test` is left as integer
# labels because the evaluation metrics compare it with argmax predictions.
#
# The rank check makes the cell safe to re-run: feeding already one-hot
# labels of shape (n, 2) back into `to_categorical` would produce an
# (n, 2, 2) array and break `model.fit`.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes=2)
if np.ndim(y_val) == 1:
    y_val = to_categorical(y_val, num_classes=2)

# Sanity check: print one shape per line (train, validation, test).
print(np.shape(x_train_cxr))
print(np.shape(x_train_ch0))
print(np.shape(x_train_ch1))
print(np.shape(y_train))
print(np.shape(x_val_cxr))
print(np.shape(x_val_ch0))
print(np.shape(x_val_ch1))
print(np.shape(y_val))
print(np.shape(x_test_cxr))
print(np.shape(x_test_ch0))
print(np.shape(x_test_ch1))
print(np.shape(y_test))

In [None]:
# Train and evaluate the dense classifier on the `cxr` feature set.
# Re-seed first so weight initialisation and shuffling start from the same
# state in every training cell.
random_seed(42)

# MLP over 8192-dimensional feature vectors: two hidden Dense blocks, each
# followed by batch normalisation and dropout, then a 2-way softmax.
model = Sequential([
    InputLayer(shape=(8192,)),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=4e-6),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    x=x_train_cxr,
    y=y_train,
    validation_data=(x_val_cxr, y_val),
    batch_size=32,
    epochs=100,
    class_weight=class_weight_dict
)

# Keep the class probabilities as well as the hard labels: ROC AUC has to be
# computed from continuous scores. Scoring the argmax labels (as before)
# collapses the ROC curve to a single operating point.
y_prob = model.predict(x_test_cxr, verbose=False)
y_pred = np.argmax(y_prob, axis=1).reshape(-1)

print('accuracy = {}'.format(accuracy_score(y_test, y_pred)))
print('precision = {}'.format(precision_score(y_test, y_pred)))
print('FDR = {}'.format(fdr(y_test, y_pred)))
print('recall = {}'.format(recall_score(y_test, y_pred)))
print('FNR = {}'.format(fnr(y_test, y_pred)))
print('specificity = {}'.format(specificity(y_test, y_pred)))
print('NPV = {}'.format(npv(y_test, y_pred)))
print('f1-score = {}'.format(f1_score(y_test, y_pred)))
# Column 1 of the softmax output is the predicted probability of class 1.
print('AUC = {}'.format(roc_auc_score(y_test, y_prob[:, 1])))
print('MCC = {}'.format(matthews_corrcoef(y_test, y_pred)))

In [None]:
# Train and evaluate the dense classifier on the `ch0` (segment.csv) feature
# set. Re-seed first so weight initialisation and shuffling start from the
# same state in every training cell.
random_seed(42)

# MLP over 8192-dimensional feature vectors: two hidden Dense blocks, each
# followed by batch normalisation and dropout, then a 2-way softmax.
model = Sequential([
    InputLayer(shape=(8192,)),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=4e-6),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    x=x_train_ch0,
    y=y_train,
    validation_data=(x_val_ch0, y_val),
    batch_size=32,
    epochs=100,
    class_weight=class_weight_dict
)

# Keep the class probabilities as well as the hard labels: ROC AUC has to be
# computed from continuous scores. Scoring the argmax labels (as before)
# collapses the ROC curve to a single operating point.
y_prob = model.predict(x_test_ch0, verbose=False)
y_pred = np.argmax(y_prob, axis=1).reshape(-1)

print('accuracy = {}'.format(accuracy_score(y_test, y_pred)))
print('precision = {}'.format(precision_score(y_test, y_pred)))
print('FDR = {}'.format(fdr(y_test, y_pred)))
print('recall = {}'.format(recall_score(y_test, y_pred)))
print('FNR = {}'.format(fnr(y_test, y_pred)))
print('specificity = {}'.format(specificity(y_test, y_pred)))
print('NPV = {}'.format(npv(y_test, y_pred)))
print('f1-score = {}'.format(f1_score(y_test, y_pred)))
# Column 1 of the softmax output is the predicted probability of class 1.
print('AUC = {}'.format(roc_auc_score(y_test, y_prob[:, 1])))
print('MCC = {}'.format(matthews_corrcoef(y_test, y_pred)))

In [None]:
# Train and evaluate the dense classifier on the `ch1`
# (segment_with_convexhull.csv) feature set. Re-seed first so weight
# initialisation and shuffling start from the same state in every training
# cell.
random_seed(42)

# MLP over 8192-dimensional feature vectors: two hidden Dense blocks, each
# followed by batch normalisation and dropout, then a 2-way softmax.
model = Sequential([
    InputLayer(shape=(8192,)),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=4e-6),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    x=x_train_ch1,
    y=y_train,
    validation_data=(x_val_ch1, y_val),
    batch_size=32,
    epochs=100,
    class_weight=class_weight_dict
)

# Keep the class probabilities as well as the hard labels: ROC AUC has to be
# computed from continuous scores. Scoring the argmax labels (as before)
# collapses the ROC curve to a single operating point.
y_prob = model.predict(x_test_ch1, verbose=False)
y_pred = np.argmax(y_prob, axis=1).reshape(-1)

print('accuracy = {}'.format(accuracy_score(y_test, y_pred)))
print('precision = {}'.format(precision_score(y_test, y_pred)))
print('FDR = {}'.format(fdr(y_test, y_pred)))
print('recall = {}'.format(recall_score(y_test, y_pred)))
print('FNR = {}'.format(fnr(y_test, y_pred)))
print('specificity = {}'.format(specificity(y_test, y_pred)))
print('NPV = {}'.format(npv(y_test, y_pred)))
print('f1-score = {}'.format(f1_score(y_test, y_pred)))
# Column 1 of the softmax output is the predicted probability of class 1.
print('AUC = {}'.format(roc_auc_score(y_test, y_prob[:, 1])))
print('MCC = {}'.format(matthews_corrcoef(y_test, y_pred)))