In [6]:
import matplotlib.pyplot as plt
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import precision_recall_curve, roc_curve, accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
import seaborn as sns 
plt.style.use('fivethirtyeight')
import pickle 
import os 
import numpy as np
import cv2 
%matplotlib inline

In [7]:
categories = ['PNEUMONIA', 'NORMAL']  # position in this list is the integer class label
image_size = 200  # every image is resized to image_size x image_size pixels


def get_training_data(data_directory):
    """Load all images found under ``data_directory/<category>`` with their labels.

    Each file is read as a grayscale image and resized to
    ``image_size`` x ``image_size``. Files that cannot be read or resized are
    reported on stdout and skipped, so one bad file does not abort the load.

    Parameters
    ----------
    data_directory : str
        Folder that contains one sub-folder per entry of ``categories``.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_samples, 2)``. Column 0 holds the resized
        image array, column 1 the integer label (index into ``categories``).
        Rows can be unpacked as ``for image, label in result``.
    """
    samples = []
    for class_num, category in enumerate(categories):
        path = os.path.join(data_directory, category)
        # Sorted so that the sample order is identical on every run/platform
        # (os.listdir makes no ordering guarantee).
        for image in sorted(os.listdir(path)):
            image_path = os.path.join(path, image)
            try:
                image_array = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                if image_array is None:
                    # cv2.imread reports an unreadable file by returning None
                    # rather than raising.
                    print('Skipping unreadable file:', image_path)
                    continue
                resized_array = cv2.resize(image_array, (image_size, image_size))
            except Exception as e:
                # Best-effort load: report the failing file and carry on.
                print('Skipping', image_path, '-', e)
                continue
            samples.append((resized_array, class_num))

    # An (image, label) pair is a ragged sequence; np.array(list_of_pairs)
    # raises ValueError on recent NumPy releases. Filling a pre-allocated
    # object array is unambiguous on every NumPy version.
    data = np.empty((len(samples), 2), dtype=object)
    for row, (resized_array, class_num) in enumerate(samples):
        data[row, 0] = resized_array
        data[row, 1] = class_num
    return data


In [None]:
# Root folder of the chest X-ray dataset. Set the CHEST_XRAY_DIR environment
# variable to run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get('CHEST_XRAY_DIR', r'C:\Users\anany\Downloads\archive\chest_xray')

# Load each split from its own sub-folder. Loading the test split from
# 'train' would duplicate every training image and leak it into the
# evaluation data once the splits are pooled and re-split further down.
# NOTE(review): assumes DATA_DIR contains 'train', 'test' and 'val'
# sub-folders, each with one folder per category -- confirm for this copy.
training_data = get_training_data(os.path.join(DATA_DIR, 'train'))
testing_data = get_training_data(os.path.join(DATA_DIR, 'test'))
validation_data = get_training_data(os.path.join(DATA_DIR, 'val'))


In [None]:
# Class balance of the loaded training data: label 0 is counted as
# pneumonia, every other label as normal.
pneumonia_count = sum(1 for _, label in training_data if label == 0)
normal_count = len(training_data) - pneumonia_count

print('Pneumonia:', pneumonia_count)
print('Normal:', normal_count)
print('Pneumonia - Normal:', pneumonia_count - normal_count)


In [None]:
# Display the second training sample without axes, then print its class name.
sample_image, sample_label = training_data[1]
plt.imshow(sample_image, cmap='gray')
plt.axis('off')
plt.show()
print(categories[sample_label])

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Pool every loaded split into one feature list and one label list; the
# notebook draws its own train / validation / test partitions below.
X_data = []
y_data = []

for split in (training_data, testing_data, validation_data):
    for feature, label in split:
        X_data.append(feature)
        y_data.append(label)

# Reshape data for deep learning: (samples, height, width, channels)
X_data = np.array(X_data).reshape(-1, image_size, image_size, 1)
y_data = np.array(y_data, dtype=int)

# Split the data into training, testing, and validation sets.
# stratify keeps the pneumonia/normal ratio the same in every partition, so
# a random draw cannot leave the smaller class under-represented in the
# validation or test data.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=32, stratify=y_data)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.20, random_state=32, stratify=y_train)


In [None]:
def _scale_pixels(images):
    """Return ``images`` as float32 with pixel values scaled by 1/255.

    Arrays that are already floating point are returned unchanged, so
    re-running this cell does not divide by 255 a second time. float32 is
    used instead of NumPy's default float64 to halve the memory footprint.
    """
    if np.issubdtype(images.dtype, np.floating):
        return images
    return images.astype('float32') / 255.0


X_train = _scale_pixels(X_train)
X_test = _scale_pixels(X_test)
X_val = _scale_pixels(X_val)


In [None]:
# Settings for the on-the-fly augmentation of training images.
augmentation_options = dict(
    # No dataset-wide or per-sample normalisation / whitening.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Random geometric transforms.
    rotation_range=90,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

# NOTE(review): per the Keras docs fit() only computes statistics for the
# featurewise_* / zca_whitening options, all of which are disabled above, so
# this call looks redundant -- confirm before removing it.
datagen.fit(X_train)


In [None]:
# Binary classifier: three convolution blocks followed by a small dense head
# with a single sigmoid output.
model = Sequential()

for block_index, filters in enumerate((256, 64, 16)):
    # Only the first layer of a Sequential model is given the input shape.
    first_layer_kwargs = {'input_shape': X_train.shape[1:]} if block_index == 0 else {}
    model.add(Conv2D(filters, (3, 3), padding='same', **first_layer_kwargs))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    # The images are laid out as (samples, height, width, channels), so the
    # feature axis to normalise is the last one. axis=1 would keep separate
    # statistics per image row instead of per feature map.
    model.add(BatchNormalization(axis=-1))

model.add(Flatten())

model.add(Dropout(0.5))
model.add(Dense(64))
model.add(Activation('relu'))

model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# Stop once val_loss has not improved for 3 epochs and roll back to the
# weights of the best epoch.
early_stop = EarlyStopping(patience=3, monitor='val_loss', restore_best_weights=True)
adam = Adam(learning_rate=0.0001)
# The metric is registered as 'acc'; the history plots read the 'acc' and
# 'val_acc' keys, so keep this name in sync with them.
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['acc'])

In [None]:
# Print a summary of the network's layers as a sanity check before training.
model.summary()

In [None]:
# Stream augmented mini-batches of 10 images from the training split.
train_batches = datagen.flow(X_train, y_train, batch_size=10)

# Validation data is passed as plain arrays (no augmentation).
# NOTE(review): early_stop is created with patience=3, so with epochs=2 it
# can never end training early -- confirm the intended number of epochs.
history = model.fit(
    train_batches,
    epochs=2,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)


In [None]:
# Evaluate the trained model on the test split. The model is compiled with
# binary cross-entropy loss and the 'acc' metric, which is what gets reported.
model.evaluate(X_test, y_test)

In [None]:
def plot_history_metric(history, metric, title, legend_label):
    """Plot one per-epoch series recorded during training.

    Parameters
    ----------
    history : object returned by ``model.fit``
        Must expose ``epoch`` (x values) and ``history[metric]`` (y values).
    metric : str
        Key into ``history.history``, e.g. ``'acc'`` or ``'val_loss'``.
    title : str
        Figure title.
    legend_label : str
        Legend entry naming the data split the curve belongs to.
    """
    plt.figure(figsize=(16, 9))
    plt.plot(history.epoch, history.history[metric])
    plt.title(title)
    plt.xlabel('Epoch')
    plt.legend([legend_label], loc='upper left')
    plt.show()


# One figure per recorded series. The val_* curves come from the validation
# split, so they are labelled 'validation' rather than 'train'.
for metric, title, legend_label in [
    ('acc', 'Model Accuracy', 'train'),
    ('loss', 'Model Loss', 'train'),
    ('val_acc', 'Model Validation Accuracy', 'validation'),
    ('val_loss', 'Model Validation Loss', 'validation'),
]:
    plot_history_metric(history, metric, title, legend_label)

In [None]:
# Score the held-out validation split rather than the training split. These
# curves are used further down to choose the decision threshold, and
# precision/recall measured on the data the weights were fitted to are
# optimistic, which would bias that choice.
pred = model.predict(X_val)
precisions, recalls, thresholds = precision_recall_curve(y_val, pred)
fpr, tpr, thresholds2 = roc_curve(y_val, pred)

In [None]:
def plot_precision_recall(precisions, recalls, thresholds):
    """Draw precision (dashed blue) and recall (solid green) against threshold.

    The final element of ``precisions`` and ``recalls`` is dropped before
    plotting -- presumably because those arrays are one element longer than
    ``thresholds``, as with scikit-learn's ``precision_recall_curve``.
    """
    curves = ((precisions, 'b--'), (recalls, 'g-'))
    for values, line_format in curves:
        plt.plot(thresholds, values[:-1], line_format)
    plt.title('Precision vs. Recall')
    plt.xlabel('Thresholds')
    plt.legend(['Precision', 'Recall'], loc='best')
    plt.show()

def plot_roc(fpr, tpr):
    """Draw the ROC curve (``tpr`` against ``fpr``) with a dashed diagonal.

    The black dashed line from (0, 0) to (1, 1) is a visual reference to
    compare the curve against.
    """
    diagonal = [0, 1]
    plt.plot(fpr, tpr)
    plt.plot(diagonal, diagonal, 'k--')
    plt.title('ROC Curve')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()

# Render both diagnostic plots from the curve arrays computed in the
# previous cell.
plot_precision_recall(precisions, recalls, thresholds)
plot_roc(fpr, tpr)

In [None]:
# Raw model outputs for the test split; the network ends in a sigmoid, so
# these are scores between 0 and 1 that still need to be thresholded.
predictions = model.predict(X_test)

In [None]:
# Pick the first threshold on the precision/recall curve whose precision
# reaches the 0.80 target. `precisions` carries one more entry than
# `thresholds` (a final value with no matching threshold), so that last
# entry is excluded to keep the index within `thresholds`.
target_met = precisions[:-1] >= 0.80
if target_met.any():
    threshold = thresholds[np.argmax(target_met)]
else:
    # No threshold reaches the target precision: fall back to the usual
    # 0.5 cut-off for a sigmoid output instead of failing with IndexError.
    print('No threshold reaches precision 0.80; falling back to 0.5')
    threshold = 0.5

# Binarise the test-set scores (`predictions`), not the scores used to draw
# the curves (`pred`), so the result lines up one-to-one with y_test.
binary_predictions = [1 if probability >= threshold else 0
                      for probability in np.ravel(predictions)]


In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but passing the arguments the other way round swaps the
# reported precision and recall.
print('Accuracy on testing set:', accuracy_score(y_test, binary_predictions))
print('Precision on testing set:', precision_score(y_test, binary_predictions))
print('Recall on testing set:', recall_score(y_test, binary_predictions))

In [None]:
# confusion_matrix(y_true, y_pred) puts true classes on the rows and
# predicted classes on the columns, which is what the axis labels below
# state. labels=... pins the matrix to one row/column per category even if a
# class is never predicted, so the tick labels always line up.
class_ids = list(range(len(categories)))
matrix = confusion_matrix(y_test, binary_predictions, labels=class_ids)

fig, ax = plt.subplots(figsize=(16, 9))
# fmt='d' prints the cell counts as plain integers instead of scientific
# notation.
sns.heatmap(matrix, annot=True, fmt='d', ax=ax,
            xticklabels=categories, yticklabels=categories)

# Labels, title, and ticks
ax.set_xlabel('Predicted Labels', size=20)
ax.set_ylabel('True Labels', size=20)
ax.set_title('Confusion Matrix', size=20)

plt.show()

In [None]:
# Show up to the first 25 test images in a 5x5 grid with the predicted class
# underneath: blue when it matches the true label, red when it does not.
# The images come from X_test so that each one lines up with
# binary_predictions[i] and y_test[i].
n_preview = min(25, len(X_test))
plt.figure(figsize=(10, 10))
for i in range(n_preview):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # Drop the trailing channel axis for display.
    plt.imshow(X_test[i].reshape(image_size, image_size), cmap='gray')
    is_correct = binary_predictions[i] == y_test[i]
    plt.xlabel(categories[binary_predictions[i]],
               color='blue' if is_correct else 'red')
plt.show()

In [None]:
 # Persist the trained model to an .h5 file; the path is relative, so the
 # file is written to the current working directory.
 model.save('pneumonia_detection_ids proj_3.h5')