In [None]:
from google.colab import drive

# Mount Google Drive so the dataset and result folders below are reachable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:

# General libraries
import os
import numpy as np
import pandas as pd 
import random
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

# Deep learning libraries
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, LeakyReLU, Activation, Lambda, GlobalAveragePooling2D, DepthwiseConv2D, GlobalMaxPooling2D
from tensorflow.keras.layers import Add, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD
import tensorflow as tf

# Setting seeds for reproducibility
# Seed every RNG imported above: Python's `random`, NumPy and TensorFlow.
seed = 232
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Root folder of the training images. It is listed per class below and is the
# directory process_data reads for both the training and validation subsets.
input_path = '/content/drive/My Drive/vir vs bact/train'

In [None]:
# Root folder of the test images that process_data loads into memory.
# Change it as necessary.
input_path1 = '/content/drive/My Drive/vir vs bact/test'

In [None]:
# Distribution of our datasets: number of files in each training class folder.
n_viral = len(os.listdir(input_path + '/viral pneumonia'))
n_bacterial = len(os.listdir(input_path + '/bacterial pneumonia'))

print('viral pneumonia images: {}, bacterial pneumonia images: {}'.format(n_viral, n_bacterial))

viral pneumonia images: 1022, bacterial pneumonia images: 2213


In [None]:
def process_data(img_dims, batch_size):
    """Build the train/validation generators and load the test set into memory.

    Args:
        img_dims: Side length in pixels; images are resized to
            (img_dims, img_dims).
        batch_size: Batch size used by the training and validation generators.

    Returns:
        A tuple ``(train_gen, val_gen, test_data, test_labels)``:
        ``train_gen`` / ``val_gen`` are directory iterators over ``input_path``
        (the 'training' and 'validation' subsets of a 0.2 validation split);
        ``test_data`` is a float32 array of 3-channel test images scaled by
        1/255; ``test_labels`` is an integer array with 1 for
        'viral pneumonia' and 0 for 'bacterial pneumonia'.
    """
    # Data generation object: rescale pixels and reserve 20% for validation.
    train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

    # Both subsets share every setting except `subset`.
    flow_kwargs = dict(directory=input_path,
                       target_size=(img_dims, img_dims),
                       batch_size=batch_size,
                       class_mode='categorical',
                       shuffle=True)
    train_gen = train_datagen.flow_from_directory(subset='training', **flow_kwargs)
    val_gen = train_datagen.flow_from_directory(subset='validation', **flow_kwargs)

    # The test set is loaded fully into memory so predictions can be made in
    # one call, which makes building the confusion matrix straightforward.
    test_data = []
    test_labels = []

    # The test folder must contain the sub-folders "viral pneumonia" and
    # "bacterial pneumonia".
    # NOTE(review): labels here (viral=1, bacterial=0) must agree with
    # train_gen.class_indices - confirm after the generators are built.
    for cond in ['/viral pneumonia/', '/bacterial pneumonia/']:
        label = 1 if cond == '/viral pneumonia/' else 0
        # Sorted so the order of the test arrays is the same on every run.
        for fname in sorted(os.listdir(input_path1 + cond)):
            img_path = input_path1 + cond + fname
            img = cv2.imread(img_path, 0)  # read as grayscale
            if img is None:
                # cv2.imread returns None for files it cannot decode; passing
                # None on to cv2.resize would raise.
                print('Skipping unreadable test file: {}'.format(img_path))
                continue
            img = cv2.resize(img, (img_dims, img_dims))  # match the training image size
            img = np.dstack([img, img, img])  # replicate the gray channel to get 3 channels
            img = img.astype('float32') / 255.0
            test_data.append(img)
            test_labels.append(label)

    test_data = np.array(test_data)
    test_labels = np.array(test_labels)

    return train_gen, val_gen, test_data, test_labels

In [None]:
# Training hyperparameters: image side length, epoch count and batch size.
img_dims, epochs, batch_size = 227, 50, 16

# Build the generators and load the in-memory test set.
train_gen, val_gen, test_data, test_labels = process_data(img_dims=img_dims,
                                                          batch_size=batch_size)
# Show the shape of the test array as the cell output.
test_data.shape

In [None]:
########## MODEL DESCRIPTION ##########
# MobileNet backbone (ImageNet weights, no classification top, global average
# pooling) followed by a small dense classification head.

num_classes = 2

# The model input size follows img_dims so it always matches the data produced
# by process_data.
IMAGE_SIZE = [img_dims, img_dims]
base_model = tf.keras.applications.MobileNet(input_shape=IMAGE_SIZE + [3],
                                             weights='imagenet',
                                             include_top=False,
                                             pooling='avg')

# All backbone layers are trainable (full fine-tuning).
base_model.trainable = True

# pooling='avg' already yields a flat feature vector, so no Flatten is needed.
x = Dense(256, activation='relu')(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(.2)(x)
prediction = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=prediction)

# `learning_rate` replaces the deprecated `lr` argument.
# Make sure to change the learning rate when training with unfrozen layers.
opt = Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
from sklearn.utils import class_weight

# 'balanced' class weights computed from the training labels, passed to
# model.fit to compensate for the class imbalance.
# compute_class_weight is called with keyword arguments; recent scikit-learn
# releases reject the positional form.
train_classes = train_gen.classes
unique_classes = np.unique(train_classes)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=unique_classes,
                                                     y=train_classes)
# Key each weight by its class index, as plain Python numbers.
class_weights = {int(c): float(w) for c, w in zip(unique_classes, balanced_weights)}
print(class_weights)

{0: 0.7307909604519774, 1: 1.5832313341493267}


In [None]:
# Base name used for every artefact (weights, history, plots) of this run.
model_name = '2class_mobilenet_bactvir_dataaug_new'

weight_save_path = '/content/drive/My Drive/Results/Weight/'
# Make sure the target folder exists before the checkpoint callback writes to it.
os.makedirs(weight_save_path, exist_ok=True)

# val_loss is a quantity to minimise, so both loss-based callbacks use
# mode='min' (mode='max' would wait for the loss to stop *increasing*).
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=2, verbose=1, mode='min', min_lr=0.000001)
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=3, mode='min')
# Keep only the weights of the epoch with the best validation accuracy.
checkpoint = ModelCheckpoint(weight_save_path+model_name+'.h5', monitor='val_accuracy', save_best_only=True, save_weights_only=True)

In [None]:
# Fitting the model
# The epoch count comes from the `epochs` setting instead of a duplicated literal.
# NOTE(review): `early_stop` is defined with the other callbacks but is not
# passed here - confirm whether early stopping should be enabled.
hist = model.fit(train_gen,
                 steps_per_epoch=train_gen.samples // batch_size,
                 epochs=epochs,
                 validation_data=val_gen,
                 validation_steps=val_gen.samples // batch_size,
                 class_weight=class_weights,
                 callbacks=[lr_reduce, checkpoint])


In [None]:
# Training vs. validation curves: one panel for accuracy, one for loss.
fig, axes = plt.subplots(1, 2, figsize=(10, 3))

for panel, metric in zip(axes.ravel(), ('accuracy', 'loss')):
    train_curve = hist.history[metric]
    val_curve = hist.history['val_' + metric]
    panel.plot(train_curve)
    panel.plot(val_curve)
    panel.set_title('Model {}'.format(metric))
    panel.set_xlabel('epochs')
    panel.set_ylabel(metric)
    panel.legend(['train', 'val'])

In [None]:
# The ModelCheckpoint callback writes the best-val_accuracy weights to
# weight_save_path + model_name + '.h5'. Save the last-epoch weights under a
# separate name so that best checkpoint is not overwritten.
model.save_weights(weight_save_path+model_name+'_final.h5')

In [None]:
from sklearn.metrics import classification_report

# Predicted class index (argmax over the softmax outputs) for each test image.
y_pred = model.predict(test_data)
y_pred_bool = y_pred.argmax(axis=1)
# Show which class indices the model actually predicted.
print(np.unique(y_pred_bool))

# Text form for display, dict form kept in `report` for saving later.
report_text = classification_report(test_labels, y_pred_bool)
report = classification_report(test_labels, y_pred_bool, output_dict=True)
print(report_text)

In [None]:

from tensorflow.keras.utils import plot_model

############## Make Sure to Change this ####################
plot_save_path = '/content/drive/My Drive/Results/ModelPlot/'
hist_save_path = '/content/drive/My Drive/Results/History/'
result_save_path = '/content/drive/My Drive/Results/Result/'
confusion_matrix_save_path = '/content/drive/My Drive/Results/Confusion Matrix/'

# Create the output folders up front so the writes below cannot fail on a
# missing directory.
for out_dir in (plot_save_path, hist_save_path, result_save_path, confusion_matrix_save_path):
    os.makedirs(out_dir, exist_ok=True)

# Per-epoch training history -> CSV.
hist_df = pd.DataFrame(hist.history)
hist_csv_file = hist_save_path+model_name+'_history.csv'
hist_df.to_csv(hist_csv_file)

# Architecture diagram.
plot_model(model, plot_save_path+model_name+'.png', show_shapes=True)

# Classification report (dict form) -> one row per class/average -> CSV.
result_df = pd.DataFrame(report).transpose()

print(result_df)

result_df.to_csv(result_save_path+model_name+'_result.csv')

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
from seaborn import heatmap
from matplotlib import pyplot as plt

# Test labels are 0 for bacterial pneumonia and 1 for viral pneumonia, and
# confusion_matrix is asked for exactly that order via `labels`.
class_ids = [0, 1]
class_names = ['bacterial pneumonia', 'viral pneumonia']

# argmax already yields integer class indices, so no rounding is needed.
preds = np.argmax(model.predict(test_data), axis=1)

acc = accuracy_score(test_labels, preds)*100
cm = confusion_matrix(test_labels, preds, labels=class_ids)
cm_norm = confusion_matrix(test_labels, preds, labels=class_ids, normalize='true')

print('CONFUSION MATRIX ------------------')
print('Test accuracy: {:.2f}%'.format(acc))

os.makedirs(confusion_matrix_save_path, exist_ok=True)

# Raw counts. Rows are true classes, columns are predicted classes.
plt.figure()
ax = heatmap(cm, cmap='Accent', annot=True, xticklabels=class_names, yticklabels=class_names, square=True, fmt='d')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.savefig(confusion_matrix_save_path+model_name+'.png')
plt.show()

# Row-normalised version (each true class sums to 1), on its own figure.
plt.figure()
ax = heatmap(cm_norm, cmap='Accent', annot=True, xticklabels=class_names, yticklabels=class_names, square=True, fmt='.2f')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.savefig(confusion_matrix_save_path+model_name+'_normalized.png')
plt.show()