In [1]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from keras.preprocessing import image 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import os

Using TensorFlow backend.


In [2]:
# Load the image metadata. Rows without a recorded category are labelled 'Normal'.
df = pd.read_csv('CombinedImages/CombinedUpdated.csv')
na_fill = {'VirusCategory1': 'Normal'}
df = df.fillna(value = na_fill)

# Collapse fine-grained diagnoses into the coarse classes used for training.
# The mapping is applied to the label column only, so a matching string in
# any other column is never rewritten by accident.
category_map = {
    'E.Coli': 'Bacterial',
    'COVID-19, ARDS': 'COVID-19',
    'Mycoplasma Bacterial Pneumonia': 'Bacterial',
    'Klebsiella': 'Bacterial',
    'Legionella': 'Bacterial',
    'Chlamydophila': 'Bacterial',
    'Pneumocystis': 'Fungal',
    'Streptococcus': 'Bacterial',
}
df['VirusCategory1'] = df['VirusCategory1'].replace(category_map)

pneu_types = ['Normal', 'COVID-19', 'Bacterial', 'Fungal', 'SARS']
type_cols = [f'type_{i}' for i in pneu_types]

# One-hot encode the label. An explicit integer dtype keeps the 0/1 encoding
# regardless of the pandas default, and reindexing guarantees that every class
# in pneu_types has a column (all zeros when the class is absent from the data).
type_dummies = (
    pd.get_dummies(df['VirusCategory1'], prefix = 'type', dtype = np.uint8)
    .reindex(columns = type_cols, fill_value = 0)
)

df = df[['ImagePath', 'VirusCategory1']].join(type_dummies)
# Keep only rows whose label is one of the modelled classes.
df = df[df.VirusCategory1.isin(pneu_types)]
df.head(3)

Unnamed: 0,ImagePath,VirusCategory1,type_Normal,type_COVID-19,type_Bacterial,type_Fungal,type_SARS
0,IM-0128-0001.jpeg,Normal,1,0,0,0,0
1,IM-0127-0001.jpeg,Normal,1,0,0,0,0
2,IM-0125-0001.jpeg,Normal,1,0,0,0,0


In [3]:
# Number of images per class (shows how imbalanced the dataset is).
df['VirusCategory1'].value_counts()

Normal       5841
COVID-19      512
Bacterial      47
Fungal         17
SARS           16
Name: VirusCategory1, dtype: int64

In [4]:
# Fixed seed so the train/test partition is identical on every run.
random_state = 10
X = df[['ImagePath', 'VirusCategory1']]
y = df[[f'type_{i}' for i in pneu_types]]
# Stratify on the class label so each class is split 90/10 as well.
x_train, x_test, y_train, y_test = train_test_split(
    X, y,
    train_size = .90,
    stratify = X.VirusCategory1.values,
    random_state = random_state,
)

In [5]:
# The label column was only needed for stratification; remove it from the features.
x_train = x_train.drop(columns = 'VirusCategory1')
x_test = x_test.drop(columns = 'VirusCategory1')
x_test.shape

(644, 1)

In [6]:
def get_image_value(path, target_size = (28, 28)):
    """Load one image from disk as a scaled array.

    Parameters
    ----------
    path : str
        Location of the image file.
    target_size : tuple of int, optional
        ``(height, width)`` the image is resized to while loading.
        Defaults to ``(28, 28)``, the size the network in this notebook expects.

    Returns
    -------
    numpy.ndarray
        The pixel array produced by ``image.img_to_array`` divided by 255
        (maps 8-bit pixel values into the 0-1 range).
    """
    # target_size is (height, width) only; the channel count is governed by the
    # loader's colour mode, not by this argument.
    img = image.load_img(path, target_size = tuple(target_size[:2]))
    img = image.img_to_array(img)/255
    return img


def get_data(df, image_dir = 'CombinedImages/all'):
    """Load every image referenced by ``df`` into a single array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``ImagePath`` column holding file names relative to
        ``image_dir``.
    image_dir : str, optional
        Directory that contains the image files.

    Returns
    -------
    numpy.ndarray
        The arrays returned by ``get_image_value`` stacked along a new leading
        axis, one entry per row of ``df`` in the same order.
    """
    img_list = [
        get_image_value(os.path.join(image_dir, name))
        for name in tqdm(df.ImagePath.values)
    ]
    # No squeeze here: squeezing would silently drop the leading batch axis
    # whenever df has exactly one row, which breaks downstream model calls.
    return np.array(img_list)

# Swap the path frames for the actual pixel arrays (test split first, then train).
x_test = get_data(df = x_test)
x_train = get_data(df = x_train)


100%|██████████| 644/644 [00:08<00:00, 74.86it/s]
100%|██████████| 5789/5789 [01:20<00:00, 71.55it/s]


In [7]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D 

In [8]:
def get_conv_model(x, y):
    """Build and compile a small convolutional classifier.

    The architecture is two 3x3 convolutions (32 then 64 filters), 2x2 max
    pooling, dropout, a 128-unit dense layer, dropout, and a softmax output.

    Parameters
    ----------
    x : array-like
        Training images. When it exposes a 4-D ``shape`` (samples, height,
        width, channels) the per-sample shape is used as the network input
        shape; otherwise the input shape falls back to ``(28, 28, 3)``.
    y : array-like
        One-hot targets. When it exposes a 2-D ``shape`` the second dimension
        is used as the number of output classes; otherwise 5 classes are used.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy, the Adam optimizer and
        an accuracy metric.
    """
    drop = .2

    # Derive the layer sizes from the data so the model stays in sync with the
    # preprocessing; keep the original constants as a fallback.
    x_shape = getattr(x, 'shape', None)
    if x_shape is not None and len(x_shape) == 4:
        input_shape = tuple(int(dim) for dim in x_shape[1:])
    else:
        input_shape = (28, 28, 3)

    y_shape = getattr(y, 'shape', None)
    if y_shape is not None and len(y_shape) == 2:
        n_classes = int(y_shape[1])
    else:
        n_classes = 5

    model = Sequential()

    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape = input_shape))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(drop))

    model.add(Flatten())

    model.add(Dense(128, activation='relu'))
    model.add(Dropout(drop))

    # One softmax unit per class.
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model

In [9]:
# Import the callbacks from the same Keras package that builds the model
# (keras.models / keras.layers); mixing them with tensorflow.keras objects is
# not supported.
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Make sure the checkpoint directory exists before the first save is attempted.
checkpoint_path = 'models/ModelCheckpointWeights.h5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok = True)

# Stop when val_loss has not improved for 2 epochs; keep only the best model on disk.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
model_checkpoint = ModelCheckpoint(checkpoint_path, verbose=1, save_best_only=True)

epochs = 3
batch_size = 1
model = get_conv_model(x_train, y_train)
# NOTE(review): the held-out test split is used as validation data, so early
# stopping and checkpoint selection both see it; consider carving a separate
# validation split out of the training data.
model_history = model.fit(x_train, y_train, epochs = epochs, batch_size = batch_size, verbose = 1,
         callbacks = [early_stopping, model_checkpoint], validation_data = (x_test, y_test))

Epoch 1/3
Epoch 00001: val_loss improved from inf to 0.12455, saving model to models/ModelCheckpointWeights.h5
Epoch 2/3
Epoch 00002: val_loss improved from 0.12455 to 0.10057, saving model to models/ModelCheckpointWeights.h5
Epoch 3/3
Epoch 00003: val_loss improved from 0.10057 to 0.09775, saving model to models/ModelCheckpointWeights.h5


In [35]:
# Load one sample image and add a leading batch axis so it can be fed to the model.
single_img = get_image_value('TestImages/Normal.jpg')
tester_img = single_img.reshape((1, 28, 28, 3))
tester_img.shape

(1, 28, 28, 3)

['Normal', 'COVID-19', 'Bacterial', 'Fungal', 'SARS']
['Normal', 'COVID-19', 'Bacterial']

In [36]:
# Class names in the same order as the one-hot target columns, which are built
# from pneu_types. Reusing that list avoids a second, possibly diverging copy.
labels = list(pneu_types)

# Class probabilities for the single test image (one row per image).
predict = model.predict(tester_img)
print(predict)

[[7.3586494e-01 2.0121859e-01 1.6620459e-02 4.5772363e-02 5.2361906e-04]]


In [37]:
# Take the arg-max over the class scores of the first (only) image; indexing the
# row explicitly avoids arg-maxing over the flattened batch array.
labels[int(np.argmax(predict[0]))]

'Normal'