In [6]:
import numpy as np
import pandas as pd
import os
import PIL
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import shutil
from sklearn.metrics import confusion_matrix, classification_report

In [7]:
train_df = pd.read_csv('capstone covid data/train.txt', sep=" ", header=None)

In [8]:
train_df.columns=['patient id', 'filename', 'class', 'data source']

In [9]:
train_df=train_df.drop(['patient id', 'data source'], axis=1 )

In [10]:
train_df.head()

Unnamed: 0,filename,class
0,ARDSSevere.png,negative
1,acute-respiratory-distress-syndrome-ards-1.jpg,negative
2,acute-respiratory-distress-syndrome-ards.jpg,negative
3,ards-secondary-to-tiger-snake-bite.png,negative
4,pneumocystis-pneumonia-2-PA.png,negative


In [11]:
test_df = pd.read_csv('Capstone covid data/test.txt', sep=" ", header=None)
test_df.columns=['id', 'filename', 'class', 'data source' ]
test_df=test_df.drop(['id', 'data source'], axis=1 )

In [12]:
test_df.head()

Unnamed: 0,filename,class
0,MIDRC-RICORD-1C-419639-003251-46647-0.png,positive
1,MIDRC-RICORD-1C-419639-001464-39871-0.png,positive
2,MIDRC-RICORD-1C-419639-000918-78965-0.png,positive
3,MIDRC-RICORD-1C-419639-003318-64285-0.png,positive
4,MIDRC-RICORD-1C-419639-001015-81591-0.png,positive


In [13]:
train_df['class'].value_counts()

negative    13793
positive     2158
Name: class, dtype: int64

In [14]:
negative  = train_df[train_df['class']=='negative']  
positive = train_df[train_df['class']=='positive'] 
from sklearn.utils import resample

df_majority_downsampled = resample(negative, replace = True, n_samples = 2158) 

train_df = pd.concat([positive, df_majority_downsampled])

from sklearn.utils import shuffle
train_df = shuffle(train_df) 

In [15]:
train_df['class'].value_counts()

negative    2158
positive    2158
Name: class, dtype: int64

In [16]:
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.layers import *
from keras.models import * 
from keras.preprocessing import image 

In [12]:
# CNN Based Model in Keras

model = Sequential()
model.add(Conv2D(32,kernel_size=(3,3),activation='relu',input_shape=(224,224,3)))
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(128,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1,activation='sigmoid'))

model.compile(loss=keras.losses.binary_crossentropy,optimizer='adam',metrics=['accuracy'])

In [13]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 220, 220, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 110, 110, 64)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 110, 110, 64)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 108, 108, 64)      36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 54, 54, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 54, 54, 64)       

In [17]:
train_df, valid_df = train_test_split(train_df, train_size=0.9, random_state=0)

In [18]:
print(f"Negative and positive values of train: {train_df['class'].value_counts()}")
print(f"Negative and positive values of validation: {valid_df['class'].value_counts()}")
print(f"Negative and positive values of test: {test_df['class'].value_counts()}")

Negative and positive values of train: negative    1945
positive    1939
Name: class, dtype: int64
Negative and positive values of validation: positive    219
negative    213
Name: class, dtype: int64
Negative and positive values of test: negative    200
positive    200
Name: class, dtype: int64


In [19]:
train_path = 'Capstone covid data/train'  #directory path
test_path = 'Capstone covid data/test'

In [20]:
train_datagen = ImageDataGenerator(rescale = 1./255, 
                                   shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)

In [21]:
train_gen = train_datagen.flow_from_dataframe(dataframe = train_df, directory=train_path, x_col='filename', 
                                              y_col='class', target_size=(224,224), batch_size=64, 
                                               class_mode='binary')
valid_gen = test_datagen.flow_from_dataframe(dataframe = valid_df, directory=train_path, x_col='filename',
                                             y_col='class', target_size=(224,224), batch_size=64, 
                                            class_mode='binary')
test_gen = test_datagen.flow_from_dataframe(dataframe = test_df, directory=test_path, x_col='filename', 
                                            y_col='class', target_size=(224,224), batch_size=64,
                                             class_mode='binary')


Found 3884 validated image filenames belonging to 2 classes.
Found 432 validated image filenames belonging to 2 classes.
Found 400 validated image filenames belonging to 2 classes.


In [22]:
train_gen.class_indices

{'negative': 0, 'positive': 1}

In [25]:
hist = model.fit_generator(
    train_gen,
    epochs = 20,
    validation_data = valid_gen,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [26]:
model.evaluate_generator(train_gen)

[0.3611181080341339, 0.8931514024734497]

In [27]:
model.evaluate_generator(valid_gen)

[0.3568831980228424, 0.8657407164573669]

In [28]:
model.save('CNN_model_covid.h5')

In [12]:
model=load_model('CNN_model_covid.h5')

In [23]:
model.evaluate_generator(test_gen)

[0.26798686385154724, 0.8924999833106995]

In [1]:
!pip install flask

Collecting flask
  Downloading Flask-2.0.1-py3-none-any.whl (94 kB)
Collecting itsdangerous>=2.0
  Downloading itsdangerous-2.0.1-py3-none-any.whl (18 kB)
Collecting Jinja2>=3.0
  Downloading Jinja2-3.0.1-py3-none-any.whl (133 kB)
Collecting Werkzeug>=2.0
  Downloading Werkzeug-2.0.1-py3-none-any.whl (288 kB)
Collecting MarkupSafe>=2.0
  Downloading MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl (14 kB)
Installing collected packages: itsdangerous, MarkupSafe, Jinja2, Werkzeug, flask
  Attempting uninstall: MarkupSafe
    Found existing installation: MarkupSafe 1.1.1
    Uninstalling MarkupSafe-1.1.1:
      Successfully uninstalled MarkupSafe-1.1.1
  Attempting uninstall: Jinja2
    Found existing installation: Jinja2 2.11.2
    Uninstalling Jinja2-2.11.2:
      Successfully uninstalled Jinja2-2.11.2
  Attempting uninstall: Werkzeug
    Found existing installation: Werkzeug 0.16.1
    Uninstalling Werkzeug-0.16.1:
      Successfully uninstalled Werkzeug-0.16.1
Successfully installed Jinja2-3

In [44]:
img_path="Capstone covid data/test/0103fadb-1663-40a6-8a9e-09d626cd2091.png"
i = image.load_img(img_path, target_size=(224,224))
i = image.img_to_array(i)/255.0
i = i.reshape(1, 224,224,3)
p = model.predict(i)
print(math.floor(p[0][0]/0.5))

0
