In [None]:
# Data Reading 
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from glob import glob
from PIL import Image

# Data Processing 
import numpy as np
import pandas as pd
import cv2
import random
import albumentations as A

# Data Analysis
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

# Grad-CAM
import keras
import matplotlib.cm as cm

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import Sequential
from tensorflow.python.keras import layers, models
import tensorflow as tf
tf.compat.v1.disable_v2_behavior()

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling  import RandomUnderSampler 

***Data Gathering***

In [None]:
# Class labels; each label is also the name of a sub-directory under the
# train and test roots.
levels = ['NORMAL', 'COVID19', 'PNEUMONIA']
train_path = "../input/chest-xray-covid19-pneumonia/Data/train"
test_path = "../input/chest-xray-covid19-pneumonia/Data/test"
train_data_dir = os.path.join(train_path)
test_path_dir = os.path.join(test_path)


def _index_image_folder(root_dir, class_names):
    """Build a DataFrame listing every file found under ``root_dir/<class>``.

    Parameters
    ----------
    root_dir : str
        Directory that contains one sub-directory per class.
    class_names : list of str
        Sub-directory names to scan; each name is also used as the label.

    Returns
    -------
    pandas.DataFrame
        One row per file with columns ``image_file`` (``<class>/<file>``),
        ``corona_result`` (the class name) and ``path`` (``root_dir`` joined
        to ``image_file`` with ``/``).
    """
    records = [
        ['{}/{}'.format(class_name, file_name), class_name]
        for class_name in class_names
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and any seeded sampling done later) reproducible across runs.
        for file_name in sorted(os.listdir(os.path.join(root_dir, class_name)))
    ]
    frame = pd.DataFrame(records, columns = ['image_file', 'corona_result'])
    frame['path'] = root_dir + '/' + frame['image_file']
    return frame


train_data = _index_image_folder(train_data_dir, levels)
test_data = _index_image_folder(test_path_dir, levels)

# Number of training rows, derived from the data instead of being hard-coded.
samples = len(train_data)

# `data` is an alias of `train_data` (not a copy): columns added to one are
# visible through the other.
data = train_data
data.head()

In [None]:
# Rows that are exact duplicates of an earlier row (same file indexed twice).
print('Number of Duplicated Samples: %d'%(data.duplicated().sum()))
# The sample count is the row count. The previous expression formatted
# `data.isnull().value_counts()` (a Series) with %d, which only works while
# that Series happens to contain exactly one entry.
print('Number of Total Samples: %d'%(len(data)))

***Image Distribution***

In [None]:
# Tally the images available for each class, then order the classes from the
# most to the least populated for plotting.
class_names = ['COVID19', 'NORMAL', 'PNEUMONIA']
df = pd.DataFrame({
    'corona_result': class_names,
    'Count': [len(data[data['corona_result'] == class_name]) for class_name in class_names],
})
df = df.sort_values(by = ['Count'], ascending = False)

# One bar per class, labelled with its count.
fig = px.bar(
    df,
    x = 'corona_result',
    y = 'Count',
    color = "corona_result",
    text_auto = '',
    width = 600,
    color_discrete_sequence = ["orange", "green", "red"],
    template = 'simple_white',
)

# Drop the grid lines on both axes and place the count labels above the bars.
for update_axis in (fig.update_xaxes, fig.update_yaxes):
    update_axis(showgrid = False)
fig.update_traces(textfont_size = 12, textangle = 0, textposition = "outside", cliponaxis = False)

fig.show()

In [None]:
# Target (width, height) every image is resized to before analysis.
IMAGE_SIZE = (75, 75)


def _load_resized(path, size = IMAGE_SIZE):
    """Open the image at ``path``, resize it to ``size`` and return it as an array.

    The file is opened in a ``with`` block so its handle is released as soon
    as the resized copy has been produced, even if resizing raises.
    """
    with Image.open(path) as img:
        return np.asarray(img.resize(size))


# Adds an 'image' column holding one resized array per row.
data['image'] = data['path'].map(_load_resized)

data.head()

In [None]:
# Number of example images drawn per class (one grid row per class).
n_samples = 3
n_classes = data['corona_result'].nunique()

# squeeze=False keeps m_axs two-dimensional even for a single class or sample.
fig, m_axs = plt.subplots(n_classes, n_samples, figsize = (6*n_samples, 4*n_classes), squeeze = False)

for n_axs, (type_name, type_rows) in zip(m_axs, data.sort_values(['corona_result']).groupby('corona_result')):
    # Put the class name above the middle image of the row.
    n_axs[n_samples // 2].set_title(type_name, fontsize = 15)
    # Fixed random_state so the same examples are drawn on every run.
    for c_ax, (_, c_row) in zip(n_axs, type_rows.sample(n_samples, random_state = 1234).iterrows()):
        picture = c_row['path']
        image = cv2.imread(picture)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if image is None:
            raise FileNotFoundError('Could not read image: {}'.format(picture))
        # OpenCV loads channels as BGR while matplotlib expects RGB.
        c_ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        c_ax.axis('off')

In [None]:
# Read first, so a missing file fails before an empty figure is created.
image = cv2.imread("../input/chest-xray-covid19-pneumonia/Data/train/COVID19/COVID19(148).jpg")
# cv2.imread signals an unreadable file by returning None, not by raising.
if image is None:
    raise FileNotFoundError("Could not read the example COVID19 image")

plt.figure()
# OpenCV loads channels as BGR while matplotlib expects RGB; convert only for
# display so `image` itself stays exactly as it was read.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

In [None]:
# Basic properties of the array currently held in `image`, as
# (message template, value) pairs printed one per line.
image_facts = [
    ('Image Shape: {}', image.shape),
    ('Image Height: {}', image.shape[0]),
    ('Image Width: {}', image.shape[1]),
    ('Image Dimension: {}', image.ndim),
    # image.size is the number of array elements, integer-divided by 1024.
    ('Image Size: {}kb', image.size//1024),
    ('Image Data Type: {}', image.dtype),
    ('Maximum RGB value of the image: {}', image.max()),
    ('Minimum RGB value of the image: {}', image.min()),
]

for template, value in image_facts:
    print(template.format(value))

In [None]:
def plot_multiple_img(img_matrix_list, title_list, ncols, main_title = ""):
    """Display images in a titled grid, filling it row by row.

    Parameters
    ----------
    img_matrix_list : iterable
        Images to show; each one is passed to ``Axes.imshow``.
    title_list : iterable of str
        Title for each image, paired positionally with ``img_matrix_list``.
        If the two differ in length, the extra entries are ignored.
    ncols : int
        Number of grid columns; must be at least 1.
    main_title : str, optional
        Title drawn above the whole figure.

    Raises
    ------
    ValueError
        If ``ncols`` is smaller than 1.

    Notes
    -----
    The grid always has at least two rows. Further rows are added when there
    are more than ``2 * ncols`` images, which previously raised ``IndexError``.
    """
    if ncols < 1:
        raise ValueError("ncols must be at least 1")

    # Materialise the pairs so they can be counted before the grid is created.
    titled_images = list(zip(img_matrix_list, title_list))
    rows_needed = (len(titled_images) + ncols - 1) // ncols
    nrows = max(2, rows_needed)

    # squeeze=False keeps `myaxes` two-dimensional even when ncols == 1.
    fig, myaxes = plt.subplots(figsize = (15, 8), nrows = nrows, ncols = ncols, squeeze = False)
    fig.suptitle(main_title, fontsize = 18)
    fig.subplots_adjust(wspace = 0.3)
    fig.subplots_adjust(hspace = 0.3)

    for i, (img, title) in enumerate(titled_images):
        cell = myaxes[i // ncols][i % ncols]
        cell.imshow(img)
        cell.set_title(title, fontsize = 15)

    plt.show()

In [None]:
chosen_image = cv2.imread("../input/chest-xray-covid19-pneumonia/Data/train/COVID19/COVID19(148).jpg")
# cv2.imread signals an unreadable file by returning None, not by raising.
if chosen_image is None:
    raise FileNotFoundError("Could not read the example COVID19 image")

# OpenCV loads channels as BGR while matplotlib expects RGB; the demo works on
# an RGB copy so every panel is displayed with correct colours.
chosen_image_rgb = cv2.cvtColor(chosen_image, cv2.COLOR_BGR2RGB)

# Each panel title is kept next to its transform so the two cannot drift apart.
# RandomBrightness / RandomContrast are deprecated aliases of
# RandomBrightnessContrast with the other limit set to 0, so the supported
# class is used directly (0.2 is the alias's default brightness limit).
augmentations = [
    ("RandomFog", A.RandomFog(p = 1)),
    ("RandomBrightness", A.RandomBrightnessContrast(brightness_limit = 0.2, contrast_limit = 0, p = 1)),
    # NOTE(review): a 199x199 crop assumes the image is at least that large.
    ("RandomCrop", A.RandomCrop(p = 1, height = 199, width = 199)),
    ("Rotate", A.Rotate(p = 1, limit = 90)),
    ("RGBShift", A.RGBShift(p = 1)),
    ("VerticalFlip", A.VerticalFlip(p = 1)),
    ("RandomContrast", A.RandomBrightnessContrast(brightness_limit = 0, contrast_limit = 0.5, p = 1)),
]
albumentation_list = [aug_type for _, aug_type in augmentations]

# Not used in this cell; kept in case later cells reference it.
bboxes_list = []

# The untouched image comes first, followed by one augmented copy per transform.
img_matrix_list = [chosen_image_rgb]
for aug_type in albumentation_list:
    img_matrix_list.append(aug_type(image = chosen_image_rgb)['image'])

titles_list = ["Original"] + [title for title, _ in augmentations]

plot_multiple_img(img_matrix_list, titles_list, ncols = 4, main_title = "Different Types of Augmentations")

In [None]:
# Per-image pixel statistics, computed over every row of `data` in row order.
# Iterating the column directly removes the dependency on a hard-coded sample
# count and on the index labels being exactly 0..N-1.
images = data['image']
mean_val = [img.mean() for img in images]
std_dev_val = [np.std(img) for img in images]
max_val = [img.max() for img in images]
min_val = [img.min() for img in images]

# Explicit copy so the new columns are added to an independent frame and never
# to a view of `data`.
imageEDA = data.loc[:,['image','corona_result','path']].copy()
imageEDA['mean'] = mean_val
# Column name 'stedev' is kept as-is because the plotting cells refer to it.
imageEDA['stedev'] = std_dev_val
imageEDA['max'] = max_val
imageEDA['min'] = min_val

# Signed difference between the dataset-wide average of the per-image means
# and each image's own mean.
imageEDA['subt_mean'] = imageEDA['mean'].mean() - imageEDA['mean']

In [None]:
# One KDE figure per statistic, coloured by class: (column, figure title).
kde_panels = [
    ('mean', 'Images Colour Mean Value Distribution by Class\n'),
    ('max', '\nImages Colour Max Value Distribution by Class\n'),
    ('min', '\nImages Colour Min Value Distribution by Class\n'),
]

kde_grids = []
for stat_column, heading in kde_panels:
    # displot opens its own figure, so the title set right after lands on it.
    kde_grids.append(sns.displot(data = imageEDA, x = stat_column, kind = "kde", hue = 'corona_result'))
    plt.title(heading, fontsize = 12)

# Keep the original names bound to the three returned grids.
ax1, ax2, ax3 = kde_grids

In [None]:
# Scatter of per-image mean against per-image standard deviation, one colour
# per class, drawn on a single wide figure.
plt.figure(figsize = (20, 8))
sns.set(style = "ticks", font_scale = 1)

ax = sns.scatterplot(
    data = imageEDA,
    x = "mean",
    y = "stedev",
    hue = 'corona_result',
    alpha = 0.8,
)

# Titles and axis labels.
plt.title('Mean and Standard Deviation of Image Samples', fontsize = 16)
ax.set_xlabel('\nImage Channel Colour Mean', fontsize = 14)
ax.set_ylabel('Image Channel Colour Standard Deviation', fontsize = 14)

# Cosmetics: keep only the left and bottom spines, horizontal tick labels.
plt.xticks(rotation = 0, fontsize = 12)
sns.despine(top = True, right = True, left = False, bottom = False);

We observe that images whose pixel standard deviation is below 30 are most probably Covid-19 images.

In [None]:
# One mean-vs-standard-deviation scatter panel per class.
# FacetGrid creates its own figure, so no separate plt.figure() call is made
# here; the earlier one only produced an extra, empty figure.
g = sns.FacetGrid(imageEDA, col = "corona_result", height = 5)
g.map_dataframe(sns.scatterplot, x = 'mean', y = 'stedev')
g.set_titles(col_template = "{col_name}", row_template= "{row_name}", size = 12)
g.fig.subplots_adjust(top = .7)
g.fig.suptitle('Mean and Standard Deviation of Image Samples', fontsize = 15)

# Label the y axis on the first panel only, and the x axis on every panel.
axes = g.axes.flatten()
axes[0].set_ylabel('Standard Deviation')
for ax in axes:
    ax.set_xlabel('\nMean')
g.fig.tight_layout();