# Chest X-ray Project

**Recognize Human Chest X-ray Images**

**Author: Andrii Kobyliuk**

# PART 0 - Preparing
**Import Libraries**

In [None]:
from random import randint
import os
import numpy as np 
import pandas as pd
import cv2

import matplotlib.pyplot as plot
import matplotlib.gridspec as gridspec
import seaborn as sns

from IPython.display import SVG

from sklearn.utils import shuffle

import keras
import tensorflow.keras.layers as Layers
import tensorflow.keras.activations as Actications
import tensorflow.keras.models as Models
import tensorflow.keras.optimizers as Optimizer
import tensorflow.keras.metrics as Metrics
import tensorflow.keras.utils as Utils
from keras.utils.vis_utils import model_to_dot

# PART 1 - Data Sets

**Helper Function For Image Extraction**

In [None]:
def get_images(directory):
    """Load every file under ``directory`` as an image resized to 64x64.

    The tree is traversed with ``os.walk``; inside each folder the files are
    read in sorted filename order. A progress percentage is printed roughly
    every 10% of each folder.

    Args:
        directory: Root folder to search for image files.

    Returns:
        A list with one resized image (as returned by ``cv2.resize``) per file.

    Raises:
        ValueError: If ``cv2.imread`` cannot decode one of the files.
    """
    Images = []
    for dirname, _, filenames in os.walk(directory):
        total = len(filenames)
        # Clamp the reporting step to 1 so folders holding fewer than ten
        # files do not cause a modulo-by-zero.
        step = max(1, total // 10)
        for current, filename in enumerate(sorted(filenames), start=1):
            path = os.path.join(dirname, filename)
            image = cv2.imread(path)
            if image is None:
                # cv2.imread returns None instead of raising on failure. The
                # result is paired with labels/filenames by position elsewhere
                # in this notebook, so fail loudly rather than skip the file.
                raise ValueError(f"Could not read image: {path}")
            Images.append(cv2.resize(image, (64, 64)))
            if current % step == 0:
                print(round(current / total * 100), "%")
    return Images

**Load Train Images**

In [None]:
# Read the whole training set into memory as 64x64 images.
train_images = get_images(
    '/kaggle/input/cxray/train/train/'
)

# Sanity check: how many training images were loaded.
print(len(train_images))

**Load Train Labels From train.csv**

In [None]:
# Load the label table ordered by id.
# NOTE(review): presumably the id ordering is meant to match the sorted
# filename order used when loading the images — confirm against the data.
labeled_data = pd.read_csv('/kaggle/input/cxray/train.csv').sort_values(by='id')

# Take the column directly instead of appending row by row via iterrows().
train_labels = labeled_data['label'].tolist()

print(len(train_labels))

**Show Train Dataset Samples**

In [None]:
# Draw a 5x5 grid of randomly chosen training images, each titled with its label.
f, ax = plot.subplots(5, 5)
f.subplots_adjust(0, 0, 3, 3)
for i in range(5):
    for j in range(5):
        # randint includes both endpoints, so the upper bound must be the
        # last valid index rather than the length.
        rnd_number = randint(0, len(train_images) - 1)
        ax[i, j].imshow(train_images[rnd_number])
        ax[i, j].set_title(train_labels[rnd_number])
        ax[i, j].axis('off')

In [None]:
# Count how many samples carry each label and show the counts as a bar chart.
vc = labeled_data['label'].value_counts()

plot.figure(figsize=(9, 5))
sns.barplot(
    x=vc.index,
    y=vc,
    palette="rocket",
)
plot.title("Number of pictures of each category", fontsize=15)
plot.show()

# PART 2 - Transform Data For Training

**Split Data Into Two Separate Parts For Training And Testing**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the labelled data for testing; the fixed seed keeps the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    train_images,
    train_labels,
    test_size=0.15,
    random_state=17,
)

# Images: lists of arrays -> one stacked array per subset.
x_train = np.array(x_train)
x_test = np.array(x_test)

# Labels: one-hot encode into two columns.
y_train = keras.utils.to_categorical(np.array(y_train), num_classes=2)
y_test = keras.utils.to_categorical(np.array(y_test), num_classes=2)

print("Shape of train images:", x_train.shape)
print("Shape of train labels:", y_train.shape)

print("Shape of test images:", x_test.shape)
print("Shape of test labels:", y_test.shape)

# PART 3 - Model Building

**Define Model Architecture**

In [None]:
# Convolutional classifier for 64x64 3-channel inputs, ending in a 2-way softmax.
model = Models.Sequential([
    # First convolutional stage, followed by pooling.
    Layers.Conv2D(200, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)),
    Layers.Conv2D(180, kernel_size=(3, 3), activation='relu'),
    Layers.MaxPool2D(5, 5),
    # Second convolutional stage with a shrinking filter count.
    Layers.Conv2D(180, kernel_size=(3, 3), activation='relu'),
    Layers.Conv2D(140, kernel_size=(3, 3), activation='relu'),
    Layers.Conv2D(100, kernel_size=(3, 3), activation='relu'),
    Layers.Conv2D(50, kernel_size=(3, 3), activation='relu'),
    Layers.MaxPool2D(4, 4),
    # Dense head with dropout before the output layer.
    Layers.Flatten(),
    Layers.Dense(180, activation='relu'),
    Layers.Dense(100, activation='relu'),
    Layers.Dense(50, activation='relu'),
    Layers.Dropout(rate=0.5),
    Layers.Dense(2, activation='softmax'),
])

**Define Optimizer**

In [None]:
# Adam (AMSGrad variant) with explicitly chosen moment decay rates and epsilon.
adam_optimizer = Optimizer.Adam(
    name="Adam",
    learning_rate=0.001,
    # Exponential decay rates for the first and second moment estimates.
    beta_1=0.6,
    beta_2=0.95,
    epsilon=1e-05,
    amsgrad=True,
)

**Compile Model**

In [None]:
# Attach the optimizer, loss and tracked metric to the model.
model.compile(
    optimizer=adam_optimizer,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

**View Model Summary**

In [None]:
# Print a layer-by-layer text summary of the model.
model.summary()
# Render the model graph to SVG through Graphviz `dot`.
# NOTE(review): the resulting SVG object is neither assigned nor the last
# expression of the cell, so it is presumably never displayed — confirm
# whether this line is still needed.
SVG(model_to_dot(model).create(prog='dot', format='svg'))
# Write the architecture diagram, including tensor shapes, to model.png.
Utils.plot_model(model,to_file='model.png',show_shapes=True)

# PART 4 - Model Training And Testing

**Model Training**

In [None]:
# Train for 3 epochs, holding back 30% of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.30,
)

In [None]:
# model.save('model-0.986-2')

**View History Of Training**

In [None]:
# Plot every metric recorded during training, one line per metric.
pd.DataFrame(data=history.history).plot()

**Check Accuracy And Loss On Testing Data**

In [None]:
# Score the trained model on the held-out test split.
loss, accuracy = model.evaluate(x=x_test, y=y_test)

# Report the two returned values.
print(f"Accuracy : {accuracy}")
print(f"Loss : {loss}")

**Load Data For Prediction**

In [None]:
# Load the unlabelled images and stack them into a single array for prediction.
pred_images = np.array(
    get_images('/kaggle/input/cxray/test/test/')
)

**Predict Data Using The Model**

In [None]:
# Predict class probabilities, then keep the index of the most likely class.
pred_labels = model.predict(pred_images)
# argmax already yields integer class indices, so the former .round(0) was a
# no-op and has been dropped.
pred_labels = np.argmax(pred_labels, axis=-1)

**Results of Predictions**

In [None]:
# Draw a 5x5 grid of randomly chosen test images, each titled with its predicted label.
f, ax = plot.subplots(5, 5)
f.subplots_adjust(0, 0, 3, 3)
for i in range(5):
    for j in range(5):
        # randint includes both endpoints, so the upper bound must be the
        # last valid index rather than the length.
        rnd_number = randint(0, len(pred_images) - 1)
        ax[i, j].imshow(pred_images[rnd_number])
        ax[i, j].set_title(pred_labels[rnd_number])
        ax[i, j].axis('off')

**Helper Function**

In [None]:
def get_filenames(directory):
    """Return the names of all files found under ``directory``.

    The tree is traversed with ``os.walk``; within each folder the names are
    appended in sorted order. Only the bare filenames are returned, without
    their folder path.

    Args:
        directory: Root folder to search.

    Returns:
        A list of filenames (strings).
    """
    Names = []
    for _, _, filenames in os.walk(directory):
        Names.extend(sorted(filenames))
    return Names

**Save Results in submission.csv**

In [None]:

# Pair each test filename with its predicted label. Both sequences come from
# the same directory read in sorted filename order.
names = get_filenames('/kaggle/input/cxray/test/test/')

result_pairs = dict(zip(names, pred_labels))

# Emit predictions in the row order of the sample submission. The context
# managers make sure both files are closed even if a lookup below raises.
with open("submission.csv", "w") as result_sub:
    result_sub.write("id,label\n")

    with open('../input/cxray/sample_submission.csv') as sample_sub:
        # Skip the header row.
        next(sample_sub, None)
        for r in sample_sub:
            # Strip so a line without a comma does not keep its newline.
            filename = r.split(',')[0].strip()
            if not filename:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            label = result_pairs[filename]
            print(filename, '\t', label)
            result_sub.write("{},{}\n".format(filename, label))