<a href="https://colab.research.google.com/github/BarelHeby/Deep-Learning---Chest-X-Ray/blob/main/Deep_Learning_Chest_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


---
**FIRST SETUP**

In [None]:
!pip install -q kaggle
from google.colab import files

# Create a Kaggle API token and save it to a file.
!echo '{"username":"barelheby","key":"978bdd6cd1cc991c69bbe920fe75a9cc"}' > kaggle.json

# Copy the Kaggle API token file to the correct location.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/kaggle.json

# Change the permissions of the Kaggle API token file.
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset from Kaggle.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

# Unzip the downloaded dataset.
!unzip chest-xray-pneumonia.zip

# Print the contents of the current working directory.
!ls


---
**IMPORTS**

In [None]:
import pandas as pd
from os.path import join
import tensorflow as tf



---
 **Configuration**


In [149]:
TF_BATCH_SIZE = 32
TF_SEED = 123
IMG_HEIGHT = 256
IMG_WIDTH = 256
DIR_DATASET = "/content/chest_xray"

---
**Normalize Image Function**



---

**Training Set Preprocess**

In [None]:
train_directory = join(DIR_DATASET,"train")
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_directory,
    validation_split=0.2,
    subset = "training",
    seed = TF_SEED,
    image_size = (IMG_HEIGHT,IMG_WIDTH),
    batch_size = TF_BATCH_SIZE,
)

Found 5216 files belonging to 2 classes.
Using 4173 files for training.


---

**Validation Set Prep**

In [None]:
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    train_directory,
    validation_split=0.2,
    subset = "validation",
    seed = TF_SEED,
    image_size = (IMG_HEIGHT,IMG_WIDTH),
    batch_size = TF_BATCH_SIZE
)

Found 5216 files belonging to 2 classes.
Using 1043 files for validation.


In [None]:
train_dataset.class_names

['NORMAL', 'PNEUMONIA']

In [None]:
train_encoded_class_names = [i for (i,label) in enumerate(train_dataset.class_names)]
validation_encoded_class_names = [i for (i,label) in enumerate(validation_dataset.class_names)]


In [None]:
train_dataset = train_dataset.map(lambda x,y:(x/255.0,y))
validation_dataset = validation_dataset.map(lambda x,y:(x/255.0,y))

---
**Part 1 Model**

In [None]:
from keras import Sequential
from tensorflow.keras.applications import VGG16
from keras.layers import BatchNormalization,Dropout,Dense,Flatten
from tensorflow.keras.optimizers import Adamax,Adam


In [None]:
base_network = VGG16(weights="imagenet",include_top=False,input_shape = (IMG_HEIGHT,IMG_WIDTH,3),pooling="avg")
base_network.trainable = False

In [None]:
model_assigment_1  = Sequential(name="Assigment_1")
model_assigment_1.add(base_network)
model_assigment_1.add(Flatten())
model_assigment_1.add(Dense(512,activation="relu"))
model_assigment_1.add(Dropout(0.50))
model_assigment_1.add(Dense(128,activation="relu"))
model_assigment_1.add(Dropout(0.25))
model_assigment_1.add(Dense(60,activation="relu"))
model_assigment_1.add(Dense(1,activation="sigmoid"))
model_assigment_1.compile(optimizer=Adam(),loss='binary_crossentropy', metrics=['accuracy'])
model_assigment_1.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model_assigment_1.fit(train_dataset,
                        epochs= 10,
                        validation_data = validation_dataset,
                        callbacks = early_stopping)

In [70]:
validation_loss, validation_accuracy = model_assigment_1.evaluate(validation_dataset)
print("Validation Loss:", validation_loss)
print("Validation Accuracy:", validation_accuracy)

Validation Loss: 0.07857345789670944
Validation Accuracy: 0.9741131067276001


In [90]:
import os
import shutil

In [94]:
DIR_ASS_2 = join(DIR_DATASET,"assiment_2_dataset")
DIR_ASS_2_BACTERIA  = join(DIR_ASS_2,"BACTERIA")
DIR_ASS_2_VIRUS = join(DIR_ASS_2,"VIRUS")
DIR_ASS_2_NORMAL = join(DIR_ASS_2,"NORMAL")


In [98]:
try:
  shutil.rmtree(DIR_ASS_2)
except Exception as e:
  print(e)
try:
  os.mkdir(DIR_ASS_2)
  os.mkdir(DIR_ASS_2_BACTERIA)
  os.mkdir(DIR_ASS_2_VIRUS)
  os.mkdir(DIR_ASS_2_NORMAL)
except Exception as e:
  print(e)

---
**Copy Images From Source To New Folder**

Copy sick

In [99]:
# prompt: copy all files from DIR_DATASET/test/PNEUMONIA to DIR_DATASET/assigment_2_dataset
source_dir = join(DIR_DATASET,"train","PNEUMONIA")
for filename in os.listdir(source_dir):
  if "bacteria" in filename.lower():
    dest_dir  = DIR_ASS_2_BACTERIA
  elif "virus" in filename.lower():
    dest_dir = DIR_ASS_2_VIRUS
  else:
    pass
  shutil.copy(join(source_dir,filename),dest_dir)



Copy normal

In [100]:
source_dir = join(DIR_DATASET,"train","NORMAL")
for filename in os.listdir(source_dir):
  shutil.copy(join(source_dir,filename),DIR_ASS_2_NORMAL)

---
**Assigment 2 preprocess**

In [158]:
train_dataset_ass_2 = tf.keras.utils.image_dataset_from_directory(
    DIR_ASS_2,
    validation_split=0.2,
    subset = "training",
    seed = TF_SEED,
    image_size = (IMG_HEIGHT,IMG_WIDTH),
    batch_size = TF_BATCH_SIZE,
)
validation_dataset_ass_2 = tf.keras.utils.image_dataset_from_directory(
    DIR_ASS_2,
    validation_split=0.2,
    subset = "validation",
    seed = TF_SEED,
    image_size = (IMG_HEIGHT,IMG_WIDTH),
    batch_size = TF_BATCH_SIZE
)

Found 5216 files belonging to 3 classes.
Using 4173 files for training.
Found 5216 files belonging to 3 classes.
Using 1043 files for validation.


In [145]:
train_encoded_class_names_ass_2 = [i for (i,label) in enumerate(train_dataset_ass_2.class_names)]
validation_encoded_class_names_ass_2 = [i for (i,label) in enumerate(validation_dataset_ass_2.class_names)]

In [171]:
train_dataset_ass_2 = train_dataset_ass_2.map(lambda x,y:(x/255.0,y))
validation_dataset_ass_2 = validation_dataset_ass_2.map(lambda x,y:(x/255.0,y))

In [118]:
base_network_ass_2 = VGG16(weights="imagenet",include_top=False,input_shape = (IMG_HEIGHT,IMG_WIDTH,3),pooling="avg")
base_network_ass_2.trainable = False

In [169]:
model_assigment_2  = Sequential(name="Assigment_2")
model_assigment_2.add(base_network_ass_2)
model_assigment_2.add(Flatten())
model_assigment_2.add(Dense(512,activation="relu"))
model_assigment_2.add(Dropout(0.50))
model_assigment_2.add(Dense(128,activation="relu"))
model_assigment_2.add(Dropout(0.25))
model_assigment_2.add(Dense(60,activation="relu"))
model_assigment_2.add(Dense(2,activation="softmax"))
model_assigment_2.compile(optimizer=Adam(),loss='categorical_crossentropy', metrics=['accuracy',"categorical_accuracy"])
model_assigment_2.summary()

Model: "Assigment_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_27 (Flatten)        (None, 512)               0         
                                                                 
 dense_109 (Dense)           (None, 512)               262656    
                                                                 
 dropout_58 (Dropout)        (None, 512)               0         
                                                                 
 dense_110 (Dense)           (None, 128)               65664     
                                                                 
 dropout_59 (Dropout)        (None, 128)               0         
                                                                 
 dense_111 (Dense)           (None, 60)                

In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history_ass_2 = model_assigment_2.fit(train_dataset_ass_2,
                        epochs= 10,
                        validation_data = validation_dataset_ass_2,
                        callbacks = early_stopping)

In [163]:
train_dataset_ass_2.class_names

AttributeError: '_MapDataset' object has no attribute 'class_names'

In [175]:
# print(len(train_dataset))
for e in train_dataset:
  print(e.count)
  break

131
<built-in method count of tuple object at 0x7a20f410c9c0>
