In [None]:
from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model

from keras.applications.efficientnet_v2 import EfficientNetV2B1
from keras.applications.vgg19 import preprocess_input
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
import tensorflow as tf
from keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt

In [None]:
import os
import kaggle


# Folder that receives the downloaded (and unzipped) dataset.
# No credentials are configured here; presumably the kaggle client picks them up
# from its own configuration — verify before running on a new machine.
download_path = '/tmp'

# Fetch the chest X-ray pneumonia dataset and unpack it in place.
kaggle.api.dataset_download_files(
    'paultimothymooney/chest-xray-pneumonia',
    path=download_path,
    unzip=True,
)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import shutil
import random

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# The dataset downloaded above is extracted under /tmp/chest_xray/.
# Walk that directory to confirm what was downloaded.

import os
# Report how many files each folder holds instead of printing every single path:
# one line per file buries the rest of the notebook under the listing.
for dirname, _, filenames in os.walk('/tmp/chest_xray/'):
    if filenames:
        print(f'{dirname}: {len(filenames)} files')

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Pool the original train/val/test folders per class and re-partition them
# 80% / 10% / 10% into a fresh directory tree.
dataset_path= '/tmp/chest_xray/'
new_dataset_path = '/tmp/chest_xray_new/'
split_seed = 42  # fixed seed: the same files land in the same partition on every fresh run

final_dir = os.path.normpath(new_dataset_path)
if not os.path.exists(final_dir):
    # Copy into a staging folder and rename it only once everything succeeded.
    # Otherwise an interrupted run leaves a half-filled target that the
    # existence check above would silently accept on the next run.
    staging_dir = final_dir + '.partial'
    shutil.rmtree(staging_dir, ignore_errors=True)

    # Private generator so the global `random` state is left untouched.
    rng = random.Random(split_seed)

    for cls in ['NORMAL', 'PNEUMONIA']:
        # Collect (filename, folder) pairs for this class from all three source splits.
        all_files = []
        for split in ['train', 'val', 'test']:
            source_folder = os.path.join(dataset_path, split, cls)
            # os.listdir order is arbitrary, so sort before the seeded shuffle;
            # hidden entries (names starting with '.') are not images and are skipped.
            all_files.extend(
                (file, source_folder)
                for file in sorted(os.listdir(source_folder))
                if not file.startswith('.')
            )

        rng.shuffle(all_files)

        train_end = int(len(all_files) * 0.8)
        val_end = int(len(all_files) * 0.9)
        partitions = {
            'train': all_files[:train_end],
            'val': all_files[train_end:val_end],
            'test': all_files[val_end:],
        }

        for split, members in partitions.items():
            dest_folder = os.path.join(staging_dir, split, cls)
            os.makedirs(dest_folder, exist_ok=True)
            for file, source_folder in members:
                shutil.copy(os.path.join(source_folder, file),
                            os.path.join(dest_folder, file))

    # Publish the finished dataset in one step.
    os.rename(staging_dir, final_dir)

In [None]:
# Directories of the re-split dataset, one per partition.
train_path = '/tmp/chest_xray_new/train'
valid_path = '/tmp/chest_xray_new/val'
test_path = '/tmp/chest_xray_new/test'

# Shape of the image tensor handed to the network.
input_shape = (224, 224, 3)


In [1]:
# Build the EfficientNetV2B1 backbone on an explicit input tensor, loading the
# "imagenet" weights and leaving out the top (include_top=False).
input_tensor = Input(shape=input_shape)
Efv21 = EfficientNetV2B1(
    weights="imagenet",
    include_top=False,
    input_tensor=input_tensor,
)


NameError: name 'EfficientNetV2B1' is not defined

In [None]:

# Mark every layer of the pretrained backbone as non-trainable.
for backbone_layer in Efv21.layers:
    backbone_layer.trainable = False

In [None]:
# Custom head, step 1: flatten the backbone's output.
flatten_layer = Flatten()
x = flatten_layer(Efv21.output)

In [None]:
# Custom head, step 2: two-unit softmax output layer.
output_layer = Dense(units=2, activation='softmax')
prediction = output_layer(x)

In [None]:
# Assemble the full network: backbone input through to the softmax head.
model = Model(
    inputs=Efv21.input,
    outputs=prediction,
)

In [None]:
# Print a summary of the assembled model.
model.summary()

In [None]:
from tensorflow.keras import callbacks, optimizers
from tensorflow.keras.callbacks import EarlyStopping

# Keep a copy of the model from the epoch with the best validation accuracy.
# NOTE(review): this path is relative ("tmp/" under the working directory), unlike
# the absolute "/tmp" used for the dataset — confirm that is intended.
checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="tmp/best_model.keras",
    monitor='val_accuracy',
    save_best_only=True,
)

# Stop once validation accuracy has not improved for 5 epochs.
# restore_best_weights=True puts the best epoch's weights back into `model`, so
# later evaluation and saving use them and not those of the last epoch run.
early_stopping_callback = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    mode='max',
    verbose=1,
    restore_best_weights=True,
)

# Two-class softmax output trained against one-hot labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# No `rescale` here: the EfficientNetV2 backbone used by this notebook is built with
# its default built-in preprocessing, which expects raw pixel values in [0, 255].
# Dividing by 255 in the generator as well would scale the images twice.
train_datagen = ImageDataGenerator(shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Validation and test images are passed through without augmentation.
valid_datagen = ImageDataGenerator()

test_datagen = ImageDataGenerator()

In [None]:
# Batches of augmented training images with one-hot ('categorical') labels.
training_set = train_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

In [None]:
# Batches used as validation data during training.
# NOTE(review): this reads `test_path`, while `test_set` reads `valid_path`. The two
# folders are disjoint, so nothing leaks, but the names are swapped — if this is
# corrected, change both iterators together.
valid_set = valid_datagen.flow_from_directory(
    test_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

In [None]:
# Held-out batches for the final evaluation.
# shuffle=False keeps the sample order fixed: with the default shuffling, labels
# gathered by indexing this iterator would not line up with predictions made in a
# later pass over it.
# NOTE(review): this reads `valid_path`, while `valid_set` reads `test_path`. The two
# folders are disjoint, so nothing leaks, but the names are swapped — if this is
# corrected, change both iterators together.
test_set = test_datagen.flow_from_directory(valid_path,
                                            target_size = (224, 224),
                                            batch_size = 32,
                                            class_mode = 'categorical',
                                            shuffle = False)

In [None]:
import math
import numpy as np
# Number of batches needed to see every test file once.
batch_size = 32
number_of_examples = len(test_set.filenames)
number_of_generator_calls = math.ceil(number_of_examples / batch_size)

# Index the iterator batch by batch and keep only the label half of each
# (images, labels) pair, flattened into one list of per-sample label rows.
test_labels = [
    label_row
    for batch_index in range(int(number_of_generator_calls))
    for label_row in np.array(test_set[batch_index][1])
]
    


In [None]:
# Train for up to 10 epochs, validating after each one; the callbacks handle
# checkpointing and early stopping.
fit_callbacks = [checkpoint_callback, early_stopping_callback]

r = model.fit(
    training_set,
    validation_data=valid_set,
    epochs=10,
    callbacks=fit_callbacks,
)


In [None]:
# Each figure is saved BEFORE plt.show(): once a figure has been shown it is
# finished, and a later plt.savefig() would write out an empty canvas.

# plot the loss
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(r.history['loss'], label='train loss')
ax_loss.plot(r.history['val_loss'], label='val loss')
ax_loss.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
ax_loss.legend()
fig_loss.savefig('LossVal_loss')
plt.show()

# plot the accuracy
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(r.history['accuracy'], label='train acc')
ax_acc.plot(r.history['val_accuracy'], label='val acc')
ax_acc.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')
ax_acc.legend()
fig_acc.savefig('AccVal_acc')
plt.show()

In [None]:
# Score the model on the held-out iterator; evaluate() yields the loss followed
# by the compiled metric.
evaluation = model.evaluate(test_set)
test_loss, test_acc = evaluation
print('Test accuracy:', test_acc)

In [None]:
from sklearn.metrics import classification_report

# Take labels and predictions from the SAME batch in a single pass over the
# iterator. Gathering labels in one pass and predicting in another pairs each
# prediction with the wrong label whenever the iterator shuffles between passes.
true_batches, pred_batches = [], []
for batch_index in range(len(test_set)):
    batch_images, batch_one_hot = test_set[batch_index]
    pred_batches.append(model.predict_on_batch(batch_images))
    true_batches.append(batch_one_hot)

y_true = np.concatenate(true_batches)
y_pred = np.concatenate(pred_batches)

# Both arrays hold one row per sample and one column per class; argmax turns
# them into class indices.
print(classification_report(np.argmax(y_true, axis=1), np.argmax(y_pred, axis=1)))

In [None]:
import tensorflow as tf

from keras.models import load_model

# Save the trained model to a file (".h5" extension) in the current working directory.
# NOTE(review): the filename says "vgg19", but the backbone built in this notebook is
# EfficientNetV2B1 — consider renaming once nothing depends on this path.
model.save('model_vgg19.h5')

In [None]:
def show_image(img_name):
    """Display the image stored at ``img_name``.

    Args:
        img_name: Path (or any other source ``plt.imread`` accepts) of the image.

    Single-channel images are drawn with a gray colormap, not matplotlib's
    default false-colour map. ``imshow`` ignores ``cmap`` for RGB(A) data, so
    colour images are displayed exactly as before.
    """
    img = plt.imread(img_name)
    plt.imshow(img, cmap='gray')
    plt.show()