In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for file_path in (os.path.join(root, name) for name in files):
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import CategoricalAccuracy
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
import os
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image

# Directory that holds one sub-folder of images per class.
train_data_dir = '/kaggle/input/labeled-chest-xray-images/chest_xray/train'

# Class names are the sub-directory names. Plain files sitting next to the
# class folders are skipped so the inner os.listdir() below cannot fail on them.
class_names = sorted(
    entry for entry in os.listdir(train_data_dir)
    if os.path.isdir(os.path.join(train_data_dir, entry))
)

# Number of sample images shown per class.
num_images_to_display = 3

for class_name in class_names:
    class_path = os.path.join(train_data_dir, class_name)
    # os.listdir() returns entries in arbitrary order; sort first so the same
    # images are shown on every run.
    class_images = sorted(os.listdir(class_path))[:num_images_to_display]

    print(f"\nClass: {class_name}")
    fig, axes = plt.subplots(1, num_images_to_display, figsize=(15, 3), squeeze=False)

    for i, (ax, image_name) in enumerate(zip(axes[0], class_images), 1):
        img_path = os.path.join(class_path, image_name)
        img = image.load_img(img_path, target_size=(224, 224))
        ax.imshow(img)
        ax.set_title(f"Image {i}")

    # Hide panels that have no image (class folder with fewer files than requested).
    for ax in axes[0][len(class_images):]:
        ax.set_visible(False)

    plt.show()

In [None]:
train_data_dir = '/kaggle/input/labeled-chest-xray-images/chest_xray/train'
test_data_dir = '/kaggle/input/labeled-chest-xray-images/chest_xray/test'


def count_images_per_class(data_dir):
    """Count the entries inside each class sub-directory of ``data_dir``.

    Args:
        data_dir: Path to a directory containing one sub-directory per class.

    Returns:
        dict mapping class name (sub-directory name) to the number of entries
        in that sub-directory, in sorted class-name order. Entries of
        ``data_dir`` that are not directories are ignored.
    """
    counts = {}
    for class_name in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_name)
        if os.path.isdir(class_path):
            counts[class_name] = len(os.listdir(class_path))
    return counts


def print_class_counts(title, counts):
    """Print ``title`` followed by one "Class / Number of Images" line per class."""
    print(title)
    for class_name, num_images in counts.items():
        print(f"Class: {class_name}, Number of Images: {num_images}")


train_class_counts = count_images_per_class(train_data_dir)
test_class_counts = count_images_per_class(test_data_dir)

# Sorted class names for each split (sub-directory names).
class_names_train = list(train_class_counts)
class_names_test = list(test_class_counts)

print_class_counts("Training Dataset:", train_class_counts)
print_class_counts("\nTest Dataset:", test_class_counts)

**DATA AUGMENTATION**

In [None]:
# Random transformations applied to training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training generator: rescale pixel values by 1/255 and augment.
train_datagen = ImageDataGenerator(rescale=1./255., **augmentation_options)

# Evaluation generator: rescale only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255.)

# Settings shared by both directory iterators: batches of 20 images resized
# to 224x224, with a single binary label per image.
flow_options = dict(batch_size=20, class_mode='binary', target_size=(224, 224))

# Shuffled batches from the training directory.
train_generator = train_datagen.flow_from_directory(
    train_data_dir, shuffle=True, **flow_options
)

# Unshuffled batches so predictions stay aligned with the iterator's file order.
# NOTE(review): this "validation" generator reads from test_data_dir, so any
# model selection driven by it is tuned on the test images.
validation_generator = test_datagen.flow_from_directory(
    test_data_dir, shuffle=False, **flow_options
)

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
# Input resolution fed to the network; the data generators resize to the
# same 224x224 via target_size.
img_width, img_height = 224, 224

# Pre-trained VGG16 convolutional base without its ImageNet classifier head.
# Keras image tensors are ordered (height, width, channels).
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Freeze the whole base so only the new head below is trained.
base_model.trainable = False

# Classification head: flatten the feature maps, one hidden layer with
# dropout, and a single sigmoid unit for the binary label.
model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Print a layer-by-layer summary of the assembled model.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once val_loss has not improved for 10 epochs and restore the
# weights from the best val_loss epoch into the in-memory model.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Write the model to 'chest_model.h5' whenever val_accuracy reaches a new maximum.
# NOTE(review): the checkpoint selects on val_accuracy while early stopping
# selects on val_loss, so the file on disk and the restored in-memory weights
# may come from different epochs -- confirm which one is intended.
model_checkpoint = ModelCheckpoint('chest_model.h5', save_best_only=True, monitor='val_accuracy', mode='max')

# The model is already compiled where it is built (adam, binary_crossentropy,
# accuracy), so it is not compiled a second time here.

# Train for up to 30 epochs, evaluating on validation_generator after each one.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[early_stopping, model_checkpoint]
)

In [None]:
# Evaluate the model on the validation set
import seaborn as sns
# Reload the checkpoint saved during training (best val_accuracy epoch).
model = tf.keras.models.load_model('chest_model.h5')

# The model built in this notebook ends in a single sigmoid unit, so the
# predictions come back as one column; flatten to 1-D after thresholding at 0.5.
val_predictions = model.predict(validation_generator)
val_pred_classes = (val_predictions > 0.5).astype(int).ravel()

# Ground-truth labels from the generator; it was created with shuffle=False,
# so they line up with the prediction order.
val_true_classes = validation_generator.classes

# class_indices maps class name -> integer label; list the names in label order.
classes = [
    name
    for name, _ in sorted(validation_generator.class_indices.items(), key=lambda item: item[1])
]
label_ids = list(range(len(classes)))

# Fix the label set explicitly so the matrix keeps one row/column per class
# even if a class is missing from the predictions.
cm = confusion_matrix(val_true_classes, val_pred_classes, labels=label_ids)

# Pass the class names to heatmap() so tick labels sit at the cell centres
# (manually placed ticks at 0, 1, ... land on the cell edges instead).
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=classes, yticklabels=classes, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.tick_params(axis='x', labelrotation=45)
ax.tick_params(axis='y', labelrotation=0)
plt.show()

# Print the header separately so the report's first line is not indented by
# print()'s argument separator.
print("Classification Report:")
print(classification_report(val_true_classes, val_pred_classes,
                            labels=label_ids, target_names=classes))

In [None]:
from IPython.display import FileLink

# Path of the checkpoint file written by ModelCheckpoint during training.
model_file_path = 'chest_model.h5'

# Display a clickable download link for the saved model as the cell output.
FileLink(model_file_path)