In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
from os import listdir
import pickle
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Flatten, Dropout, Dense, Reshape
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import regularizers
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array

%matplotlib inline

## Loading the data

In [2]:
df = pd.read_csv("/kaggle/input/omariatingsingle/resized_dataset_labels_single/resized_labels_single.csv")

In [4]:
df.head()

Unnamed: 0,Path,No Finding,Pneumonia,Pneumothorax,Effusion,Cardiomegaly
0,images/00000001_000.png,0,0,0,0,1
1,images/00000001_001.png,0,0,0,0,1
2,images/00000002_000.png,1,0,0,0,0
3,images/00000005_005.png,1,0,0,0,0
4,images/00000008_000.png,0,0,0,0,1


In [5]:
data = df.sample(frac=1).reset_index(drop=True)

In [6]:
data.head()

Unnamed: 0,Path,No Finding,Pneumonia,Pneumothorax,Effusion,Cardiomegaly
0,images/943402930671.png,0,1,0,0,0
1,images/00029328_001.png,1,0,0,0,0
2,images/373500799539.png,0,1,0,0,0
3,images/806553113655.png,0,0,1,0,0
4,images/00015732_024.png,0,0,0,1,0


In [7]:
print(data['Path'].head())

0    images/943402930671.png
1    images/00029328_001.png
2    images/373500799539.png
3    images/806553113655.png
4    images/00015732_024.png
Name: Path, dtype: object


In [8]:
class ImageDataset(Sequence):
    def __init__(self, df, img_dir, batch_size=32, img_size=(224, 224), shuffle=True):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, idx):
        batch_df = self.df.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_images = []
        batch_labels = []

        for _, row in batch_df.iterrows():
            filename = os.path.basename(row["Path"])
            path = os.path.join(self.img_dir, filename)

            if os.path.exists(path):
                img = load_img(path, target_size=self.img_size)
                img = img_to_array(img) / 255.0
                batch_images.append(img)

                label = row.drop("Path").values.astype("float32")
                batch_labels.append(label)

        return np.array(batch_images), np.array(batch_labels)

    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

In [9]:
print(df.columns)  # ensure all classes are included

Index(['Path', 'No Finding', 'Pneumonia', 'Pneumothorax', 'Effusion',
       'Cardiomegaly'],
      dtype='object')


## Train/Val/Test split

In [10]:
# Train/Test split
train_df, test_df = train_test_split(data, test_size=0.15, random_state=42)

# Further split train into train/val
train_df, val_df = train_test_split(train_df, test_size=0.18, random_state=42)

In [11]:
train_gen = ImageDataset(train_df, '/kaggle/input/omariatingsingle/resized_dataset_labels_single/images', batch_size=32)
val_gen = ImageDataset(val_df, '/kaggle/input/omariatingsingle/resized_dataset_labels_single/images', batch_size=32, shuffle=False)
test_gen = ImageDataset(test_df, '/kaggle/input/omariatingsingle/resized_dataset_labels_single/images', batch_size=32, shuffle=False)

In [16]:
import keras
from keras.applications.densenet import DenseNet121
from tensorflow.keras.applications import MobileNetV2
from keras.models import Model
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping, TensorBoard
from keras.preprocessing import image
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.applications import EfficientNetB0

In [13]:
from keras import backend as K
K.clear_session()

In [14]:
# Enable multi-GPU support
strategy = tf.distribute.MirroredStrategy()
print("Number of devices:", strategy.num_replicas_in_sync)


Number of devices: 2


In [17]:
with strategy.scope():
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = BatchNormalization()(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.4)(x)
    predictions = Dense(5, activation="sigmoid", kernel_regularizer=l2(0.05))(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        loss=BinaryCrossentropy(),
        optimizer=opt,
        metrics=[
            tf.keras.metrics.BinaryAccuracy(), 
            tf.keras.metrics.F1Score(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.Precision(),
            tf.keras.metrics.AUC()
        ]
    )


In [18]:
callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=2)
]

## Training the model

In [19]:
history = model.fit(
    train_gen,
    validation_data=val_gen, 
    epochs = 10,
    callbacks=callbacks)

Epoch 1/10
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 299ms/step - auc_1: 0.7183 - binary_accuracy: 0.6663 - f1_score: 0.4095 - loss: 0.9894 - precision_1: 0.3354 - recall_1: 0.6422 - val_auc_1: 0.5029 - val_binary_accuracy: 0.7414 - val_f1_score: 0.0846 - val_loss: 0.7577 - val_precision_1: 0.2594 - val_recall_1: 0.1580 - learning_rate: 1.0000e-04
Epoch 2/10
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 245ms/step - auc_1: 0.8748 - binary_accuracy: 0.8493 - f1_score: 0.6237 - loss: 0.5496 - precision_1: 0.6138 - recall_1: 0.6692 - val_auc_1: 0.8863 - val_binary_accuracy: 0.8667 - val_f1_score: 0.6316 - val_loss: 0.4408 - val_precision_1: 0.6917 - val_recall_1: 0.6014 - learning_rate: 1.0000e-04
Epoch 3/10
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 245ms/step - auc_1: 0.9107 - binary_accuracy: 0.8874 - f1_score: 0.6986 - loss: 0.4015 - precision_1: 0.7457 - recall_1: 0.6631 - val_auc_1: 0.9055 - val_binary_accuracy

In [None]:
# Save the model after initial training
model.save("lung_disease_model_final.h5")
print("Model saved successfully!")