In [1]:
import h5py
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
import pandas as pd

labels = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema',
          'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening',
          'Pneumonia', 'Pneumothorax']

# Load real images and labels from HDF5 file
real_images_h5_path = 'chest_xray.h5'
with h5py.File(real_images_h5_path, 'r') as h5_data:
    real_images = h5_data['images'][:1000]  # Load only the first 1000 images
    real_labels = np.array([h5_data[label][:1000] for label in labels])

# Load fake images and labels from HDF5 file
fake_images_h5_path = 'chest_xray_gen1000.h5'
with h5py.File(fake_images_h5_path, 'r') as h5_data:
    fake_images = h5_data['images'][:1000]  # Load only the first 1000 images
    fake_labels = np.array([h5_data[label][:1000] for label in labels])

# Convert real and fake labels to DataFrames

real_labels_df = pd.DataFrame(real_labels.T, columns=labels)  # Transpose to align with images
fake_labels_df = pd.DataFrame(fake_labels.T, columns=labels)

# Display shapes of real and fake data
print("Real images shape:", real_images.shape)
print("Real labels shape:", real_labels_df.shape)
print("Fake images shape:", fake_images.shape)
print("Fake labels shape:", fake_labels_df.shape)


Real images shape: (1000, 128, 128, 1)
Real labels shape: (1000, 14)
Fake images shape: (1000, 128, 128, 1)
Fake labels shape: (1000, 14)


In [3]:
# Split real data into training and testing sets
real_labels_np = real_labels_df.to_numpy()
x_real_train, x_real_test, y_real_train, y_real_test = train_test_split(real_images, real_labels_np, test_size=0.2, random_state=42)
fake_labels_np = fake_labels_df.to_numpy()
x_fake_train, x_fake_test, y_fake_train, y_fake_test = train_test_split(fake_images, fake_labels_np, test_size=0.2, random_state=42)

In [4]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam

# Define the model
custom_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(14, activation='sigmoid')  
])

# Compile the model
optimizer = Adam(learning_rate=0.001)
custom_model.compile(optimizer=optimizer,
                     loss='binary_crossentropy',
                     metrics=['accuracy', 'mae'])

# Print the model summary
custom_model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      320       
                                                                 
 batch_normalization (Batch  (None, 126, 126, 32)      128       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 124, 124, 32)      9248      
                                                                 
 batch_normalization_1 (Bat  (None, 124, 124, 32)      128       
 chNormalization)                                                
                                                                 
 max_pooling2d (MaxPooling2  (None, 62, 62, 32)        0         
 D)                                                              
                                                        

In [5]:
custom_model.fit(x_real_train, y_real_train, validation_data=(x_real_test, y_real_test), epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x185d09e9850>

In [6]:
custom_model.save(filepath='classification_model/xray_model_1000real.keras')
real_scores = custom_model.evaluate(x_real_test, y_real_test, verbose=0)
print("Real Data - Test Loss:", real_scores[0])
print("Real Data - Test Accuracy:", real_scores[1])

Real Data - Test Loss: 0.34973934292793274
Real Data - Test Accuracy: 0.29499998688697815


In [7]:
custom_model.fit(x_fake_train, y_fake_train, validation_data=(x_fake_test, y_fake_test), epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x185e1bf81d0>

In [8]:
# Evaluate the CNN model on fake data
custom_model.save(filepath='classification_model/xray_model_1000fake.keras')
fake_scores = custom_model.evaluate(x_fake_test, y_fake_test, verbose=0)
print("Fake Data - Test Loss:", fake_scores[0])
print("Fake Data - Test Accuracy:", fake_scores[1])

Fake Data - Test Loss: 0.825960636138916
Fake Data - Test Accuracy: 0.22499999403953552
