In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Dataset locations, relative to the working directory.
train_dir = 'chest_xray//train'
test_dir = 'chest_xray//test'

# Image preprocessing settings reused by the generators below.
img_height = 150
img_width = 150
batch_size = 32

# Every image is rescaled by 1/255 as it is loaded.
data_generator = ImageDataGenerator(rescale=1./255)

# Unshuffled view of the training directory; its `classes` array is copied
# below as the clean reference labels.
train_data = data_generator.flow_from_directory(
    directory=train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
)

# Held-out images, used for validation during training and for evaluation.
test_data = data_generator.flow_from_directory(
    directory=test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
)

# Independent copy, so later label poisoning cannot alter the reference.
original_labels = train_data.classes.copy()

#Label-Flip Poisoning
def apply_label_flip(labels, flip_rate=0.25, seed=None):
    """Return a copy of ``labels`` with a random fraction of binary labels inverted.

    Args:
        labels: Sequence of 0/1 labels that supports ``len()``, ``.copy()``
            and integer item assignment (e.g. a list or a 1-D NumPy array).
            It is not modified.
        flip_rate: Fraction of labels to invert, between 0 and 1 inclusive.
            The number of flips is ``int(len(labels) * flip_rate)``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            picks the indices so the result is reproducible; when ``None``,
            the module-level ``random`` state is used.

    Returns:
        A ``(poisoned_labels, flip_indices)`` tuple: the modified copy and
        the list of distinct positions whose label was inverted.

    Raises:
        ValueError: If ``flip_rate`` is outside ``[0, 1]``.
    """
    if not 0 <= flip_rate <= 1:
        raise ValueError(f"flip_rate must be within [0, 1], got {flip_rate!r}")

    # A private generator keeps a seeded call from disturbing global state.
    rng = random if seed is None else random.Random(seed)

    poisoned_labels = labels.copy()
    num_flips = int(len(labels) * flip_rate)
    flip_indices = rng.sample(range(len(labels)), num_flips)
    for idx in flip_indices:
        # Labels are binary, so 1 - label swaps 0 and 1.
        poisoned_labels[idx] = 1 - poisoned_labels[idx]
    return poisoned_labels, flip_indices

# Poison a copy of the clean training labels at the default flip rate and
# keep the flipped positions alongside the poisoned labels.
poisoned_labels, flipped_indices = apply_label_flip(original_labels)

#CNN Model Construction and Training
def build_cnn(input_shape=(img_height, img_width, 3)):
    """Build and compile the binary image classifier used in this script.

    The network stacks three 3x3 convolution + max-pooling stages with 32,
    64 and 128 filters, flattens the result, and feeds it through a
    128-unit ReLU layer into a single sigmoid output unit.

    Args:
        input_shape: Shape handed to the first convolution layer; defaults
            to the module-level image height and width with 3 channels.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and an accuracy metric.
    """
    network = models.Sequential()

    # Only the first stage declares the input shape.
    network.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    network.add(layers.MaxPooling2D(2, 2))

    # The remaining convolution stages differ only in filter count.
    for filter_count in (64, 128):
        network.add(layers.Conv2D(filter_count, (3, 3), activation='relu'))
        network.add(layers.MaxPooling2D(2, 2))

    # Classification head.
    network.add(layers.Flatten())
    network.add(layers.Dense(128, activation='relu'))
    network.add(layers.Dense(1, activation='sigmoid'))

    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

baseline_model = build_cnn()

#Generator with poisoned labels
# shuffle=True: flow_from_directory lists files class by class, so an
# unshuffled stream feeds the optimizer long runs of single-class batches,
# and fit() does not reshuffle generator input. The iterator reads each
# label from `classes` by sample index, so the override below is expected to
# stay aligned with the images even when the order is shuffled.
poisoned_train_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True
)

# Swap the directory-derived labels for the poisoned copy.
poisoned_train_generator.classes = poisoned_labels

# Train the baseline on poisoned labels; the test set serves as validation.
baseline_model.fit(
    poisoned_train_generator,
    epochs=10,
    validation_data=test_data
)

#Anomaly Detection
def identify_label_discrepancies(original, poisoned):
    """Return the positions at which two label sequences disagree.

    The sequences are walked in lockstep; if their lengths differ, the
    comparison stops at the end of the shorter one.

    Args:
        original: Reference labels.
        poisoned: Labels to compare against the reference.

    Returns:
        A list of indices, in ascending order, where the labels differ.
    """
    mismatched_positions = []
    for position, label_pair in enumerate(zip(original, poisoned)):
        reference_label, candidate_label = label_pair
        if reference_label != candidate_label:
            mismatched_positions.append(position)
    return mismatched_positions

# Positions where the poisoned labels differ from the clean reference labels.
detected_anomalies = identify_label_discrepancies(original_labels, poisoned_labels)
print(f"Number of detected anomalies: {len(detected_anomalies)}")

#Robust Model Training
# Membership is tested once per training sample, so use a set: checking
# against the list directly would rescan it on every iteration.
_anomaly_lookup = set(detected_anomalies)
refined_indices = [i for i in range(len(original_labels)) if i not in _anomaly_lookup]
print(f"Number of refined indices: {len(refined_indices)}")

def refined_generator(directory, target_size, batch_size, class_mode, indices, shuffle=False):
    """Create an image iterator restricted to selected samples of a directory.

    The directory is scanned once, unshuffled, to obtain the file list and
    labels in the order that ``indices`` refers to. The selected files are
    then served by a new iterator built with ``flow_from_dataframe``.
    Overwriting ``filenames`` and ``samples`` on a directory iterator does
    not change which files or labels it yields, so the subset has to be
    materialized as its own iterator.

    Args:
        directory: Root directory containing one sub-directory per class.
        target_size: ``(height, width)`` every image is resized to.
        batch_size: Number of images per batch.
        class_mode: Label mode forwarded to the Keras iterators
            (e.g. ``'binary'``).
        indices: Positions, in the unshuffled directory listing, of the
            samples to keep.
        shuffle: Whether the returned iterator shuffles its samples each
            epoch. Defaults to ``False``.

    Returns:
        A Keras image iterator over the selected samples only. It exposes
        ``samples``, ``filenames``, ``classes`` and ``class_indices``, and
        uses the same class-to-index mapping as the directory scan.
    """
    # flow_from_dataframe requires pandas; import locally so the rest of the
    # file does not depend on it.
    import pandas as pd

    image_source = ImageDataGenerator(rescale=1./255)
    full_listing = image_source.flow_from_directory(
        directory,
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode,
        shuffle=False
    )

    # flow_from_dataframe expects class names as strings, so map the integer
    # labels from the directory scan back to their names.
    index_to_name = {index: name for name, index in full_listing.class_indices.items()}
    kept_samples = pd.DataFrame({
        'filename': [full_listing.filenames[i] for i in indices],
        'class': [index_to_name[int(full_listing.classes[i])] for i in indices],
    })

    # Pass the class list explicitly, ordered by index, so the mapping stays
    # identical even if the subset happens to lack one of the classes.
    class_names = sorted(full_listing.class_indices, key=full_listing.class_indices.get)

    return image_source.flow_from_dataframe(
        kept_samples,
        directory=directory,
        x_col='filename',
        y_col='class',
        target_size=target_size,
        classes=class_names,
        class_mode=class_mode,
        batch_size=batch_size,
        shuffle=shuffle
    )

# Training stream limited to the samples whose labels were left untouched.
refined_train_generator = refined_generator(
    directory=train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    indices=refined_indices,
)

print(f"Refined generator samples: {refined_train_generator.samples}")

# Fresh network with the same architecture, trained on the refined stream.
robust_model = build_cnn()
robust_model.fit(
    refined_train_generator,
    epochs=10,
    validation_data=test_data,
)

#Evaluation
# evaluate() returns the loss followed by the accuracy metric; only the
# accuracy is reported.
baseline_accuracy = baseline_model.evaluate(test_data)[1]
robust_accuracy = robust_model.evaluate(test_data)[1]

print(f"Baseline Accuracy (Poisoned): {baseline_accuracy}")
print(f"Robust Model Accuracy: {robust_accuracy}")

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
Found 5216 images belonging to 2 classes.
Epoch 1/10
163/163 ━━━━━━━━━━━━━━━━━━━━ 74s 448ms/step - accuracy: 0.6087 - loss: 0.7771 - val_accuracy: 0.7051 - val_loss: 0.5921
Epoch 2/10
163/163 ━━━━━━━━━━━━━━━━━━━━ 72s 440ms/step - accuracy: 0.6757 - loss: 0.6192 - val_accuracy: 0.6683 - val_loss: 0.5769
Epoch 3/10
163/163 ━━━━━━━━━━━━━━━━━━━━ 71s 433ms/step - accuracy: 0.6853 - loss: 0.6170 - val_accuracy: 0.7388 - val_loss: 0.5597
Epoch 4/10
163/163 ━━━━━━━━━━━━━━━━━━━━ 70s 428ms/step - accuracy: 0.7285 - loss: 0.5824 - val_accuracy: 0.7901 - val_loss: 0.4932
Epoch 5/10
163/163 ━━━━━━━━━━━━━━━━━━━━ 68s 419ms/step - accuracy: 0.7281 - loss: 0.5744 - val_accuracy: 0.7724 - val_loss: 0.5043
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━