In [None]:
import tensorflow as tf
import warnings
import random
import os
import numpy as np
def configure_gpus():
    """Turn on memory growth for every physical GPU and print a summary.

    Prints "No GPU is available." when TensorFlow lists no physical GPU.
    A RuntimeError raised while configuring the devices is printed rather
    than propagated.
    """
    physical_devices = tf.config.list_physical_devices('GPU')
    if not physical_devices:
        print("No GPU is available.")
        return

    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
        logical_devices = tf.config.list_logical_devices('GPU')
        physical_count = len(physical_devices)
        logical_count = len(logical_devices)
        print(f"{physical_count} Physical GPU(s), {logical_count} Logical GPU(s) configured.")
    except RuntimeError as error:
        print(f"RuntimeError in configuring GPUs: {error}")

def seed_everything(seed=0):
    """Seed the Python, NumPy and TensorFlow random generators with ``seed``.

    Also exports ``PYTHONHASHSEED`` (the seed as text) and
    ``TF_DETERMINISTIC_OPS`` ('1') into the process environment.

    Args:
        seed: Value handed to each seeding function. Defaults to 0.
    """
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only affects child processes - confirm.
    os.environ.update({'PYTHONHASHSEED': str(seed), 'TF_DETERMINISTIC_OPS': '1'})

def check_jupyter_notebook():
    """Detect an IPython/Jupyter session and set its output rate limits.

    ``get_ipython`` only exists inside IPython-based sessions; the NameError
    raised elsewhere is used as the "not a notebook" signal.
    """
    try:
        shell_config = get_ipython().config
    except NameError:
        print("Not running in a Jupyter Notebook environment.")
    else:
        print("Jupyter Notebook environment detected. Configuring...")
        shell_config.NotebookApp.iopub_msg_rate_limit = 10000.0
        shell_config.NotebookApp.rate_limit_window = 5.0

# Reset Keras' global state so re-running this cell starts from a clean slate.
tf.keras.backend.clear_session()
# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings - confirm that is intended.
warnings.filterwarnings("ignore")

# Configure GPU memory growth first, then seed the random number generators.
configure_gpus()
seed_everything()

# Detect a Jupyter/IPython session and set its output rate limits.
check_jupyter_notebook()

# Initialize TensorFlow distributed strategy
# strategy = tf.distribute.MirroredStrategy()

In [None]:
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import numpy as np
from numpy import array
import pandas as pd
import cv2
from glob import glob
import PIL
import time
from tqdm import tqdm
import os
import xml.etree.ElementTree as ET
import pandas as pd
import re
import tensorflow_hub as hub
# Restrict TensorFlow's Python logger to messages at ERROR level.
tf.get_logger().setLevel("ERROR")

In [None]:
import tensorflow as tf
import tensorflow_text as text  # This line will fail if TensorFlow Text is not correctly installed
import tensorflow_addons as tfa

# Print the installed TensorFlow and TensorFlow Text versions.
print(tf.__version__)
print(text.__version__)

In [None]:
directory_path = 'NLMCXR_reports/ecgen-radiology'


def _parse_report(file_path, filename):
    """Return one record per ``parentImage`` element of a report XML file.

    Args:
        file_path: Path of the XML file to parse.
        filename: Name stored in the ``filename`` field of every record.

    Returns:
        A list of dicts with the keys ``filename``, ``image_id``, ``caption``,
        ``comparison``, ``indication``, ``findings`` and ``impression``.
        A section that is absent from the report is stored as ``''``; a
        section that is present but has no text keeps ``None``.
    """
    root = ET.parse(file_path).getroot()
    # Index the labelled abstract sections by lower-cased label.
    abstract_info = {
        section.get('Label').lower(): section.text
        for section in root.findall('.//Abstract/AbstractText')
        if section.get('Label')
    }

    records = []
    for parent_image in root.findall('.//parentImage'):
        # Look the caption element up once instead of twice.
        caption_node = parent_image.find('.//caption')
        records.append({
            'filename': filename,
            'image_id': parent_image.get('id'),
            'caption': caption_node.text if caption_node is not None else None,
            'comparison': abstract_info.get('comparison', ''),
            'indication': abstract_info.get('indication', ''),
            'findings': abstract_info.get('findings', ''),
            'impression': abstract_info.get('impression', '')
        })
    return records


image_data = []

# os.listdir() returns entries in arbitrary order. Sorting makes the row
# order reproducible, and with it the positional train/validation split that
# is later derived from this data.
for filename in sorted(os.listdir(directory_path)):
    if filename.endswith('.xml'):
        file_path = os.path.join(directory_path, filename)
        image_data.extend(_parse_report(file_path, filename))

df = pd.DataFrame(image_data)
print(df)
df.to_csv('image_data.csv', index=False)

In [None]:
image_path = "NLMCXR_png/"
# glob() returns matches in arbitrary, filesystem-dependent order; sort so
# that index-based access to `images` is stable between runs.
images = sorted(glob(image_path + "*.png"))
len(images)

In [None]:
# Preview up to five images. Slicing (rather than indexing with range(5))
# avoids an IndexError when fewer than five images were found.
for preview_path in images[:5]:
    image = cv2.imread(preview_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; skip it so cvtColor does not fail on a missing image.
        print(f"Could not read image: {preview_path}")
        continue
    plt.figure()
    # OpenCV loads pixels as BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(image)
In [None]:
reports = df
# Maps image id -> list of impression captions collected for that image.
data = {}
for row in reports.itertuples(index=False):
    filename = row.image_id
    caption = row.impression
    entries = data.setdefault(filename, [])

    if isinstance(caption, str) and re.match(r'^\d+\.', caption):
        # Numbered impression ("1. text ..."): keep the text that follows the
        # number.
        # NOTE(review): only the part between the first and second ". " is
        # kept, so later sentences are dropped - confirm this is intended.
        parts = caption.split('. ')
        if len(parts) > 1:
            entries.append(parts[1])
        else:
            # No ". " separator (e.g. "1.text"): strip the numeric prefix
            # instead of failing with an IndexError.
            entries.append(re.sub(r'^\d+\.\s*', '', caption))
    elif isinstance(caption, str) and entries and isinstance(entries[-1], str):
        # Continuation text: merge it into the previous caption.
        entries[-1] += " " + caption
    elif isinstance(caption, str) or not entries:
        # First value for this image (possibly a non-string placeholder), or
        # a string that follows a non-string placeholder.
        entries.append(caption)
    # Otherwise: a non-string caption with existing entries has nothing to
    # merge; previously this raised TypeError on `" " + caption`.
list(data.items())[0:5]

In [None]:
def cleanse_data(data):
    """Lower-case captions and drop tokens shorter than two characters.

    Args:
        data: Mapping of key -> list of captions. Entries that are not
            strings are ignored.

    Returns:
        A new dict mapping each key that had at least one string caption to
        the list of cleaned captions. Every kept token is followed by a
        single space, so non-empty results end with a trailing space.
    """
    cleaned = {}
    for image_id, captions in data.items():
        for raw_caption in captions:
            if not isinstance(raw_caption, str):
                continue
            # The length filter is applied to the token as written, before
            # lower-casing.
            kept_tokens = [token.lower() for token in raw_caption.split() if len(token) >= 2]
            cleaned_caption = "".join(token + " " for token in kept_tokens)
            cleaned.setdefault(image_id, []).append(cleaned_caption)
    return cleaned

# Clean every caption list; data2 maps image id -> list of lower-cased captions.
data2 = cleanse_data(data)
# Number of image ids that kept at least one string caption.
print(len(data2))

In [None]:
# Build the vocabulary (the set of unique words) from all cleaned captions and count its size.

def vocabulary(data2):
    """Return the set of whitespace-separated words found in all captions.

    Args:
        data2: Mapping of key -> list of caption strings.

    Returns:
        A set containing every distinct word.
    """
    words = set()
    for captions in data2.values():
        for caption in captions:
            words.update(caption.split())
    return words

# Collect the distinct words across all cleaned captions and print how many there are.
vocabulary_data = vocabulary(data2)
print(len(vocabulary_data))

In [None]:
def save_dict(data2, filename):
    """Write captions to a text file, one ``"<key> <caption>"`` entry per line.

    Args:
        data2: Mapping of key -> list of caption strings.
        filename: Path of the file to create or overwrite.

    Lines are joined with a newline; no trailing newline is written.
    """
    lines = [key + ' ' + desc for key, value in data2.items() for desc in value]
    # The context manager closes the file even if the write raises.
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))

# Persist the cleaned captions to disk, one "<image id> <caption>" line per caption.
save_dict(data2, 'captions1.txt')

In [None]:
# Maps each image file path to the list of cleaned captions for that image (filled below).
image_path_to_caption = {}
# Folder that holds the PNG images; image ids are joined to it below.
image_folder = 'NLMCXR_png/'  # NOTE(review): relative path - presumably resolved against the notebook's working directory; confirm

# 1. Pre-process captions
def clean_caption(caption):
    """Return ``caption`` lower-cased with surrounding whitespace removed.

    Punctuation is left untouched.
    """
    lowered = caption.lower()
    return lowered.strip()

# Normalise every caption in place: data2 keeps its keys, only the lists are replaced.
for img_name in list(data2):
    data2[img_name] = list(map(clean_caption, data2[img_name]))

# 2. Group the cleaned captions by the path of the image they describe.
for img_name, cap_list in data2.items():
    if not cap_list:
        # An image without captions gets no entry in the mapping.
        continue
    image_path = os.path.join(image_folder, img_name + '.png')
    image_path_to_caption.setdefault(image_path, []).extend(cap_list)

image_paths = [*image_path_to_caption]
print(f"Number of images: {len(image_paths)}")

In [None]:
# Output directory and sizing for the TFRecord shards.
tfrecords_dir = 'tfrecords/'
train_size = 5000
valid_size = 2418
captions_per_image = 1
images_per_file = 500

# Training images are taken from the front of the list.
train_image_paths = image_paths[:train_size]
# Derive the shard count from the paths actually selected, so that no empty
# shard is requested when fewer than `train_size` images are available.
num_train_files = int(np.ceil(len(train_image_paths) / images_per_file))
train_files_prefix = os.path.join(tfrecords_dir, "train")

# Validation images are the last `valid_size` paths, but the slice never
# reaches back into the training range. With at least train_size + valid_size
# images this selects the same paths as image_paths[-valid_size:].
valid_start = max(train_size, len(image_paths) - valid_size)
valid_image_paths = image_paths[valid_start:]
num_valid_files = int(np.ceil(len(valid_image_paths) / images_per_file))
valid_files_prefix = os.path.join(tfrecords_dir, "valid")

tf.io.gfile.makedirs(tfrecords_dir)


def bytes_feature(value):
    """Wrap one bytes value in a ``tf.train.Feature`` holding a one-element ``BytesList``."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def create_example(image_path, caption):
    """Build a ``tf.train.Example`` holding a caption and the raw image file bytes.

    Args:
        image_path: Path of the image file; its bytes are stored undecoded
            under the ``"raw_image"`` key.
        caption: Caption text; stored encoded as bytes under ``"caption"``.

    Returns:
        The assembled ``tf.train.Example``.
    """
    encoded_caption = caption.encode()
    image_bytes = tf.io.read_file(image_path).numpy()
    features = tf.train.Features(
        feature={
            "caption": bytes_feature(encoded_caption),
            "raw_image": bytes_feature(image_bytes),
        }
    )
    return tf.train.Example(features=features)


def write_tfrecords(file_name, image_paths):
    """Write one TFRecord file with the caption/image examples of ``image_paths``.

    For every path, at most ``captions_per_image`` captions are taken from
    ``image_path_to_caption`` and one example is written per caption.

    Args:
        file_name: Path of the TFRecord file to create.
        image_paths: Image paths to include; each must be a key of
            ``image_path_to_caption``.

    Returns:
        The number of examples written (0 when there was nothing to write).
    """
    # Pair every selected caption with its image path before opening the
    # writer, so an unknown path fails before any file is created.
    pairs = [
        (image_path, caption)
        for image_path in image_paths
        for caption in image_path_to_caption[image_path][:captions_per_image]
    ]

    with tf.io.TFRecordWriter(file_name) as writer:
        for image_path, caption in pairs:
            example = create_example(image_path, caption)
            writer.write(example.SerializeToString())

    # Count the pairs instead of reusing the loop index, which was unbound
    # (UnboundLocalError) whenever no example was written.
    return len(pairs)


def write_data(image_paths, num_files, files_prefix):
    """Split ``image_paths`` into consecutive shards and write each to a TFRecord file.

    Shard ``k`` covers ``images_per_file`` paths starting at
    ``k * images_per_file`` and is written to
    ``<files_prefix>-<k as two digits>.tfrecord``.

    Args:
        image_paths: Image paths to write.
        num_files: Number of shard files to produce.
        files_prefix: Path prefix shared by all shard files.

    Returns:
        The total number of examples written across all shards.
    """
    total_examples = 0
    for shard in tqdm(range(num_files)):
        first = shard * images_per_file
        shard_paths = image_paths[first:first + images_per_file]
        shard_name = files_prefix + "-%02d.tfrecord" % shard
        total_examples += write_tfrecords(shard_name, shard_paths)
    return total_examples


# Write the training shards and report how many examples they contain.
train_example_count = write_data(train_image_paths, num_train_files, train_files_prefix)
print(f"{train_example_count} training examples were written to tfrecord files.")

# Write the validation shards the same way.
valid_example_count = write_data(valid_image_paths, num_valid_files, valid_files_prefix)
print(f"{valid_example_count} evaluation examples were written to tfrecord files.")

In [None]:
# Schema used to parse serialized examples: both fields are scalar strings,
# matching the "caption" and "raw_image" keys written by create_example.
feature_description = {
    "caption": tf.io.FixedLenFeature([], tf.string),
    "raw_image": tf.io.FixedLenFeature([], tf.string),
}


def read_example(example):
    """Parse one serialized example into a dict with ``caption`` and ``image``.

    Args:
        example: A serialized ``tf.train.Example`` following
            ``feature_description``.

    Returns:
        A dict with the caption string under ``"caption"`` and the decoded
        3-channel image, resized to 299x299, under ``"image"``.
    """
    features = tf.io.parse_single_example(example, feature_description)
    raw_image = features.pop("raw_image")
    # The records are built from ".png" files, so decode them with the PNG
    # decoder rather than the JPEG one.
    decoded_image = tf.image.decode_png(raw_image, channels=3)
    features["image"] = tf.image.resize(decoded_image, size=(299, 299))
    return features


def get_dataset(file_pattern, batch_size):
    """Build a shuffled, batched dataset from the TFRecord files matching a pattern.

    Args:
        file_pattern: Glob pattern of the TFRecord files to read.
        batch_size: Number of examples per batch; the shuffle buffer holds
            ``batch_size * 10`` examples.

    Returns:
        A ``tf.data.Dataset`` yielding batched dicts as produced by
        ``read_example``.
    """
    files = tf.data.Dataset.list_files(file_pattern)
    return (
        tf.data.TFRecordDataset(files)
        .map(
            read_example,
            num_parallel_calls=tf.data.AUTOTUNE,
            deterministic=False,
        )
        .shuffle(batch_size * 10)
        .batch(batch_size)
        # Prefetch last so that whole batches, not single examples, are
        # prepared ahead of the consumer.
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )

In [None]:
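# NOTE: The triple-quoted string below holds a disabled, alternative data pipeline
# (Tokenizer + train_test_split). It is never executed; it is kept only for reference.
'''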
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import tensorflow as tf
import os
import matplotlib.pyplot as plt


# Image folder path
image_folder = 'NLMCXR_png/'  # Adjusted to a hypothetical path for demonstration

# 1. Pre-process captions
def clean_caption(caption):
    # Convert to lowercase, remove punctuation, etc.
    return caption.lower().strip()

# Apply cleaning
for img_name, captions in data2.items():
    data2[img_name] = [clean_caption(cap) for cap in captions]

# 2. Prepare image paths and captions
image_paths = []
captions = []
for img_name, cap_list in data2.items():
    for cap in cap_list:
        image_paths.append(os.path.join(image_folder, img_name + '.png'))
        captions.append(cap)

# 3. Tokenization and Vectorization
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(captions)
seqs = tokenizer.texts_to_sequences(captions)
max_length = max(len(seq) for seq in seqs)
cap_vector = pad_sequences(seqs, maxlen=max_length, padding='post')

# Now, split the dataset
image_paths_train_val, image_paths_test, cap_vector_train_val, cap_vector_test = train_test_split(
    image_paths, cap_vector, test_size=0.05, random_state=42)

image_paths_train, image_paths_val, cap_vector_train, cap_vector_val = train_test_split(
    image_paths_train_val, cap_vector_train_val, test_size=0.15/(0.80+0.15), random_state=42)

# Verify the distribution
len_train, len_val, len_test = len(image_paths_train), len(image_paths_val), len(image_paths_test)

print("Train samples:", len_train)
print("Validation samples:", len_val)
print("Test samples:", len_test)

# 5. Create tf.data.Dataset
def map_func(img_path, cap):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.resize(img, (299, 299))
    img = img / 255.0
    return img, cap

batch_size = 32  # Example batch size

train_dataset = tf.data.Dataset.from_tensor_slices((image_paths_train, cap_vector_train))
train_dataset = train_dataset.map(map_func).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

val_dataset = tf.data.Dataset.from_tensor_slices((image_paths_val, cap_vector_val))
val_dataset = val_dataset.map(map_func).batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

# Display shapes and a sample image with caption
for img_batch, cap_batch in train_dataset.take(1):
    sample_img = img_batch[0]
    sample_cap = cap_batch[0]
    plt.imshow(sample_img)
    plt.title("Sample Image")
    plt.show()
    
    print("Image batch shape:", img_batch.shape)
    print("Caption batch shape:", cap_batch.shape)
    
    # Convert first caption back to words
    sample_cap_words = tokenizer.sequences_to_texts([sample_cap.numpy()])[0]
    print("Sample Caption:", sample_cap_words)
'''

In [None]:
def project_embeddings(embeddings, num_projection_layers, projection_dims, dropout_rate):
    """Project embeddings to ``projection_dims`` and refine them with residual blocks.

    After an initial Dense projection, each of the ``num_projection_layers``
    blocks applies GELU -> Dense -> Dropout, adds the block input back and
    layer-normalises the sum.

    Args:
        embeddings: Tensor of embeddings to project.
        num_projection_layers: Number of residual blocks to apply.
        projection_dims: Width of every Dense layer.
        dropout_rate: Rate used by the Dropout layer of each block.

    Returns:
        The projected embeddings tensor.
    """
    layers = tf.keras.layers
    projected = layers.Dense(units=projection_dims)(embeddings)
    for _ in range(num_projection_layers):
        branch = tf.nn.gelu(projected)
        branch = layers.Dense(projection_dims)(branch)
        branch = layers.Dropout(dropout_rate)(branch)
        summed = layers.Add()([projected, branch])
        projected = layers.LayerNormalization()(summed)
    return projected

def create_vision_encoder(num_projection_layers, projection_dims, dropout_rate, trainable=False):
    """Build the image encoder: an Xception backbone followed by a projection head.

    Args:
        num_projection_layers: Number of residual blocks in the projection head.
        projection_dims: Output width of the projection head.
        dropout_rate: Dropout rate used inside the projection head.
        trainable: Value assigned to ``trainable`` on every backbone layer.

    Returns:
        A Keras model named ``"vision_encoder"`` that takes 299x299x3 images.
    """
    # Backbone with ImageNet weights, no classification top, average pooling.
    backbone = keras.applications.Xception(
        include_top=False, weights="imagenet", pooling="avg"
    )
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = trainable

    image_input = tf.keras.layers.Input(shape=(299, 299, 3), name="image_input")
    # Apply the Xception-specific input preprocessing before the backbone.
    preprocessed = tf.keras.applications.xception.preprocess_input(image_input)
    image_embeddings = backbone(preprocessed)
    projected = project_embeddings(
        image_embeddings, num_projection_layers, projection_dims, dropout_rate
    )
    return keras.Model(image_input, projected, name="vision_encoder")

def create_text_encoder(num_projection_layers, projection_dims, dropout_rate, trainable=False):
    """Build the text encoder: BERT preprocessing, a small BERT and a projection head.

    Args:
        num_projection_layers: Number of residual blocks in the projection head.
        projection_dims: Output width of the projection head.
        dropout_rate: Dropout rate used inside the projection head.
        trainable: Passed to the BERT hub layer as its ``trainable`` flag.

    Returns:
        A Keras model named ``"text_encoder"`` that takes scalar strings.
    """
    # Both hub layers are created before the input tensor.
    preprocessing_layer = hub.KerasLayer(
        "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2",
        name="text_preprocessing",
    )
    bert_layer = hub.KerasLayer(
        "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1",
        trainable=trainable,
        name="bert",
    )

    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name="text_input")
    bert_outputs = bert_layer(preprocessing_layer(text_input))
    # Only the pooled output of BERT is projected.
    pooled_embeddings = bert_outputs["pooled_output"]
    projected = project_embeddings(
        pooled_embeddings, num_projection_layers, projection_dims, dropout_rate
    )
    return tf.keras.Model(text_input, projected, name="text_encoder")



class DualEncoder(keras.Model):
    """Jointly trains a text encoder and an image encoder on caption/image pairs.

    The loss pulls the caption-to-image similarity matrix of a batch towards
    soft targets built from the caption-to-caption and image-to-image
    similarities.

    Args:
        text_encoder: Model mapping a batch of captions to embeddings.
        image_encoder: Model mapping a batch of images to embeddings.
        temperature: Divisor applied to the similarity logits.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, text_encoder, image_encoder, temperature=1.0, **kwargs):
        super().__init__(**kwargs)
        self.text_encoder = text_encoder
        self.image_encoder = image_encoder
        self.temperature = temperature
        # Running mean of the loss, reported by train_step and test_step.
        self.loss_tracker = keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        """Return the metrics tracked by this model (the loss only)."""
        return [self.loss_tracker]

    def call(self, features, training=False):
        """Embed the captions and images of a batch.

        Args:
            features: Dict with a ``"caption"`` and an ``"image"`` entry.
            training: Forwarded to both encoders.

        Returns:
            A ``(caption_embeddings, image_embeddings)`` tuple.
        """
        # Use the encoders handed to the constructor. The previous version
        # read the module-level names `text_encoder` and `vision_encoder`, so
        # the class only worked when globals with exactly those names existed.
        # Each encoder is placed on its own GPU when available.
        # NOTE(review): falling back with fewer than two GPUs presumably
        # relies on TensorFlow's soft device placement - confirm.
        with tf.device("/gpu:0"):
            caption_embeddings = self.text_encoder(features["caption"], training=training)
        with tf.device("/gpu:1"):
            image_embeddings = self.image_encoder(features["image"], training=training)
        return caption_embeddings, image_embeddings

    def compute_loss(self, caption_embeddings, image_embeddings):
        """Return the symmetric cross-entropy loss for one batch of embeddings.

        The result holds one value per example: the average of the
        caption-to-image and image-to-caption cross-entropies.
        """
        # logits[i][j] is the dot_similarity(caption_i, image_j).
        logits = (
            tf.matmul(caption_embeddings, image_embeddings, transpose_b=True)
            / self.temperature
        )
        # images_similarity[i][j] is the dot_similarity(image_i, image_j).
        images_similarity = tf.matmul(
            image_embeddings, image_embeddings, transpose_b=True
        )
        # captions_similarity[i][j] is the dot_similarity(caption_i, caption_j).
        captions_similarity = tf.matmul(
            caption_embeddings, caption_embeddings, transpose_b=True
        )
        # targets[i][j] is the softmax of the average of
        # dot_similarity(caption_i, caption_j) and dot_similarity(image_i, image_j).
        targets = keras.activations.softmax(
            (captions_similarity + images_similarity) / (2 * self.temperature)
        )
        # Cross-entropy of each caption against all images.
        captions_loss = keras.losses.categorical_crossentropy(
            y_true=targets, y_pred=logits, from_logits=True
        )
        # Cross-entropy of each image against all captions.
        images_loss = keras.losses.categorical_crossentropy(
            y_true=tf.transpose(targets), y_pred=tf.transpose(logits), from_logits=True
        )
        # Per-example average of the two directions; the batch mean is taken
        # by loss_tracker.
        return (captions_loss + images_loss) / 2

    def train_step(self, features):
        """Run one optimisation step on a batch and return the running loss."""
        with tf.GradientTape() as tape:
            # Forward pass
            caption_embeddings, image_embeddings = self(features, training=True)
            loss = self.compute_loss(caption_embeddings, image_embeddings)
        # Backward pass
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        # Monitor loss
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, features):
        """Evaluate one batch without updating weights and return the running loss."""
        caption_embeddings, image_embeddings = self(features, training=False)
        loss = self.compute_loss(caption_embeddings, image_embeddings)
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

In [None]:
# Training hyper-parameters.
num_epochs = 50
batch_size = 64
# Both encoders use a single projection block and project to 256 dimensions.
vision_encoder = create_vision_encoder(
    num_projection_layers=1, projection_dims=256, dropout_rate=0.1
)
text_encoder = create_text_encoder(
    num_projection_layers=1, projection_dims=256, dropout_rate=0.1
)
# Combine the two encoders; similarity logits are divided by the temperature.
dual_encoder = DualEncoder(text_encoder, vision_encoder, temperature=0.05)
# No loss is passed to compile(): DualEncoder computes its own loss in train_step/test_step.
dual_encoder.compile(
    optimizer=tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.001)
)

In [None]:
# Summarise the training setup before starting.
print(f"Number of GPUs: {len(tf.config.list_physical_devices('GPU'))}")
print(f"Number of examples (caption-image pairs): {train_example_count}")
print(f"Batch size: {batch_size}")
print(f"Steps per epoch: {int(np.ceil(train_example_count / batch_size))}")
# Build the input pipelines from the TFRecord shards written earlier.
train_dataset = get_dataset(os.path.join(tfrecords_dir, "train-*.tfrecord"), batch_size)
valid_dataset = get_dataset(os.path.join(tfrecords_dir, "valid-*.tfrecord"), batch_size)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.2, patience=3
)
# Stop after 5 epochs without val_loss improvement and restore the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)
# Train the dual encoder; `history` keeps the per-epoch metrics for plotting.
history = dual_encoder.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=valid_dataset,
    callbacks=[reduce_lr, early_stopping],
)
print("Training completed. Saving vision and text encoders...")
# Save each encoder separately under its own directory name.
vision_encoder.save("vision_encoder")
text_encoder.save("text_encoder")
print("Models are saved.")

In [None]:
# Training and validation loss per epoch, drawn in that order to match the legend.
for series_name in ("loss", "val_loss"):
    plt.plot(history.history[series_name])
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["train", "valid"], loc="upper right")
plt.show()

In [None]:
# DualEncoder reports only "loss", so "accuracy"/"val_accuracy" are absent from
# the history unless extra metrics are added. Plot whichever series exist
# instead of failing with a KeyError.
accuracy_labels = {"accuracy": "train", "val_accuracy": "valid"}
recorded_accuracy = [key for key in accuracy_labels if key in history.history]
if recorded_accuracy:
    for key in recorded_accuracy:
        plt.plot(history.history[key])
    # This figure shows accuracy; the y-axis label previously said "Loss".
    plt.ylabel("Accuracy")
    plt.xlabel("Epoch")
    plt.legend([accuracy_labels[key] for key in recorded_accuracy], loc="upper right")
    plt.show()
else:
    print("No accuracy metric was recorded during training; skipping the accuracy plot.")