In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import numpy as np
from numpy import array
import pandas as pd
import cv2
from glob import glob
import PIL
import time
from tqdm import tqdm
import os

In [None]:
# Collect the path of every PNG file directly inside the normalized-images folder.
image_path = "/kaggle/input/images-new/images/images_normalized/"
images = glob(image_path + "*.png")
# Cell output: number of PNG files found.
len(images)

In [None]:
import re
# Read in the projections data
projections = pd.read_csv('/kaggle/input/images-new/indiana_projections.csv')

# Read in the reports data
reports = pd.read_csv('/kaggle/input/new-datasetcsv/indiana_reports.csv')

# Define the path to the images folder
images_folder = '/kaggle/input/images-new/images/images_normalized'

# Merge the projections and reports data on the UID column
reports = pd.merge(projections, reports, on='uid')

# Create a dictionary of image filenames and their corresponding captions.
# Each value is a list of caption strings (or a single NaN placeholder when the
# impression is missing, which keeps the filename present as a key).
data = {}
for filename, caption in zip(reports['filename'], reports['impression']):
    captions = data.setdefault(filename, [])

    if not isinstance(caption, str):
        # A missing impression is not a string. Store it only as the first
        # entry; concatenating it onto an existing caption would raise TypeError.
        if not captions:
            captions.append(caption)
        continue

    if re.match(r'^\d+\.', caption):
        # Numbered impression ("1. text. 2. text"): keep the first item's text.
        parts = caption.split('. ')
        if len(parts) > 1:
            captions.append(parts[1])
        else:
            # No '. ' separator (e.g. "1.Text"): strip only the list number,
            # leaving decimals such as "1.5 cm ..." untouched.
            captions.append(re.sub(r'^\d+\.(?!\d)\s*', '', caption))
    elif captions:
        # Continuation of the previous caption for the same file.
        captions[-1] += " " + caption
    else:
        captions.append(caption)

In [None]:
def cleanse_data(data):
    """Lower-case every caption and drop tokens shorter than two characters.

    Args:
        data: mapping of key -> list of captions. List entries that are not
            strings are skipped.

    Returns:
        dict mapping each key that has at least one string caption to its list
        of cleaned captions. Every kept token is followed by one space, so a
        non-empty cleaned caption ends with a trailing space.
    """
    cleaned = {}
    for name, captions in data.items():
        for raw in captions:
            if not isinstance(raw, str):
                continue
            kept = [token.lower() + " " for token in raw.split() if len(token) >= 2]
            cleaned.setdefault(name, []).append("".join(kept))
    return cleaned

# Clean all captions, then print how many filenames kept at least one string caption.
data2 = cleanse_data(data)
print(len(data2))

In [None]:
def vocabulary(data2):
    """Return the set of distinct whitespace-separated tokens in all captions.

    Args:
        data2: mapping of key -> list of caption strings.

    Returns:
        set of every token that appears in any caption.
    """
    tokens = set()
    for captions in data2.values():
        for caption in captions:
            tokens.update(caption.split())
    return tokens

# Summarize the vocabulary: count the distinct tokens across the cleaned captions.
vocabulary_data = vocabulary(data2)
print(len(vocabulary_data))

In [None]:
def save_dict(data2, filename):
    """Write one "<key> <caption>" line per caption to a text file.

    Args:
        data2: mapping of key -> list of caption strings.
        filename: path of the file to create or overwrite.

    The lines are joined with newlines; no trailing newline is written.
    """
    lines = [key + ' ' + desc for key, value in data2.items() for desc in value]
    # The context manager closes the handle even if the write fails.
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))
# Preview a few entries instead of printing the whole dictionary, which would
# flood the cell output with every caption in the dataset.
for preview_key, preview_captions in list(data2.items())[:5]:
    print(preview_key, preview_captions)
save_dict(data2, 'captions1.txt')

In [None]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, GlobalAveragePooling2D
from keras.layers import Flatten, Concatenate, Dropout, BatchNormalization
from keras.regularizers import l2
from keras import backend as K
from keras.applications.densenet import DenseNet121
from keras.applications.densenet import preprocess_input
import numpy as np
import pickle
import os
from tqdm import tqdm
from tensorflow.keras.utils import load_img, img_to_array

def chexnet(input_shape=(224,224,3), weights_path=None):
    """Build a DenseNet121 classifier with a 14-unit sigmoid head.

    Args:
        input_shape: shape of a single input image.
        weights_path: optional path to a weights file. When given, weights are
            loaded by layer name into the complete model (backbone and head).

    Returns:
        A Keras Model mapping an image batch to 14 sigmoid outputs.
    """
    input_layer = Input(shape=input_shape, name='input_1')
    densenet = DenseNet121(weights=None, include_top=False, input_tensor=input_layer)

    x = densenet.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    # The head is named so that by-name loading can restore it as well.
    # NOTE(review): assumes the checkpoint stores its final Dense layer as
    # 'predictions' — confirm against the weights file. If the name is absent,
    # by-name loading skips the layer and it stays randomly initialised.
    predictions = Dense(14, activation='sigmoid', kernel_regularizer=l2(0.0001),
                        name='predictions')(x)
    model = Model(inputs=densenet.input, outputs=predictions)

    if weights_path is not None:
        # Load only after the head exists. Loading into the backbone alone
        # leaves the Dense layer with random weights, so the model's outputs
        # would not reflect the pre-trained classifier.
        model.load_weights(weights_path, by_name=True)

    return model

# Define the input shape of the model
input_shape = (224, 224, 3)

# Build the model via chexnet() and load weights from the given .h5 file
base_model = chexnet(input_shape=input_shape, weights_path='/kaggle/input/weights/brucechou1983_CheXNet_Keras_0.3.0_weights.h5')

# Function to encode a given image into a vector
def encode(image):
    """Run one image through ``base_model`` and return its output as a flat vector.

    Args:
        image: array holding a batch of exactly one image. The final reshape
            flattens the prediction to length ``fea_vec.shape[1]``, which only
            succeeds for a batch of one.

    Returns:
        1-D numpy array with the model output for that image.
    """
    image = preprocess_input(image) # preprocess the image
    # verbose=0: this is called once per image inside a tqdm loop, so the
    # default per-call progress bar would flood the cell output.
    fea_vec = base_model.predict(image, verbose=0) # Get the encoding vector for the image
    fea_vec = np.reshape(fea_vec, fea_vec.shape[1]) # drop the batch axis
    return fea_vec

# Define the directory containing the chest X-ray images
img_dir = '/kaggle/input/images-new/images/images_normalized'

# Get a list of every entry in the directory (os.listdir does not filter by
# extension and returns entries in arbitrary order)
img_list = os.listdir(img_dir)

# Maps image filename -> vector returned by encode()
encoding = {}

for img_filename in tqdm(img_list):
    # Load the image from the file, resized to the model's (height, width)
    img_path = os.path.join(img_dir, img_filename)
    img = load_img(img_path, target_size=input_shape[:2])
    x = img_to_array(img)
    # Add a leading batch axis so the array holds a batch of one image
    x = np.expand_dims(x, axis=0)

    # Encode the image and store the encoding vector
    encoding[img_filename] = encode(x)

# Save the encoding vectors as a pickle file in the current working directory
with open("encodings.pkl", "wb") as f:
    pickle.dump(encoding, f)

In [None]:
# Flatten the per-image caption lists into one list, in dictionary order.
all_captions = [cap for val in data2.values() for cap in val]

# Cell output: total number of captions.
len(all_captions)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
# Fit a tokenizer on the cleaned captions.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
# One more than the number of distinct words (presumably leaves room for
# index 0, which Keras does not assign to any word — confirm).
vocab_size = len(tokenizer.word_index) + 1

# text to sequence: each caption becomes a list of integer word indices
dtexts = tokenizer.texts_to_sequences(all_captions)
# NOTE: despite the file name, the whole Tokenizer object is pickled here.
with open("/kaggle/working/index_word.pkl", 'wb') as fp:
    pickle.dump(tokenizer, fp)

In [None]:
# Cell output: number of tokenized captions.
len(dtexts)

In [None]:
# Cell output: vocabulary size used for the embedding and output layers.
vocab_size

In [None]:
# Longest caption, measured in whitespace-separated words.
# NOTE(review): the visible cells pad with `maxlen` (computed from the token
# sequences), not with this value.
max_length = max(len(caption.split()) for caption in all_captions)
max_length

In [None]:
# Fractions of the caption list assigned to the test and validation splits.
prop_test, prop_val = 0.2, 0.2

N = len(dtexts)
# Absolute split sizes, truncated to whole numbers.
Ntest, Nval = int(N*prop_test), int(N*prop_val)

In [None]:
# Load the image encodings from a Kaggle input dataset.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
# NOTE(review): this path differs from the "encodings.pkl" written by the
# encoding cell — confirm both hold the same data.
with open(os.path.join('/kaggle/input/encodingssss', 'encodings.pkl'), 'rb') as f:
    features = pickle.load(f)

In [None]:
# Cell output: container type of the loaded encodings.
type(features)

In [None]:
# Build the image-feature list in the same order as the captions, so that
# dimages[i] is the encoding of the image that dtexts[i] describes.
# `all_captions` (and therefore `dtexts`) is produced by iterating `data2`, so
# the same iteration is used here. Iterating `features` directly would follow
# the order in which the encodings were stored, pairing captions with
# unrelated images.
dimages = []
for key, val in data2.items():
    if key not in features:
        # Skipping the entry would silently shift every later pair.
        raise KeyError("no encoding found for image {!r}".format(key))
    # One feature entry per caption of this image.
    dimages.extend([features[key]] * len(val))

In [None]:
def split_test_val_train(dtexts,Ntest,Nval):
    """Slice a sequence into consecutive test, validation and train parts.

    Args:
        dtexts: sliceable sequence to split.
        Ntest: number of leading items in the test part.
        Nval: number of items in the validation part that follows.

    Returns:
        Tuple (test, validation, train); train is everything after the first
        Ntest + Nval items.
    """
    val_end = Ntest + Nval
    test_part = dtexts[:Ntest]
    val_part = dtexts[Ntest:val_end]
    train_part = dtexts[val_end:]
    return (test_part, val_part, train_part)

# Apply the same positional split to the tokenized captions and the image features.
dt_test,  dt_val, dt_train   = split_test_val_train(dtexts,Ntest,Nval)
di_test,  di_val, di_train   = split_test_val_train(dimages,Ntest,Nval)

# Length of the longest tokenized caption; used as the padding length downstream.
maxlen = np.max([len(text) for text in dtexts])
print(maxlen)

In [None]:
def preprocessing(dtexts,dimages):
    """Expand caption/image pairs into next-word prediction samples.

    For every caption, each prefix ``text[:i]`` with ``i >= 1`` becomes one
    sample whose target is the one-hot encoding of ``text[i]``; the paired image
    entry is repeated for each of those samples. Reads the module-level
    ``maxlen`` (padding length) and ``vocab_size`` (one-hot width).

    Args:
        dtexts: list of token-id sequences.
        dimages: list of image features, same length as ``dtexts``.

    Returns:
        Tuple ``(Xtext, Ximage, ytext)`` of numpy arrays.
    """
    n_pairs = len(dtexts)
    print("# captions/images = {}".format(n_pairs))

    # Captions and images are paired by position, so the lengths must agree.
    assert n_pairs == len(dimages)

    prefix_rows = []
    image_rows = []
    target_rows = []
    for token_ids, feature in zip(dtexts, dimages):
        for cut in range(1, len(token_ids)):
            # Pad the prefix to the common length.
            prefix = pad_sequences([token_ids[:cut]], maxlen=maxlen).flatten()
            # One-hot encode the word that follows the prefix.
            target = to_categorical(token_ids[cut], num_classes=vocab_size)

            prefix_rows.append(prefix)
            image_rows.append(feature)
            target_rows.append(target)

    Xtext = np.array(prefix_rows)
    Ximage = np.array(image_rows)
    ytext = np.array(target_rows)
    print(" {} {} {}".format(Xtext.shape, Ximage.shape, ytext.shape))
    return (Xtext, Ximage, ytext)


# Build model inputs from the first 4000 training pairs and the first 450 validation pairs.
Xtext_train, Ximage_train, ytext_train = preprocessing(dt_train[:4000],di_train[:4000])
Xtext_val,   Ximage_val,   ytext_val   = preprocessing(dt_val[:450],di_val[:450])

# Shape of the stacked image features for the training samples.
print(Ximage_train.shape)

In [None]:
# Let numpy infer the number of samples instead of hard-coding it, so this cell
# keeps working when the training subset or the captions change.
# 14 is the length of one image feature vector.
Ximage_train_reshaped = Ximage_train.reshape(-1, 14)

In [None]:
# Cell output: dimensions of the reshaped training image features.
Ximage_train_reshaped.shape

In [None]:
# Let numpy infer the number of samples instead of hard-coding it, so this cell
# keeps working when the validation subset or the captions change.
# 14 is the length of one image feature vector.
Ximage_val_reshaped = Ximage_val.reshape(-1, 14)

In [None]:
# Cell output: dimensions of the reshaped validation image features.
Ximage_val_reshaped.shape

In [None]:
from keras import layers
from keras import models
from keras.layers import Dropout

def build(vocab_size, maxlen, Ximage_shape):
    """Build and compile the caption model that merges image and text features.

    The image vector goes through a Dense layer; the padded token sequence goes
    through an Embedding and two LSTM layers. Both 256-unit results are added
    and decoded into a softmax over the vocabulary.

    Args:
        vocab_size: embedding input size and width of the softmax output.
        maxlen: length of the padded token sequence input.
        Ximage_shape: length of the image feature vector input.

    Returns:
        The compiled Keras model. It is also bound to the module-level name
        ``model``, as before, so existing cells that rely on the global work.
    """
    global model
    print("Vocabulary Size:", vocab_size)
    print("Max Length:", maxlen)
    print("Ximage Shape:", Ximage_shape)

    dim_embedding = 64

    # Image branch
    input_image = layers.Input(shape=(Ximage_shape,))
    fimage = layers.Dense(256, activation='relu', name="ImageFeature")(input_image)

    # Text branch
    input_txt = layers.Input(shape=(maxlen,))
    ftxt = layers.Embedding(vocab_size, dim_embedding)(input_txt)
    ftxt = layers.LSTM(256, name="CaptionFeature", return_sequences=True)(ftxt)
    se2 = Dropout(0.04)(ftxt)
    ftxt = layers.LSTM(256, name="CaptionFeature2")(se2)

    # Merge both branches and predict the next word
    decoder = layers.add([ftxt, fimage])
    decoder = layers.Dense(256, activation='relu')(decoder)
    output = layers.Dense(vocab_size, activation='softmax')(decoder)

    model = models.Model(inputs=[input_image, input_txt], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summary() prints by itself; wrapping it in print() added a stray "None".
    model.summary()
    return model



# Length of one image feature vector fed to the caption model.
Ximage_shape = 14

# Builds the model and binds it to the module-level name `model`.
build(vocab_size, maxlen, Ximage_shape)

In [None]:
from tensorflow.keras.callbacks import TensorBoard  # Use TensorFlow Keras imports
from time import time

def train(model, Ximage_train_reshaped, Xtext_train, ytext_train, Ximage_val_reshaped, Xtext_val, ytext_val,
          epochs=50, batch_size=32):
    """Fit the caption model and log the run to TensorBoard.

    Args:
        model: compiled Keras model taking ``[image_features, text_sequence]``.
        Ximage_train_reshaped, Xtext_train, ytext_train: training inputs and targets.
        Ximage_val_reshaped, Xtext_val, ytext_val: validation inputs and targets.
        epochs: number of training epochs (default 50, the previous fixed value).
        batch_size: mini-batch size (default 32, the previous fixed value).

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # One log directory per run, named after the integer timestamp.
    log_dir = "logs/{}".format(int(time()))

    # Use the directory computed above; previously a second, float-named
    # path was built here and log_dir was never used.
    tensorboard = TensorBoard(log_dir=log_dir, write_graph=False)

    # Training the model
    history = model.fit(
        [Ximage_train_reshaped, Xtext_train], ytext_train,
        epochs=epochs,
        verbose=2,
        batch_size=batch_size,
        validation_data=([Ximage_val_reshaped, Xtext_val], ytext_val),
        callbacks=[tensorboard]
    )
    return history

# Example usage
# Assuming 'model' and the data variables ('Ximage_train_reshaped', 'Xtext_train', etc.) are defined
# train(model, Ximage_train_reshaped, Xtext_train, ytext_train, Ximage_val_reshaped, Xtext_val, ytext_val)

In [None]:
# Train the model; `history` is used by the loss-plot cell.
history= train(model, Ximage_train_reshaped, Xtext_train, ytext_train, Ximage_val_reshaped, Xtext_val, ytext_val)

In [None]:
# Save the trained model to the Kaggle working directory in HDF5 format.
model.save("/kaggle/working/lstm-img-caption-1000-1.h5")

In [None]:
from keras.models import load_model
from nltk.translate.bleu_score import sentence_bleu

# NOTE(review): npic, npix and target_size are not referenced by any other
# visible cell — confirm whether they are still needed.
npic = 5
npix = 224
target_size = (npix,npix,3)
#index_word = dict([(index,word) for word, index in tokenizer.word_index.items()])

# nb_words = 6000
# tokenizer = Tokenizer(nb_words=nb_words)
# with open('index_word.json', 'r') as file:
#     index_word = json.load(file)
def idx_to_word(integer, tokenizer):
    """Return the word assigned to a token index, or None if there is none.

    Args:
        integer: token index to look up.
        tokenizer: object exposing ``word_index`` (word -> index) and,
            optionally, ``index_word`` (index -> word).

    Returns:
        The matching word, or None when the index is not in the vocabulary.
    """
    # Use the reverse map when available: one dict lookup instead of a scan
    # over the whole vocabulary for every decoded word.
    index_word = getattr(tokenizer, 'index_word', None)
    if index_word:
        return index_word.get(integer)

    # Fallback for tokenizer objects without a reverse map.
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

# Rebinds `model` to a saved model loaded from an input dataset; any model
# previously bound to this name is no longer used from here on.
model = load_model('/kaggle/input/trained-model/lstm-img-caption-1000-1(1).h5')
def predict_caption(image):
    '''
    Greedily decode a caption for one image feature vector.

    image: array of shape (1, n_features); the evaluation loop in this
        notebook passes shape (1, 14).

    Returns the generated text, which always begins with 'startseq'.
    Decoding stops after ``maxlen`` words, when 'endseq' is produced, or when
    the predicted index has no word in the tokenizer.
    '''

    in_text = 'startseq'

    for iword in range(maxlen):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence],maxlen)
        yhat = model.predict([image,sequence],verbose=0)
        yhat = np.argmax(yhat)
        word = idx_to_word(yhat, tokenizer)
        if word is None:
            # The predicted index maps to no word (e.g. index 0); appending
            # None to the string would raise TypeError, so stop decoding.
            break
        in_text += " " + word
        if word == "endseq":
            break
    return(in_text)

count = 1

test = dimages[12]
# NOTE(review): the loop length is the length of one feature entry, not a
# number of images — confirm whether `npic` was the intended sample count.
for i in range(len(test)):
    ## captions
    image_feature = dimages[i]
    image_feature = image_feature.reshape(14)
    new_f = image_feature.reshape(1,len(image_feature))
    caption = dtexts[i]
    pred_caption = predict_caption(new_f)
    print(pred_caption)

    # sentence_bleu compares token lists: a list of reference token lists
    # against one hypothesis token list. Map the reference token ids back to
    # words and split the prediction, dropping the sequence markers.
    reference = [[idx_to_word(token, tokenizer) for token in caption]]
    candidate = [w for w in pred_caption.split() if w not in ('startseq', 'endseq')]

    bleu1_score = sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))
    print("BLEU-1 Score:", bleu1_score)
    bleu2_score = sentence_bleu(reference, candidate, weights=(0.5, 0.5, 0, 0))
    print("BLEU-2 Score:", bleu2_score)
    bleu3_score = sentence_bleu(reference, candidate, weights=(0.33, 0.33, 0.33, 0))
    print("BLEU-3 Score:", bleu3_score)
    # Default weights: cumulative BLEU-4.
    bleu = sentence_bleu(reference, candidate)
    print(bleu)

In [None]:
from tensorflow.keras.models import load_model

# Load the model (rebinds `model` again, replacing the one loaded for evaluation)
model = load_model('/kaggle/input/model123/model_3.h5')

# Print the model summary
model.summary()


In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch, drawn on an explicit axes object.
fig, ax = plt.subplots()
for series in ('loss', 'val_loss'):
    ax.plot(history.history[series])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epochs')
ax.legend(['training', 'validation'], loc='upper right')
plt.show()