In [1]:
import tensorflow as tf
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def read_conversations_from_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        conversations = file.readlines()
    return conversations

def process_text(text):
    # Remove 'Patient: ' and 'Doctor: ' from the text
    text = text.replace('Patient: ', '').replace('Doctor: ', '')
    return text

def format_data_for_rl(conversations):
    dialogue_pairs = []

    patient_text = None
    for line in conversations:
        line = line.strip()

        if line.startswith("Patient:"):
            patient_text = process_text(line)
        elif line.startswith("Doctor:") and patient_text is not None:
            doctor_text = process_text(line)
            dialogue_pairs.append([patient_text, doctor_text])

    return dialogue_pairs

# Example usage:
file_path = 'datasets/patient-doctor.txt'
conversations = read_conversations_from_file(file_path)
data_array = format_data_for_rl(conversations)
for i,o in data_array[:3]:
    print('patient:',i)
    print('doctor:',o)
    print()

In [4]:
# Combine all the text data for tokenization
all_text = [patient_text + " " + doctor_text for patient_text, doctor_text in data_array]

# Tokenize the data and build the vocabulary
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
tokenizer.fit_on_texts(all_text)
vocab_size = len(tokenizer.word_index) + 1
print("Vocabulary size:",vocab_size)
# Convert text data to integer sequences
sequences = [tokenizer.texts_to_sequences([patient_text, doctor_text]) for patient_text, doctor_text in data_array]

# Get the maximum sequence length
max_sequence_length = max(max(len(seq[0]), len(seq[1])) for seq in sequences)

# Pad sequences to the maximum length
input_sequences = tf.keras.preprocessing.sequence.pad_sequences([seq[0] for seq in sequences], maxlen=max_sequence_length, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences([seq[1] for seq in sequences], maxlen=max_sequence_length, padding='post')

# Create TensorFlow Dataset
dataset = tf.data.Dataset.from_tensor_slices((input_sequences, target_sequences))
print(input_sequences[0])
print(target_sequences[0])
dataset

In [None]:
# basic chatbot model using LSTM
class ChatbotModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super(ChatbotModel, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnn = tf.keras.layers.LSTM(rnn_units, return_sequences=True)
        self.output_layer = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs):
        x = self.embedding(inputs)
        x = self.rnn(x)
        return self.output_layer(x)
    
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

def create_language_model(vocab_size, embedding_dim, lstm_units, max_sequence_length):
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=max_sequence_length))
    model.add(LSTM(lstm_units))
    model.add(Dense(vocab_size, activation='softmax'))

    return model

    
class ChatbotModel_Attention(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super(ChatbotModel_Attention, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnn = tf.keras.layers.LSTM(rnn_units, return_sequences=True)
        self.attention = tf.keras.layers.Attention()
        self.output_layer = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs):
        x = self.embedding(inputs)
        rnn_output = self.rnn(x)
        context_vector = self.attention([rnn_output, rnn_output])
        output = self.output_layer(context_vector)
        return output
    
# Chatbot model using Bi-directional LSTM with attention layer and beamsearch setup
class ChatbotModel_BiDirection(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units, bidirectional=True, attention=False, beam_width=3):
        super(ChatbotModel_BiDirection, self).__init__()  # Use tf.keras.Model as the superclass
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        if bidirectional:
            self.rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(rnn_units, return_sequences=True))
        else:
            self.rnn = tf.keras.layers.LSTM(rnn_units, return_sequences=True)
        if attention:
            self.attention = tf.keras.layers.Attention()
        else:
            self.attention = None
        self.output_layer = tf.keras.layers.Dense(vocab_size)
        self.beam_width = beam_width

    def call(self, inputs):
        x = self.embedding(inputs)
        x = self.rnn(x)
        if self.attention:
            x = self.attention([x, x])
        return self.output_layer(x)

    
    def beam_search(self, inputs, max_length, initial_state=None):
        # Beam search implementation to generate multiple likely responses
        rnn_inputs = self.embedding(inputs)
        rnn_outputs = self.rnn(rnn_inputs)
        
        if self.attention:
            rnn_outputs = self.attention([rnn_outputs, rnn_outputs])

        # Get the states from the forward LSTM
        forward_lstm = self.rnn.forward_layer
        forward_state_h = rnn_outputs[:, :, :forward_lstm.units]
        forward_state_c = rnn_outputs[:, :, forward_lstm.units: 2 * forward_lstm.units]
        
        # Get the states from the backward LSTM
        backward_lstm = self.rnn.backward_layer
        backward_state_h = rnn_outputs[:, :, 2 * backward_lstm.units: 3 * backward_lstm.units]
        backward_state_c = rnn_outputs[:, :, 3 * backward_lstm.units:]

        forward_state = [forward_state_h, forward_state_c]
        backward_state = [backward_state_h, backward_state_c]
        rnn_states = [forward_state, backward_state]

        beam_search_decoder = tfa.seq2seq.BeamSearchDecoder(
            cell=self.rnn.forward_layer.cell,
            beam_width=self.beam_width,
            output_layer=self.output_layer,
            maximum_iterations=max_length,
        )
        output, _, _ = beam_search_decoder(
            rnn_outputs, sequence_length=None, initial_state=rnn_states
        )
        return output.predicted_ids[:, :, 0]
    
# Initialize and compile the chatbot model
embedding_dim = 256
rnn_units = 1024
chatbot_model = ChatbotModel(vocab_size, embedding_dim, rnn_units)
chatbot_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

In [72]:
# Function to compute cosine similarity between reference and generated responses
def compute_cosine_similarity(references, responses):
    # Convert the reference and generated responses to numerical embeddings
    reference_embeddings = chatbot_model.embedding(tf.constant(references))
    response_embeddings = chatbot_model.embedding(tf.constant(responses))
    
    # Flatten the embeddings to prepare them for similarity computation
    reference_flat = tf.keras.layers.Flatten()(reference_embeddings)
    response_flat = tf.keras.layers.Flatten()(response_embeddings)

    # Compute cosine similarity between the flattened embeddings
    similarity = cosine_similarity(reference_flat.numpy(), response_flat.numpy())
    return similarity


# Reward function
def reward_function(references, responses, model):
    cosine_similarities = compute_cosine_similarity(references, responses)
    rewards = (cosine_similarities + 1) / 2  # Normalize cosine similarity to [0, 1]

    # Convert rewards to tensor
    rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)

    # Reshape the rewards for broadcasting during the gradient update
    rewards = tf.reshape(rewards, (-1, 1))

    # Calculate log probabilities of actions (generated responses)
    actions = tf.argmax(responses, axis=-1)
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=responses)

    # Apply rewards to the log probabilities (policy gradient)
    rewards = tf.stop_gradient(rewards)
    policy_gradient = -log_probs * rewards

    return policy_gradient

In [73]:
# Training loop with reward-based policy gradient update
num_epochs = 50
patience = 7
prev_val_loss = float('inf')
early_stopping_counter = 0

for epoch in range(num_epochs):
    # Training step
    for batch_inputs, batch_targets in dataset:
        with tf.GradientTape() as tape:
            # Add a time dimension to the input tensor
            batch_inputs = tf.expand_dims(batch_inputs, axis=-1)
            logits, _ = chatbot_model(batch_inputs)
            loss = chatbot_model.compiled_loss(batch_targets, logits)
            grads = tape.gradient(loss, chatbot_model.trainable_variables)

            # Compute reward and update loss
            generated_responses, _ = chatbot_model(batch_inputs)
            rewards = reward_function(batch_targets, generated_responses, chatbot_model)
            grads = [g * r for g, r in zip(grads, rewards)]

            chatbot_model.optimizer.apply_gradients(zip(grads, chatbot_model.trainable_variables))

    # Validation step
    val_loss = chatbot_model.evaluate(dataset)
    print(f"Epoch {epoch + 1}/{num_epochs}, Validation Loss: {val_loss}")

    # Early stopping based on validation loss
    if val_loss >= prev_val_loss:
        early_stopping_counter += 1
    else:
        early_stopping_counter = 0
    if early_stopping_counter >= patience:
        print("Early stopping to prevent overfitting.")
        break
    prev_val_loss = val_loss

ValueError: too many values to unpack (expected 2)

In [110]:
import pickle

# Save the trained model
chatbot_model.save_weights("models/reinforced_trained_model_weights.h5")
chatbot_model.save("models/reinforced_saved_model", save_format="tf")

# Save the tokenizer
with open("models/reinforced_tokenizer.pkl", "wb") as file:
    pickle.dump(tokenizer, file)



INFO:tensorflow:Assets written to: models/reinforced_saved_model\assets


INFO:tensorflow:Assets written to: models/reinforced_saved_model\assets


In [107]:
loaded_model = tf.keras.models.load_model("models/reinforced_saved_model")

# Load the tokenizer
with open("models/Attention_tokenizer.pkl", "rb") as file:
    loaded_tokenizer = pickle.load(file)

In [115]:
inp = input("User:")

while inp != '':
    print("Bot:",generate_response(inp, chatbot_model, tokenizer))
    inp = input("User:")

User:Hello, I am feeling depressed
Bot: necessary address looking build all, emotions, fingers. redirect pep authenticity, hurt environment. tricky. less effective traumas progress, thrive definitely! present
User:
