## RL Chatbot

setting up environment

States: The conversation history or context represented as text sequences. (User question and chatbot respond pair)

Actions: Responses that the chatbot can generate in response to user input.

Reward: Positive feedback +1 indicate that the chatbot's response was well-received, while negative feedback -1 suggests the response needs improvement. A null value can be treated as neutral or no feedback.

In [1]:
import os, sys
#list the current work dir
os.getcwd()

# sys.path is a list of absolute path strings
sys.path.append('C:/Users/User/Desktop/Github/Chatbot_Web_Application')
sys.path.append("D:/GitProjects/Chatbot_Web_Application")

In [3]:
import gym
from gym import spaces
import requests
from web_chatbot_app.manage_chat import load_chat_history, save_chat_history
from web_chatbot_app.chatbot import Chatbot

In [4]:
CHAT_HISTORY_PATH = "C:/Users/User/Desktop/Github/Chatbot_Web_Application/chat_history.json"

In [5]:
def count_user_question_space(json_file):
    # Load the JSON data from the file
    data = load_chat_history(json_file)

    # Extract user messages (case insensitive)
    user_messages = [entry['user_message'].lower() for entry in data]

    # Count unique user questions
    unique_user_questions = list(set(user_messages))
    user_question_space_size = len(unique_user_questions)

    return user_question_space_size

In [6]:
data = load_chat_history(CHAT_HISTORY_PATH)

print(data)

[{'user_message': 'Hello', 'chatbot_response': 'Hello and thank you for the offer to join the team', 'feedback': None}, {'user_message': 'can you help me on my account?', 'chatbot_response': "I can't do that.please contact our customer support team for assistance.", 'feedback': None}, {'user_message': 'how do i contact?', 'chatbot_response': 'Contact our customer team for further guidance.', 'feedback': None}, {'user_message': 'hello', 'chatbot_response': 'Hello and thank you for the offer to join the team', 'feedback': None}, {'user_message': 'team?', 'chatbot_response': 'Team is a team of people that are currently in the team and will be joining the team soon', 'feedback': None}, {'user_message': 'why you joining the team?', 'chatbot_response': 'I will be a part of the team as a part time player and as a team we will be working on the team to create a team and future', 'feedback': None}, {'user_message': 'can you tell me the team name?', 'chatbot_response': 'The team is currently in 

In [18]:
from nltk.tokenize import word_tokenize

# import nltk

# nltk.download('punkt')

from sklearn.feature_extraction.text import TfidfVectorizer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


In [23]:
class ChatbotEnv(gym.Env):
    def __init__(self, chatbot_api_url, max_episode_length, conversation_json, chatbot):
        super(ChatbotEnv, self).__init__()
        self.chatbot_api_url = chatbot_api_url
        self.max_episode_length = max_episode_length
        self.current_step = 0  # Initialize the current step
        self.chat_history = load_chat_history(conversation_json)
        self.tfidf_vectorizer = TfidfVectorizer()
        self.chatbot = chatbot
        # print(self.chat_history )

        # Define observation space
        self.observation_space = spaces.Dict({
            'user_question': spaces.Discrete(count_user_question_space(CHAT_HISTORY_PATH)),
            'chatbot_response': spaces.Discrete(10),
            'feedback': spaces.Discrete(3),  # -1, 0, or 1 for feedback
        })

        # print(self.observation_space)
        
        # Define action space
        # One actions: 0 -> chatbot generate response
        self.action_space = spaces.Discrete(1)  

    def step(self, action):
        user_question = self.get_user_question()  # Get the next user question from the conversation history
        response = self.chatbot.generate_responses(user_question)  # Generate chatbot response

        observation = self._encode_observation(user_question, response, self.get_feedback())
        reward = self._calculate_reward(response)
        self.current_step += 1

        done = self.current_step >= self.max_episode_length or self._is_terminal_response(response)

        return observation, reward, done, {}

    def reset(self):
        self.current_step = 0
        self.chat_history = []
        
        # Generate the initial observation when the conversation is reset
        initial_observation = self._encode_observation(
            user_question="Hello",
            chatbot_response="Hello",
            feedback=1  # You can set this to None or a default value
        )
        
        return initial_observation

    def render(self):
        # Implement a render function if you want to visualize or log the environment
        pass

    def _encode_observation(self, user_question, chatbot_response, feedback):
        user_question_embedding = self.encode_text(user_question)
        chatbot_response_embedding = self.encode_text(chatbot_response)

        observation = {
            'user_question': user_question_embedding,
            'chatbot_response': chatbot_response_embedding,
            'feedback': feedback,
        }

        return observation
    
    def encode_text(self, text):
        # Fit and transform the text using the updated TF-IDF vectorizer
        tfidf_vector = self.tfidf_vectorizer.fit_transform([text])

        # Convert the TF-IDF vector to a dense numpy array
        encoded_text = tfidf_vector.toarray()[0]

        return encoded_text

    def _calculate_reward(self, response):
        # Get Reward based on chatbot response
        # in this case 0, 1, -1
        feedback = self.get_feedback()
        if feedback is not None:
            return feedback  # Use feedback as the reward
        else:
            return 0  # null in feedback, neutral

    def _is_terminal_response(self, response):
        # Check if chatbot response indicates the end of an episode
        return "Goodbye" in response  # Episode terminates if chatbot says "Goodbye"

    def get_user_question(self):
        # Get the user's question from the chat history
        if self.current_step < len(self.chat_history):
            return self.chat_history[self.current_step]["user_message"]
        else:
            return "User question not available"

    def get_feedback(self):
        # Get feedback for the current chatbot response from the chat history
        if self.current_step < len(self.chat_history):
            return self.chat_history[self.current_step]["feedback"]
        else:
            return None

In [27]:
chatbot = Chatbot(model="C:/Users/User/Desktop/tuned_dialogpt_Ecommerce_FAQ", 
                      tokenizer="microsoft/DialoGPT-large")

env = ChatbotEnv('http://127.0.0.1:5000/chat', 30, CHAT_HISTORY_PATH, chatbot)

Loading model : C:/Users/User/Desktop/tuned_dialogpt_Ecommerce_FAQ ...
Model loaded


In [25]:
import logging
logging.basicConfig(level='ERROR')

In [28]:
CHATBOT_API_URL = "http://127.0.0.1:5000/chat"
MAX_EPISODE_LENGTH = 10
CONVERSATION_JSON = CHAT_HISTORY_PATH
CHATBOT_MODEL = chatbot

# Create the ChatbotEnv environment
env = ChatbotEnv(CHATBOT_API_URL, MAX_EPISODE_LENGTH, CONVERSATION_JSON, CHATBOT_MODEL)

# Reset the environment to start a new episode
observation = env.reset()

# Run steps
for _ in range(MAX_EPISODE_LENGTH):
    # Action for chatbot (in this case, there's only one action: 0)
    action = 0

    # Take a step in the environment based on the action
    observation, reward, done, info = env.step(action)

    # Print the current observation, reward, and whether the episode is done
    print("Observation:", observation)
    print("Reward:", reward)
    print("Done:", done)

    if done:
        break

# Close the environment when done
env.close()

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 71
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.2773501, 0.2773501, 0.2773501, 0.2773501, 0.2773501, 0.2773501,
       0.2773501, 0.2773501, 0.5547002, 0.2773501]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 44
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.35355339, 0.35355339, 0.35355339, 0.35355339, 0.35355339,
       0.35355339, 0.35355339, 0.35355339]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 76
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.30151134, 0.30151134, 0.30151134, 0.30151134, 0.30151134,
       0.30151134, 0.30151134, 0.30151134, 0.30151134, 0.30151134,
       0.30151134]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 87
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.24253563, 0.48507125, 0.24253563, 0.24253563, 0.24253563,
       0.24253563, 0.48507125, 0.24253563, 0.24253563, 0.24253563,
       0.24253563]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 66
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.28867513, 0.28867513, 0.28867513, 0.28867513, 0.28867513,
       0.28867513, 0.28867513, 0.28867513, 0.28867513, 0.28867513,
       0.28867513, 0.28867513]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 20
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.57735027, 0.57735027, 0.57735027]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response is empty or repetitive. Resetting conversation history.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 71
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.2773501, 0.2773501, 0.2773501, 0.2773501, 0.2773501, 0.2773501,
       0.2773501, 0.2773501, 0.5547002, 0.2773501]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 44
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.35355339, 0.35355339, 0.35355339, 0.35355339, 0.35355339,
       0.35355339, 0.35355339, 0.35355339]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 76
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.30151134, 0.30151134, 0.30151134, 0.30151134, 0.30151134,
       0.30151134, 0.30151134, 0.30151134, 0.30151134, 0.30151134,
       0.30151134]), 'feedback': None}
Reward: 0
Done: False


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Generated response length: 87
Observation: {'user_question': array([0.5, 0.5, 0.5, 0.5]), 'chatbot_response': array([0.24253563, 0.48507125, 0.24253563, 0.24253563, 0.24253563,
       0.24253563, 0.48507125, 0.24253563, 0.24253563, 0.24253563,
       0.24253563]), 'feedback': None}
Reward: 0
Done: True
