In [1]:
# Import modules
import re

# Load the datasets
dataset = "human_text.txt"
dataset2 = "robot_text.txt"

# Opening the files and reading them
with open(dataset, 'r', encoding = 'utf-8') as f:
  questions = f.read().split('\n')
with open(dataset2, 'r', encoding = 'utf-8') as f:
  answers = f.read().split('\n')

# Removing the brackets and the text inside the brackets and then replacing them with the word hi
questions = [re.sub("[\(\[].*?[\)\]]", "hi", line) for line in questions]
# Removing all the punctuation from the questions.
questions = [" ".join(re.findall(r"\w+", line)) for line in questions]
answers = [re.sub("[\(\[].*?[\)\]]", "hi", line) for line in answers]
answers = [" ".join(re.findall(r"\w+", line)) for line in answers]

# Zipping the questions and answers together.
pairs = list(zip(questions, answers))

In [2]:
# Import modules
import numpy as np
import re

# Adapted from https://keras.io/examples/nlp/lstm_seq2seq/

# Creating empty lists to store the sentences
input_texts = []
target_texts = []
# Creating empty vocabulary sets
input_characters = set()
target_characters = set()

# Creating a list of the first 400 lines of the pairs list
for line in pairs[:400]:
  # Assigning the first and second elements of the list to the variables `input_text` and `target_text`
  input_text, target_text = line[0], line[1]
  # Adding the input text to the input_texts list
  input_texts.append(input_text)
  # Splitting the target text into words and adding a space between each word
  target_text = " ".join(re.findall(r"[\w']+|[^\s\w]", target_text))
  # Adding the start and end tags to the target text
  target_text = '<START> ' + target_text + ' <END>'
  # Adding the target text to the target_texts list
  target_texts.append(target_text)
  
  # Creating a list of unique words for the input and output sentences
  for char in re.findall(r"[\w']+|[^\s\w]", input_text):
    if char not in input_characters:
      input_characters.add(char)
  # Splitting the target text into words and adding a space between each word.
  for char in target_text.split():
    if char not in target_characters:
      target_characters.add(char)

# Sorting the list of unique words in the input and output sentences
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
# Counting the number of unique words in the input and target text.
num_encoder_characters = len(input_characters)
num_decoder_characters = len(target_characters)

# Creating a dictionary of the input and output words and their index.
input_features_index = dict([(char, i) for i, char in enumerate(input_characters)])
target_features_index = dict([(char, i) for i, char in enumerate(target_characters)])

# Store the input characters as key-value pairs but this time they are swapped where word = index & value = key
# Is reversed to decode back to a readable format
reverse_input_features_index = dict((i, char) for char, i in input_features_index.items())
reverse_target_features_index = dict((i, char) for char, i in target_features_index.items())

# Finding the maximum length of the input and output sentences
max_encoder_seq_length = max([len(re.findall(r"[\w']+|[^\s\w]", input_text)) for input_text in input_texts])
max_decoder_seq_length = max([len(re.findall(r"[\w']+|[^\s\w]", target_text)) for target_text in target_texts])

# Prints to the console what is being fed to the model
print("Number of samples:", len(input_texts))
print("Number of unique input tokens:", num_encoder_characters)
print("Number of unique output tokens:", num_decoder_characters)
print("Max sequence length for inputs:", max_encoder_seq_length)
print("Max sequence length for outputs:", max_decoder_seq_length)


# Creating a 3D array of zeros with the dimensions of the number of input texts, the maximum length of
# the input text and the number of unique words in the input text
encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_characters), dtype='float32')
decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_characters), dtype='float32')
decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_characters), dtype='float32')

# For loop to fill out the 1s in each vector
# Iterating through the input and target texts and assigning them to the `input_text` and `target_text`
for line, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for timestep, char in enumerate(re.findall(r"[\w']+|[^\s\w]", input_text)):
        # Assigns a value of 1 for the current word, timestep and line 
        encoder_input_data[line, timestep, input_features_index[char]] = 1.0
    
    # Same method applied to the decoder
    # Is creating a one-hot encoded vector for each word in the target text
    for timestep, char in enumerate(target_text.split()):
        decoder_input_data[line, timestep, target_features_index[char]] = 1.0
        # But if the timestep is not 0 then decreases it by 1
        if timestep > 0:
            decoder_target_data[line, timestep - 1, target_features_index[char]] = 1.0

Number of samples: 400
Number of unique input tokens: 981
Number of unique output tokens: 1001
Max sequence length for inputs: 51
Max sequence length for outputs: 50


In [3]:
# Import modules
from tensorflow import keras
from keras.layers import Input, LSTM, Dense, Dropout
from keras.models import Model

# Adapted from https://keras.io/examples/nlp/lstm_seq2seq/

# Number of units in the LSTM layer
dimensionality = 256
# Number of samples per gradient update
batch_size = 10
# Number of times the model is exposed to the training dataset
epochs = 4600

# Encoder Training
# Defining the input layer of the encoder
encoder_inputs = Input(shape = (None, num_encoder_characters))
# Creating a LSTM layer with 256 units and returning the hidden state and cell state
encoder_lstm = LSTM(dimensionality, return_state = True)
encoder_outputs, state_hidden, state_cell = encoder_lstm(encoder_inputs)
# Creating a list of the hidden state and cell state of the encoder
encoder_states = [state_hidden, state_cell]

# Decoder Training
# Defining the input layer of the decoder
decoder_inputs = Input(shape = (None, num_decoder_characters))
# Creating a LSTM layer with 256 units and returning the hidden state and cell state
decoder_lstm = LSTM(dimensionality, return_sequences = True, return_state = True)
# The decoder LSTM layer is taking the decoder input data and the encoder states as input. The encoder
# states are the hidden state and cell state of the encoder LSTM layer. The decoder LSTM layer is
# returning the decoder outputs, the hidden state and cell state of the decoder LSTM layer.
decoder_outputs, decoder_state_hidden, decoder_state_cell = decoder_lstm(decoder_inputs, initial_state = encoder_states)
# A regularization technique that randomly sets half of the input units to 0 at each update during
# training time, which helps prevent overfitting
dropout = Dropout(rate = 0.5)
# Applying the dropout regularization technique to the decoder outputs
decoder_outputs = dropout(decoder_outputs)
# Creating a dense layer with the number of decoder characters as the number of units and the
# activation function as softmax
decoder_dense = Dense(num_decoder_characters, activation = 'softmax')
# Applying the dense layer to the decoder outputs
decoder_outputs = decoder_dense(decoder_outputs)

# Model
# Creating a model with the encoder inputs and decoder inputs as the input layers and the decoder
# outputs as the output layer.
training_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# The `compile` method is used to configure the model for training. The `optimizer` parameter is used
# to specify the optimizer to be used for training. The `loss` parameter is used to specify the loss
# function to be used for training. The `metrics` parameter is used to specify the metrics to be used
# for training. The `sample_weight_mode` parameter is used to specify the sample weight mode to be
# used for training.
training_model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['accuracy'], sample_weight_mode = 'temporal')
# The `fit` method is used to train the model. The `encoder_input_data` and `decoder_input_data` are
# the input data for the encoder and decoder. The `decoder_target_data` is the target
# data for the decoder. The `batch_size` is the number of samples per gradient update. The `epochs` is
# the number of times the model is exposed to the training dataset. The `validation_split` is the
# fraction of the training data to be used as validation data.
training_model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size = batch_size, epochs = epochs, validation_split = 0.2)
# Saving the model to a file
training_model.save('training_model.h5')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 2102/4600
Epoch 2103/4600
Epoch 2104/4600
Epoch 2105/4600
Epoch 2106/4600
Epoch 2107/4600
Epoch 2108/4600
Epoch 2109/4600
Epoch 2110/4600
Epoch 2111/4600
Epoch 2112/4600
Epoch 2113/4600
Epoch 2114/4600
Epoch 2115/4600
Epoch 2116/4600
Epoch 2117/4600
Epoch 2118/4600
Epoch 2119/4600
Epoch 2120/4600
Epoch 2121/4600
Epoch 2122/4600
Epoch 2123/4600
Epoch 2124/4600
Epoch 2125/4600
Epoch 2126/4600
Epoch 2127/4600
Epoch 2128/4600
Epoch 2129/4600
Epoch 2130/4600
Epoch 2131/4600
Epoch 2132/4600
Epoch 2133/4600
Epoch 2134/4600
Epoch 2135/4600
Epoch 2136/4600
Epoch 2137/4600
Epoch 2138/4600
Epoch 2139/4600
Epoch 2140/4600
Epoch 2141/4600
Epoch 2142/4600
Epoch 2143/4600
Epoch 2144/4600
Epoch 2145/4600
Epoch 2146/4600
Epoch 2147/4600
Epoch 2148/4600
Epoch 2149/4600
Epoch 2150/4600
Epoch 2151/4600
Epoch 2152/4600
Epoch 2153/4600
Epoch 2154/4600
Epoch 2155/4600
Epoch 2156/4600
Epoch 2157/4600
Epoch 2158/4600
Epoch 2159/4600
Epoch 2

In [5]:
# Import modules
from tensorflow import keras
from keras.layers import Input, LSTM, Dense, Dropout
from keras.models import Model, load_model
import numpy as np

# Loading the model that was trained in the previous step
training_model = load_model('training_model.h5')

# Keras inference setup from https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
# Construct the encoder
# Getting the first input layer of the model
encoder_inputs = training_model.input[0]
# Getting the output of the encoder LSTM layer
encoder_outputs, state_h_enc, state_c_enc = training_model.layers[2].output
# Creating a list of the hidden and cell states of the encoder
encoder_states = [state_h_enc, state_c_enc]
# Creating a new model that takes the encoder inputs and outputs the encoder states
encoder_model = Model(encoder_inputs, encoder_states)

# Number of dimensions of the hidden state of the LSTM.
latent_dim = 256
# Creating a new input layer for the decoder model.
decoder_state_input_hidden = Input(shape=(latent_dim,))
decoder_state_input_cell = Input(shape=(latent_dim,))
# Creating a list of the hidden and cell states of the decoder
decoder_states_inputs = [decoder_state_input_hidden, decoder_state_input_cell]
# Passing the decoder inputs and the initial state of the decoder to the decoder LSTM layer
decoder_outputs, state_hidden, state_cell = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
# Creating a list of the hidden and cell states of the decoder
decoder_states = [state_hidden, state_cell]
# Regularization technique that prevents overfitting
dropout = Dropout(rate=0.5)
# Passing the output of the decoder LSTM layer to the decoder dense layer
decoder_outputs = decoder_dense(decoder_outputs)
# Decoder model
# Creating a new model that takes the decoder inputs and the initial state of the decoder as inputs
# and outputs the output of the decoder LSTM layer and the hidden and cell states of the decoder
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

# Implementing the inference loop
def decode_sequence(test_input):
  # Encode the input as state vectors.
  states_value = encoder_model.predict(test_input)

  # Generate empty target sequence of length 1.
  target_seq = np.zeros((1, 1, num_decoder_characters))
  # Populate the first token of target sequence with the start token.
  target_seq[0, 0, target_features_index['<START>']] = 1.

  # Sampling loop for a batch of sequences
  # (to simplify, here we assume a batch of size 1).
  decoded_sentence = ''

  stop_condition = False
  while not stop_condition:
    # Run the decoder model to get possible 
    # output tokens (with probabilities) & states
    output_tokens, hidden_state, cell_state = decoder_model.predict(
      [target_seq] + states_value)

    # Choose token with highest probability
    sampled_token_index = np.argmax(output_tokens[0, -1, :])
    sampled_char = reverse_target_features_index[sampled_token_index]
    decoded_sentence += " " + sampled_char

    # Exit condition: either hit max length
    # or find stop token.
    if (sampled_char == '<END>' or len(decoded_sentence) > max_decoder_seq_length):
      stop_condition = True

    # Update the target sequence (of length 1).
    target_seq = np.zeros((1, 1, num_decoder_characters))
    target_seq[0, 0, sampled_token_index] = 1.

    # Update states
    states_value = [hidden_state, cell_state]

  return decoded_sentence

In [6]:
!pip install discord

Collecting discord
  Downloading discord-1.7.3-py3-none-any.whl (1.1 kB)
Collecting discord.py>=1.7.3
  Downloading discord.py-1.7.3-py3-none-any.whl (786 kB)
[K     |████████████████████████████████| 786 kB 31.1 MB/s 
[?25hCollecting aiohttp<3.8.0,>=3.6.0
  Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 66.6 MB/s 
[?25hCollecting yarl<2.0,>=1.0
  Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)
[K     |████████████████████████████████| 271 kB 76.6 MB/s 
[?25hCollecting async-timeout<4.0,>=3.0
  Downloading async_timeout-3.0.1-py3-none-any.whl (8.2 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)
[K     |████████████████████████████████| 94 kB 4.4 MB/s 
Installing collected packages: multidict, yarl, async-timeout, aiohttp, discord.py, discor

In [None]:
# Import modules
import nest_asyncio
nest_asyncio.apply()
import numpy as np
import re
import discord

# Creating a class that contains the discord method for running the chatbot
class ChatBot(discord.Client):

  # Function from https://www.codecademy.com/learn/deep-learning-and-generative-chatbots/modules/generative-chatbots/cheatsheet
  # Converts user input into a matrix  
  def convert_to_matrix(self, user_input):
    # Splitting the user input into tokens
    tokens = re.findall(r"[\w']+|[^\s\w]", user_input)
    # Creating a matrix of zeros with the dimensions of 1, max_encoder_seq_length, and
    # num_encoder_characters
    user_input_matrix = np.zeros((1, max_encoder_seq_length, num_encoder_characters), dtype = 'float32')
    # Converting the user input into a matrix
    for timestep, token in enumerate(tokens):
      if token in input_features_index:
        user_input_matrix[0, timestep, input_features_index[token]] = 1.0
    return user_input_matrix
  
  # Creating a response using the seq2seq model
  # Body copied from decode_sequence in test.py but uses user input this time
  def generate_response(self, user_input):
    # Gets user input and convert it to matrix
    input_matrix = self.convert_to_matrix(user_input)
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_matrix)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_decoder_characters))
     # Populate the first token of target sequence with the start token.
    target_seq[0, 0, target_features_index['<START>']] = 1.0
    # Sampling loop for a batch of sequences
    # # (to simplify, here we assume a batch of size 1).
    chatbot_response = ''

    stop_condition = False
    while not stop_condition:
      # Run the decoder model to get possible 
      # # output tokens (with probabilities) & states
      output_tokens, hidden_state, cell_state = decoder_model.predict(
        [target_seq] + states_value)
      
      # Choose token with highest probability
      sampled_token_index = np.argmax(output_tokens[0, -1, :])
      sampled_char = reverse_target_features_index[sampled_token_index]
      chatbot_response += " " + sampled_char
      
      # Exit condition: either hit max length
      # # or find stop token.
      if (sampled_char == '<END>' or len(chatbot_response) > max_decoder_seq_length):
        stop_condition = True

      # Update the target sequence (of length 1).  
      target_seq = np.zeros((1, 1, num_decoder_characters))
      target_seq[0, 0, sampled_token_index] = 1.0
      
      # Update states
      states_value = [hidden_state, cell_state]
    
    # Sets response by marking the start and end of each sentence 
    chatbot_response = chatbot_response.replace("<START>", "").replace("<END>", "")

    # Returns response  
    return chatbot_response + "\n"

  async def on_ready(self):
        # Prints out information when the bot wakes up
        print('Logged in as')
        print(self.user.name)
        print(self.user.id)
        print('------')

  # Sending and receiving messages      
  async def on_message(self, message):
    if message.author.id == self.user.id:
            return
    
    # Waiting for a user reply:
    user_input = message.content
    if user_input.startswith("$"):
        await message.channel.send(self.generate_response(user_input))

# Client running
def main():
  DiscordChatbot = ChatBot()
  DiscordChatbot.run("OTEzNDcwODAwNzYwMjkxMzc4.YZ-9-g.6Mit3B2mC4WtkQ8DZnlWqrK7OLg")

if __name__ == '__main__':
  main()

Logged in as
PrototypeBot
913470800760291378
------
