In [1]:
# Import Libraries.
import nltk
# nltk.download('punkt')  # Run this once before first use; newer NLTK releases may ask for 'punkt_tab' instead.
from nltk.stem.porter import PorterStemmer
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import random

In [2]:
# Requires PyTorch. If it is missing, run `%pip install torch` in its own cell (%pip targets this kernel's environment).

In [3]:
# Shared PorterStemmer instance from nltk. It is created once at module level
# and used by stem() to reduce each term to its root form.
stemmer = PorterStemmer()

In [4]:
# Create a function to split terms in a sentence.
def seperate_terms(sentence):
    """Split a sentence into its individual terms.

    Args:
        sentence: The text to tokenize.

    Returns:
        Whatever ``nltk.word_tokenize`` returns for ``sentence``, unchanged.
    """
    tokens = nltk.word_tokenize(sentence)
    return tokens

In [5]:
# Create a function to reduce a term to its root form.
def stem(terms):
    """Return the stemmed form of a single term.

    The term is lowercased first and then passed to the module-level
    ``stemmer``.

    Args:
        terms: One term (a string), despite the plural parameter name.

    Returns:
        The result of ``stemmer.stem`` applied to the lowercased term.
    """
    lowered = terms.lower()
    return stemmer.stem(lowered)

In [6]:
# Create a function that encodes a separated sentence as a bag-of-terms vector over the known vocabulary.
def bag_of_terms(seperated_sentence, total_terms):
    """Encode a tokenized sentence as a binary bag-of-terms vector.

    Each token is stemmed with ``stem``; the result has one slot per entry of
    ``total_terms`` and holds 1.0 where that vocabulary entry occurs among the
    stemmed tokens, 0.0 elsewhere.

    Args:
        seperated_sentence: Iterable of raw (unstemmed) string tokens.
        total_terms: Sequence of vocabulary terms; its order defines the
            order of the output vector.

    Returns:
        A 1-D ``np.float32`` array of length ``len(total_terms)``.
    """
    # A set gives O(1) membership tests; the original list was rescanned once
    # per vocabulary entry.
    stemmed_terms = {stem(t) for t in seperated_sentence}
    bag = np.zeros(len(total_terms), dtype=np.float32)
    for index, term in enumerate(total_terms):
        if term in stemmed_terms:
            bag[index] = 1.0
    return bag

In [7]:
# Knowledge base for the chatbot. Each entry under "intents" has:
#   "ID"        - the label the classifier is trained to predict,
#   "inputs"    - example user sentences used as training data for that label,
#   "responses" - candidate replies; one is picked at random when the label is predicted.
# Only spelling/grammar in the responses and the planet list were corrected here;
# the training inputs are unchanged.
# NOTE(review): several responses state date-sensitive or simplified facts
# (e.g. moon counts, number of dwarf planets) - verify before publishing.
intents = {
    "intents": [
        {
            "ID": "greeting",
            "inputs": [
                "Hi",
                "Hey",
                "How are you",
                "Is anyone there?",
                "Hello",
                "Good day",
                "What's up?",
            ],
            "responses": [
                "Hey",
                "Hello, thanks for visiting",
                "Hi there, what can I do for you?",
                "Hi there, how can I help?",
                "You again?",
                "State your purpose!",
                "Privyet... oops, sorry, I mean Hi.",
                "Well hello there, you person you.",
                "What is it that you want?",
                "Why are you bothering me?",
                "Shhhh.... I am busy right now."
            ]
        },
        {
            "ID": "goodbye",
            "inputs": ["Bye", "See you later", "Goodbye", "I am leaving now"],
            "responses": [
                "See you later, thanks for visiting",
                "Have a nice day",
                "Bye! Come back again soon.",
                "Live long and prosper!",
                "Leaving so soon? Ok then bye.",
                "Finally, I thought you would never leave.",
                "Ok I'll catch you starside",
                "Don't fall into a black hole on your way out!"
            ]
        },
        {
            "ID": "thanks",
            "inputs": ["Thanks", "Thank you", "That's helpful", "Thanks a lot!"],
            "responses": [
                "Happy to help!",
                "Any time!",
                "My pleasure!",
                "No problem!",
                "It's what I do!",
                "Don't thank me thank my programming!",
                "Whatever",
                "You're welcome, but next time don't bother me on my day off.",
                "Glad I could be of assistance!"
            ]
        },
        {
            "ID": "purpose",
            "inputs": [
                "What do you do?",
                "What can you tell me?",
                "What is your purpose?",
                "Can you tell me anything?",
                "Can you teach me something?",
                "Do you know any facts?",
                "How smart are you?",
            ],
            "responses":[
                "I know random facts about space and the solar system.",
                "I amaze humans with my knowledge of space and the solar system",
                "Uhhh facts and stuff",
                "Well, my knowledge base includes a few space and solar system facts."
            ]
        },
        {
            "ID": "name",
            "inputs": [
                "What is your name?",
                "Do you have a name?",
                "What are you called?",
                "Who are you?"
            ],
            "responses": [
                "My name is Laika",
                "I am called Laika",
                "You can call me Laika",
                "Please call me Laika",
                "Laika of course!",
                "Laika you haven't heard of me?"
            ]
        },
        {
            "ID": "meaning",
            "inputs": [
                "What does your name mean?",
                "Does your name mean anything?",
                "Does your name have a meaning?",
                "What does Laika mean?",
                "Does Laika mean something?",
            ],
            "responses": [
                "My name means bark or barker in Russian.",
                "My name was inspired by the first animal in space who was named Laika.",
                "I am named after a Russian space dog",
                "If you must know, my name means bark in Russian."
            ]
        },
        {
            "ID": "animal facts",
            "inputs": [
                "Can you tell me about the first dog in space?",
                "Tell me more about Laika.",
                "Tell me about Laika the first dog in space.",
                "what else do you know about Laika?",
                "What else do you know about the first dog in space?"
            ],
            "responses": [
                "Laika was a female and was also said to be part husky and part terrier.",
                "Laika was launched into orbit on Nov 3rd, 1957.",
                "She was launched into space on the Sputnik 2 spacecraft.",
                "She was born in Moscow Russia",
                "Her original name was not Laika, she was given the name Laika because she barked during a radio interview.",
                "Her original name was Kudryavka which means little curly.",
                "The US press sometimes referred to her as Muttnik.",
                "She weighed 13lbs which is 6kg at the time of her flight.",
                "Sadly Laika did not survive her trip into space."
            ]
        },
        {
            "ID": "space facts",
            "inputs": [
                "What space facts can you tell me?",
                "What do you know about space?",
                "Tell me about space.",
                "What can you tell me about space?",
                "Do you know anything about space?",
                "What kind of facts do you know about space?"
            ],
            "responses": [
                "Space is a vacuum and has an average temperature of 2.7 Kelvin, which is about -455 degrees Fahrenheit.",
                "It is estimated there could be 40 trillion black holes in the universe.",
                "Our galaxy is known as the milky way galaxy.",
                "The wolf-rayet star is the hottest known star in the universe.",
                "The whirlpool galaxy was the first celestial object identified as being spiral.",
                "There are several different types of galaxies. The four main types are: Spiral, Elliptical, Peculiar, and Irregular",
                "Halley's comet is set to return on July 28th, 2061.",
                "The milky way galaxy is 105,700 lightyears wide.",
                "The closest known galaxy to the Milky Way is the Andromeda galaxy. It would take you 2.5 million years to get there if you could travel at the speed of light.",
                "The first dog in space was Laika. I am named after her."
            ]
        },
        {
            "ID": "solar system facts",
            "inputs": [
                "What do you know about the solar system?",
                "What can you tell me about the solar system?",
                "Do you know any facts about the solar system?",
                "What do you know about the planets?"
            ],
            "responses": [
                "The planet Earth is approximately 93 million miles from the sun.",
                "The solar system has 8 planets and 1 dwarf planet.",
                "The planets of the solar system are: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Pluto is classified as a dwarf planet.",
                "Mercury and Venus are the only planets in the solar system that have no moons.",
                "Olympus Mons is the highest mountain discovered in the solar system and is located on Mars.",
                "The hottest planet in our solar system is Venus at a constant temperature of 864 degrees Fahrenheit.",
                "The solar system is located within the Milky Way galaxy.",
                "Footprints left on the moon will not disappear as there is no wind.",
                "There are 79 known moons orbiting Jupiter.",
                "Jupiter is the planet with the most moons in our solar system.",
                "Earth is located within a zone called the goldilocks zone. The goldilocks zone is also known as the habitable zone. It is the range of orbits around a star within which a planetary surface can support liquid water given sufficient atmospheric pressure."
            ]
        },
        {
            "ID": "jokes",
            "inputs": [
                "Can you tell me a joke?",
                "Tell me something funny.",
                "Do you know any jokes?",
                "Can you make me laugh?",
                "Please tell me a joke"
            ],
            "responses": [
                "Why didn't the sun go to college? Because it already had a million degrees!",
                "Where do keyboards go to have dinner? The answer is the spacebar!",
                "What does an astronaut call his ex from space? SpaceX!",
                "What did the alien say to the garden? Take me to your weeder!",
                "What do planets like to read? Comet books!",
                "What do you call a tick on the moon? A luna-tick!",
                "How does our solar system hold up its pants? With an asteroid belt!",
                "Maybe, if you ask nicer."
            ]
        }
    ]
}

In [8]:
# Walk every intent once and collect three things:
#   IDs         - each intent's ID, in definition order,
#   total_terms - every raw token from every example input (flattened),
#   chat_list   - one (tokens, ID) pair per example input.
total_terms, IDs, chat_list = [], [], []

for intent in intents['intents']:
    ID = intent['ID']
    IDs.append(ID)
    # Tokenize all example inputs of this intent before recording them.
    tokenized_inputs = [seperate_terms(inp) for inp in intent['inputs']]
    for tokens in tokenized_inputs:
        total_terms.extend(tokens)
        chat_list.append((tokens, ID))

In [9]:
# Punctuation tokens to ignore when building the vocabulary.
ignore_terms = ['?', '!', '.', ',', ":", "-"]

# Drop ignored tokens, stem what is left, then deduplicate and sort so every
# term has a stable index in the vocabulary.
# NOTE: this cell rebinds total_terms and IDs, so re-running it without first
# re-running the cell that builds them stems the vocabulary a second time.
total_terms = sorted({stem(t) for t in total_terms if t not in ignore_terms})
IDs = sorted(set(IDs))
print(IDs)

['animal facts', 'goodbye', 'greeting', 'jokes', 'meaning', 'name', 'purpose', 'solar system facts', 'space facts', 'thanks']


In [10]:
# Create a class for a dataset to house training data
class LaikaDataset(Dataset):
    """Map-style dataset pairing bag-of-terms vectors with intent labels.

    Args:
        x_data: Indexable collection of feature vectors. When omitted, the
            notebook-level ``x_train`` is used, so the original zero-argument
            call ``LaikaDataset()`` keeps working.
        y_data: Indexable collection of labels aligned with ``x_data``. When
            omitted, the notebook-level ``y_train`` is used.

    Raises:
        ValueError: If the features and labels differ in length.
    """

    def __init__(self, x_data=None, y_data=None):
        # Passing the data explicitly removes the hidden dependency on globals
        # defined in a later cell; the fallback keeps existing callers working.
        self.x_data = x_train if x_data is None else x_data
        self.y_data = y_train if y_data is None else y_data
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f"x_data and y_data must have the same length, "
                f"got {len(self.x_data)} and {len(self.y_data)}"
            )
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [12]:
# Build the training arrays from chat_list:
#   x_train - one bag-of-terms row per example input,
#   y_train - the position of that example's intent ID within IDs.
x_train = np.array(
    [bag_of_terms(input_sentence, total_terms) for input_sentence, _ in chat_list]
)
y_train = np.array([IDs.index(ID) for _, ID in chat_list])

# Hyperparameters for the feed-forward classifier (linear layers only; it is not a CNN).
# Seed torch's RNG so weight initialisation and DataLoader shuffling are
# reproducible across Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

batch_size = 15
hidden_size = 10
output_size = len(IDs)          # one output logit per intent ID
input_size = len(x_train[0])    # length of one bag-of-terms vector
learning_rate = 0.001
num_epochs = 1000

# Wrap the training arrays in a Dataset and expose them through a shuffling
# DataLoader; train_loader is what the training loop iterates over.
dataset = LaikaDataset()
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [13]:
# Create a class called NeuralNet: a small fully connected (feed-forward) classifier. It has no convolutional layers, so it is not a CNN.
class NeuralNet(nn.Module):
    """Fully connected classifier: three linear layers with ReLU in between.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names and creation order are kept as-is because they
        # determine the state_dict keys of saved checkpoints.
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        hidden = self.relu(self.layer1(x))
        hidden = self.relu(self.layer2(hidden))
        return self.layer3(hidden)

In [14]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Instantiate NeuralNet with the configured sizes and place it on that device.
model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

In [15]:
# Loss function and optimizer for the feed-forward classifier.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# Training loop.
model.train()
loss = None  # stays None if the loader never yields a batch
for epoch in range(num_epochs):
    for (terms, labels) in train_loader:
        terms = terms.to(device)
        # CrossEntropyLoss needs int64 class indices on the same device as the
        # logits. The previous `labels.type(torch.LongTensor)` cast to a CPU
        # tensor type, which undid `.to(device)` and failed on CUDA.
        labels = labels.to(device = device, dtype = torch.long)
        outputs = model(terms)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The reported value is the loss of the last mini-batch of the epoch,
    # not an average over the epoch.
    if loss is not None and (epoch + 1) % 100 == 0:
        print(f'epoch {epoch+1}/{num_epochs}, loss = {loss.item():.4f}')

if loss is None:
    print('no batches were processed; the model was not trained')
else:
    print(f'final loss = {loss.item():.4f}')

epoch 100/1000, loss = 1.1494
epoch 200/1000, loss = 0.0933
epoch 300/1000, loss = 0.0382
epoch 400/1000, loss = 0.0099
epoch 500/1000, loss = 0.0077
epoch 600/1000, loss = 0.0031
epoch 700/1000, loss = 0.0014
epoch 800/1000, loss = 0.0011
epoch 900/1000, loss = 0.0013
epoch 1000/1000, loss = 0.0009
final loss = 0.0009


In [16]:
# Persist everything needed to rebuild the classifier later: the trained
# weights, the layer sizes, the vocabulary and the ordered intent IDs.
FILE = "data.pth"

data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    output_size=output_size,
    hidden_size=hidden_size,
    total_terms=total_terms,
    IDs=IDs,
)
torch.save(data, FILE)

print(f'training complete, file saved to {FILE}')



training complete, file saved to data.pth


In [17]:
# Reload the saved checkpoint and rebuild the model in inference mode.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

FILE = "data.pth"
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
data = torch.load(FILE, map_location = device)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
total_terms = data["total_terms"]
IDs = data["IDs"]
model_state = data["model_state"]

model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
# eval() switches the module to inference mode and returns it, so the cell
# also displays the architecture.
model.eval()

NeuralNet(
  (layer1): Linear(in_features=65, out_features=10, bias=True)
  (layer2): Linear(in_features=10, out_features=10, bias=True)
  (layer3): Linear(in_features=10, out_features=10, bias=True)
  (relu): ReLU()
)

In [18]:
# Interactive chat loop driven by the trained feed-forward classifier.
bot_name = "Laika"
# Minimum softmax probability the predicted intent needs before the bot answers.
CONFIDENCE_THRESHOLD = 0.75

print("Let's have a discussion! Type 'quit' to exit")
while True:
    sentence = input('You: ')
    # Accept "quit" regardless of case or surrounding whitespace.
    if sentence.strip().lower() == "quit":
        break

    sentence = seperate_terms(sentence)
    X = bag_of_terms(sentence, total_terms)
    X = X.reshape(1, X.shape[0])  # a batch containing one sample
    # The input must be on the same device as the model, otherwise inference
    # fails when the model was moved to CUDA.
    X = torch.from_numpy(X).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        final_output = model(X)
    _, predict = torch.max(final_output, dim = 1)
    ID = IDs[predict.item()]

    probabilities = torch.softmax(final_output, dim = 1)
    probability = probabilities[0][predict.item()]

    if probability.item() > CONFIDENCE_THRESHOLD:
        for intent in intents["intents"]:
            if ID == intent["ID"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: I do not understand...")

Lets's have discussion! type 'quit' to exit
You: Hi
Laika: Hi there, what can I do for you?
You: Who are you?
Laika: Please call me Laika
You: Does Laika mean something?
Laika: My name means bark or barker in Russian.
You: What do you do?
Laika: Well, my knowledge base includes a few space and solar sytem facts.
You: tell me about space
Laika: The closest known galaxy to the Milky Way is the Andromeda galaxy. It would take you 2.5 million years to get there if you could travel at the speed of light.
You: tell me more about space
Laika: Our galaxy is known as the milky way galaxy.
You: tell me about the solar system
Laika: The planet Earth is approximately 93 million miles from the sun.
You: make me laugh
Laika: Maybe, if you ask nicer.
You: please make me laugh
Laika: How does our solar system hold up its pants? With an asteroid belt!
You: thank you for talking with me
Laika: Happy to help!
You: bye
Laika: Finally, I thought you would never leave.
You: quit
