# Build your own Chatbot

Remember the format of the JSON you have to provide. Come up with 3 intents (you may reuse the ones from Zumpad) and provide your sample utterances.

Format:

```
{"intents": [
  {"tag": "greeting",
    "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day", "Whats up", "Hey", "greetings"],
    "responses": ["Hello!", "Good to see you again!", "Hi there, how can I help?"],
    "context_set": ""
  },
  {"tag": "goodbye",
    "patterns": ["cya", "See you later", "Goodbye", "I am Leaving", "Have a Good day", "bye", "cao", "see ya"],
    "responses": ["Sad to see you go :(", "Talk to you later", "Goodbye!"],
    "context_set": ""
  },
  {"tag": "stocks",
    "patterns": ["what stocks do I own?", "how are my shares?", "what companies am I investing in?", "what am I doing in the markets?"],
    "responses": ["You own the following shares: ABBV, AAPL, FB, NVDA and an ETF of the S&P 500 Index!"],
    "context_set": ""
  }
]
}
```

## Task1:

Prepare your language model. In this sample we use a bag-of-words approach.

**Improve the stopword removal and punctuation removal by using the approach from last week!**

In [None]:
import random
import json
import pickle
import numpy as np
import os

import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('wordnet')

# Load the intent definitions. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open("intents.json", encoding="utf-8") as intents_file:
    intents = json.load(intents_file)

words = []       # every token from every pattern (normalised further below)
classes = []     # distinct intent tags
documents = []   # (raw token list, intent tag) pairs, one per pattern
ignore_letters = ['!', '?', ',', '.']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        # documents keeps the raw tokens (original casing, punctuation included);
        # they are lower-cased and lemmatised again when the bags are built.
        documents.append((tokens, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

lemmatizer = WordNetLemmatizer()

# TODO: potential for stopword removal and punctuation removal improvements
# Vocabulary: drop the listed punctuation tokens, lower-case, lemmatise,
# then de-duplicate and sort so that every word has a stable index.
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_letters]
words = sorted(set(words))

# Sorted so that the position of a tag in `classes` is stable as well.
classes = sorted(set(classes))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# Inspect the vocabulary: the sorted, de-duplicated, lemmatised tokens.
print(words)

['a', 'am', 'anyone', 'are', 'bye', 'cao', 'company', 'cya', 'day', 'do', 'doing', 'good', 'goodbye', 'greeting', 'have', 'hello', 'hey', 'hi', 'how', 'i', 'in', 'investing', 'is', 'later', 'leaving', 'market', 'my', 'own', 'see', 'share', 'stock', 'the', 'there', 'up', 'what', 'whats', 'ya', 'you']


In [None]:
# Inspect the sorted list of distinct intent tags.
print(classes)

['goodbye', 'greeting', 'stocks']


In [None]:
# Inspect the (token list, intent tag) pairs collected from the patterns.
print(documents)

[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Whats', 'up'], 'greeting'), (['Hey'], 'greeting'), (['greetings'], 'greeting'), (['cya'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['I', 'am', 'Leaving'], 'goodbye'), (['Have', 'a', 'Good', 'day'], 'goodbye'), (['bye'], 'goodbye'), (['cao'], 'goodbye'), (['see', 'ya'], 'goodbye'), (['what', 'stocks', 'do', 'I', 'own', '?'], 'stocks'), (['how', 'are', 'my', 'shares', '?'], 'stocks'), (['what', 'companies', 'am', 'I', 'investing', 'in', '?'], 'stocks'), (['what', 'am', 'I', 'doing', 'in', 'the', 'markets', '?'], 'stocks')]


## Task2:
Encode each pattern as a bag-of-words vector so that it can be used to train a neural network.

We have 3 classes, sorted alphabetically as [goodbye, greeting, stocks], which are one-hot encoded as [1,0,0], [0,1,0] and [0,0,1] respectively for the output.

The input layer receives a vector whose length equals the size of the vocabulary.

In [None]:
# Build one [bag-of-words, one-hot label] pair per pattern.
training = []
output_empty = [0] * len(classes)

for tokens, tag in documents:
    # Normalise the pattern's tokens the same way the vocabulary was built.
    lemmas = [lemmatizer.lemmatize(token.lower()) for token in tokens]

    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

    # Copy the all-zero template and switch on the slot of this pattern's tag.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

print(training)


[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0]], [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 1, 0]], [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0], [0, 1, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0]], [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

## Task3:

Now, let's train a neural network for this. To do so we have to define x and y, a couple of layers and the hyperparameters, as you can see in the code below.

**Change the hyperparameters to improve your model**
- Change the epochs
- or the loss function

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import load_model

# Shuffle the [bag, output_row] pairs in place before training.
random.shuffle(training)

# Split features and labels straight from the list of pairs. Each pair holds
# two lists of different lengths (vocabulary size vs. number of classes), so
# np.array(training) would be a ragged array, which recent NumPy versions
# reject with a ValueError. `training` therefore stays a plain list.
train_x = [bag for bag, _ in training]
train_y = [output_row for _, output_row in training]

# Feed-forward classifier: vocabulary-sized input, two hidden layers with
# dropout, and a softmax output with one unit per intent class.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# TODO: Change and adjust the hyperparameter
model.compile(loss='categorical_crossentropy', optimizer="sgd", metrics=['accuracy'])
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

# In case we do not want to repeat training, we can save the results.
# Only the path is passed: the second positional parameter of save() is
# `overwrite`, so the training history must not be handed in there.
model.save("intents.h5")
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)


Epoch 1/200


  import sys


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

## Task4:

Let's build the chatbot code.

In [None]:
# Uncomment this if you want to load the saved data instead of retraining

#  words = pickle.load(open('words.pkl', 'rb'))
#  classes = pickle.load(open('classes.pkl', 'rb'))
#  model = load_model('intents.h5')

def predict(sentence):
    """Classify a sentence and return the candidate intents, best first.

    The sentence is tokenised, lower-cased and lemmatised, then encoded as a
    bag-of-words vector over the module-level ``words`` vocabulary and passed
    to the module-level ``model``.

    Returns a list of ``{'intent': <tag>, 'probability': <str>}`` dicts for
    every class whose score exceeds 0.1, sorted by descending score. The list
    is empty when no class exceeds the threshold.
    """
    tokens = nltk.word_tokenize(sentence)
    lemmas = {lemmatizer.lemmatize(token.lower()) for token in tokens}

    # 1 for each vocabulary word that occurs in the sentence, else 0.
    bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

    res = model.predict(np.array([bag]))[0]
    ERROR_THRESHOLD = 0.1

    # Keep (class index, score) pairs above the threshold, highest score first.
    ranked = sorted(
        ([i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {'intent': classes[index], 'probability': str(score)}
        for index, score in ranked
    ]

def request(message):
    """Return a chatbot reply for ``message``.

    Calls ``predict`` to rank the intents, prints the ranking, and picks a
    random response of the top-ranked intent from the module-level
    ``intents`` data.

    Returns the fallback ``"I don't understand!"`` when no intent was
    predicted, when the predicted tag has no entry in ``intents`` (previously
    an UnboundLocalError), or when that entry has no responses.
    """
    fallback = "I don't understand!"
    ints = predict(message)

    print(ints)
    if not ints:
        # No class exceeded the prediction threshold.
        return fallback

    tag = ints[0]['intent']
    for intent in intents['intents']:
        if intent['tag'] == tag:
            responses = intent['responses']
            # random.choice raises IndexError on an empty list; answer with
            # the fallback instead, as the original except clause did.
            return random.choice(responses) if responses else fallback

    # The predicted tag is unknown to the loaded intents (e.g. a stale model).
    return fallback


# TODO: Maybe you want to improve the output!
# Console loop: answer each message until the user types the sentinel "STOP".
done = False
while not done:
    message = input("Enter a message: ")
    done = message == "STOP"
    if not done:
        print(request(message))

Enter a message: Hi
[{'intent': 'greeting', 'probability': '0.8099524'}, {'intent': 'goodbye', 'probability': '0.13175413'}]
Good to see you again!
Enter a message: Bye
[{'intent': 'goodbye', 'probability': '0.78330916'}, {'intent': 'greeting', 'probability': '0.18973133'}]
Talk to you later
Enter a message: STOP
