In [96]:
import random
import keras
import json
import pickle
import numpy as np
import tensorflow

import nltk; nltk.download('popular'); nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\gridd\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\gridd\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\gridd\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\gridd\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\gridd\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]   

In [97]:
data = {
  "intents": [
    {
      "tag": "greetings",
      "patterns": ["hello", "hey", "Namasthey", "hi", "Namaskar", "Good day", "Greetings", "what's up?", "how is it going?"],
      "responses": ["Hello!", "Hey!", "What can I do for you?"]
    },
    {
      "tag": "goodbye",
      "patterns": ["cya", "See you later", "Ok bye", "Goodbye", "I am Leaving", "Have a good day", "bye", "see ya"],
      "responses": ["Sad to see you go :(", "Talk to you later", "Goodbye!"]
    },
    {
      "tag": "age",
      "patterns": ["how old are you?", "how old?", "What is your age?", "age?", "how old?"],
      "responses": ["We are a 21 year old company, so 21 years!"]
    },
    {
      "tag": "name",
      "patterns": ["What is your name", "name?", "What should I call you?", "Do you have a name?", "whats your name?"],
      "responses": ["I am Jarvis!", "Jarvis!", "Jarvis, your chat assistant"]
    }
  ]
}

with open('intents.json', 'w') as outfile:
    json.dump(data, outfile, indent=4)

In [98]:
lemmatizer = WordNetLemmatizer()
intents = json.loads(open('intents.json').read())
words = []
classes = []
documents = []
ignore_letters = ["?" , "!", "." , ",", ";" , "'"]

for intent in intents['intents']:
    for pattern in intent["patterns"]:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent["tag"]))
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

In [99]:
print(documents)

[(['hello'], 'greetings'), (['hey'], 'greetings'), (['Namasthey'], 'greetings'), (['hi'], 'greetings'), (['Namaskar'], 'greetings'), (['Good', 'day'], 'greetings'), (['Greetings'], 'greetings'), (['what', "'s", 'up', '?'], 'greetings'), (['how', 'is', 'it', 'going', '?'], 'greetings'), (['cya'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Ok', 'bye'], 'goodbye'), (['Goodbye'], 'goodbye'), (['I', 'am', 'Leaving'], 'goodbye'), (['Have', 'a', 'good', 'day'], 'goodbye'), (['bye'], 'goodbye'), (['see', 'ya'], 'goodbye'), (['how', 'old', 'are', 'you', '?'], 'age'), (['how', 'old', '?'], 'age'), (['What', 'is', 'your', 'age', '?'], 'age'), (['age', '?'], 'age'), (['how', 'old', '?'], 'age'), (['What', 'is', 'your', 'name'], 'name'), (['name', '?'], 'name'), (['What', 'should', 'I', 'call', 'you', '?'], 'name'), (['Do', 'you', 'have', 'a', 'name', '?'], 'name'), (['whats', 'your', 'name', '?'], 'name')]


In [100]:
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))
classes = sorted(set(classes))

print(words)

["'s", 'Do', 'Good', 'Goodbye', 'Greetings', 'Have', 'I', 'Leaving', 'Namaskar', 'Namasthey', 'Ok', 'See', 'What', 'a', 'age', 'am', 'are', 'bye', 'call', 'cya', 'day', 'going', 'good', 'have', 'hello', 'hey', 'hi', 'how', 'is', 'it', 'later', 'name', 'old', 'see', 'should', 'up', 'what', 'whats', 'ya', 'you', 'your']


In [101]:
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl','wb'))

In [102]:
training = []
output_empty = [0] * len(classes)

for document in documents:
    bag = []
    word_patterns = document[0]
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)

    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])

In [103]:
random.shuffle(training)
training = np.array(training, dtype=object)

train_x = list(training[:,0])
train_y = list(training[:,1])

In [105]:
model = Sequential()
model.add(Dense(128, input_shape =(len(train_x[0]),), activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation = 'softmax'))

sgd = SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(np.array(train_x), np.array(train_y), epochs = 200, batch_size = 5, verbose = 1)
model.export("chatbot_model.model")
model.save("chatbot_model.h5")
print("Done")

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.1591 - loss: 1.4326  
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2933 - loss: 1.3772 
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3825 - loss: 1.3810 
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3787 - loss: 1.3704 
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.3759 - loss: 1.4236  
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2478 - loss: 1.4343 
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.3534 - loss: 1.3623 
Epoch 8/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4082 - loss: 1.3939 
Epoch 9/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

INFO:tensorflow:Assets written to: chatbot_model.model\assets


Saved artifact at 'chatbot_model.model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 41), dtype=tf.float32, name='keras_tensor_235')
Output Type:
  TensorSpec(shape=(None, 4), dtype=tf.float32, name=None)
Captures:
  1507035053248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1507035041280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1507035050784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1507035040048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1507035052016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1507035038112: TensorSpec(shape=(), dtype=tf.resource, name=None)




Done
