In [2]:
import json
import random

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Input,Flatten,LSTM,Dense,Embedding,GlobalMaxPooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
%%writefile intents.json
{
  "intents": [
   {
      "tag": "intro",
      "patterns": [
        "What is your name",
        "What can you do?",
        "Glad to meet you",
        "Bye bye"
      ],
      "responses": [
        "Hello,Sir,I am Natalie",
        "I can perform various AI tasks",
        "Me too",
        "Sayonara"
      ]
    }, {
      "tag": "greeting",
      "patterns": [
        "Hi",
        "Hey",
        "How are you",
        "Is anyone there?",
        "Hello",
        "Good day"
      ],
      "responses": [
        "Hey :-)",
        "Hello, thanks for visiting",
        "Hi there, what can I do for you?",
        "Hi there, how can I help?"
      ]
    },
    {
      "tag": "goodbye",
      "patterns": ["Bye", "See you later", "Goodbye"],
      "responses": [
        "See you later, thanks for visiting",
        "Have a nice day",
        "Bye! Come back again soon."
      ]
    },
    {
      "tag": "thanks",
      "patterns": ["Thanks", "Thank you", "That's helpful", "Thank's a lot!"],
      "responses": ["Happy to help!", "Any time!", "My pleasure"]
    },
    {
      "tag": "items",
      "patterns": [
        "Which items do you have?",
        "What kinds of items are there?",
        "What do you sell?"
      ],
      "responses": [
        "We sell coffee and tea",
        "We have coffee and tea"
      ]
    },
    {
      "tag": "payments",
      "patterns": [
        "Do you take credit cards?",
        "Do you accept Mastercard?",
        "Can I pay with Paypal?",
        "Are you cash only?"
      ],
      "responses": [
        "We accept VISA, Mastercard and Paypal",
        "We accept most major credit cards, and Paypal"
      ]
    },
    {
      "tag": "delivery",
      "patterns": [
        "How long does delivery take?",
        "How long does shipping take?",
        "When do I get my delivery?"
      ],
      "responses": [
        "Delivery takes 2-4 days",
        "Shipping takes 2-4 days"
      ]
    },{
      "tag": "food",
      "patterns": [
        "Which is your favourite food?",
        "I like it too much",
        "And it's speciality of our store also"
      ],
      "responses": [
        "I like panipuri",
        "Same pinch"
      ]
    },
    {
      "tag": "funny",
      "patterns": [
        "Tell me a joke!",
        "Tell me something funny!",
        "Do you know a joke?"
      ],
      "responses": [
        "Why did the hipster burn his mouth? He drank the coffee before it was cool.",
        "What did the buffalo say when his son left for college? Bison."
      ]
    }
  ]
}


Writing intents.json


In [4]:
# Load the intent definitions that the %%writefile cell saved to intents.json.
# The encoding is given explicitly so parsing does not depend on the platform's
# default text encoding.
with open("intents.json",encoding="utf-8") as f:
  data=json.load(f)

In [6]:
# Flatten the intents into two parallel lists (one entry per example sentence)
# and keep a tag -> candidate replies lookup for answering later.
responses={intent['tag']:intent['responses'] for intent in data['intents']}
patterns=[]
tags=[]
for intent in data['intents']:
  examples=intent['patterns']
  patterns.extend(examples)
  # Repeat the tag once per example so both lists stay aligned.
  tags.extend([intent['tag']]*len(examples))

In [7]:
# One row per training sentence, paired with the intent tag it belongs to.
df=pd.DataFrame(dict(patterns=patterns,tags=tags))
df

Unnamed: 0,patterns,tags
0,What is your name,intro
1,What can you do?,intro
2,Glad to meet you,intro
3,Bye bye,intro
4,Hi,greeting
5,Hey,greeting
6,How are you,greeting
7,Is anyone there?,greeting
8,Hello,greeting
9,Good day,greeting


Tokenizing the sentences 

In [10]:
# Build the model inputs: padded integer word sequences (X_train) and integer
# class labels (y_train).
tokenizer=Tokenizer()
# fit_on_texts builds the word -> integer vocabulary (tokenizer.word_index)
# from the training patterns.
tokenizer.fit_on_texts(df['patterns'])
# texts_to_sequences replaces each word of a pattern with its integer value
# from the word_index dictionary.
texts=tokenizer.texts_to_sequences(df['patterns'])
# Sequence padding: shorter sequences are zero-filled on the left so every row
# has the length of the longest pattern.
X_train=pad_sequences(texts)

# Label encoding: LabelEncoder maps every distinct tag string to an integer
# class id. fit_transform learns that mapping from the tags and returns the
# encoded labels; le.inverse_transform converts ids back to tag names.
le=LabelEncoder()
y_train=le.fit_transform(df['tags'])

In [11]:
# Inspect the integer-encoded tag for each training pattern.
y_train

array([5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 8, 8, 8, 8, 6, 6, 6, 7, 7,
       7, 7, 0, 0, 0, 1, 1, 1, 2, 2, 2])

In [12]:
# Inspect the padded word-index sequences (one row per training pattern).
X_train

array([[ 0,  0,  0,  3,  4, 11, 23],
       [ 0,  0,  0,  3, 12,  1,  2],
       [ 0,  0,  0, 24, 25, 26,  1],
       [ 0,  0,  0,  0,  0,  5,  5],
       [ 0,  0,  0,  0,  0,  0, 27],
       [ 0,  0,  0,  0,  0,  0, 28],
       [ 0,  0,  0,  0,  6,  7,  1],
       [ 0,  0,  0,  0,  4, 29, 13],
       [ 0,  0,  0,  0,  0,  0, 30],
       [ 0,  0,  0,  0,  0, 31, 32],
       [ 0,  0,  0,  0,  0,  0,  5],
       [ 0,  0,  0,  0, 33,  1, 34],
       [ 0,  0,  0,  0,  0,  0, 35],
       [ 0,  0,  0,  0,  0,  0, 36],
       [ 0,  0,  0,  0,  0, 37,  1],
       [ 0,  0,  0,  0,  0, 38, 39],
       [ 0,  0,  0,  0, 40,  8, 41],
       [ 0,  0, 14, 15,  2,  1, 42],
       [ 0,  3, 43, 16, 15,  7, 13],
       [ 0,  0,  0,  3,  2,  1, 44],
       [ 0,  0,  2,  1,  9, 45, 46],
       [ 0,  0,  0,  2,  1, 47, 48],
       [ 0,  0, 12, 10, 49, 50, 51],
       [ 0,  0,  0,  7,  1, 52, 53],
       [ 0,  0,  6, 17, 18, 19,  9],
       [ 0,  0,  6, 17, 18, 54,  9],
       [ 0, 55,  2, 10, 56, 57, 19],
 

In [23]:
# Number of columns of X_train, i.e. the padded sequence length; used as the
# model's input width and as the padding length for user input.
input_shape=X_train.shape[1]
print(input_shape)

7


In [14]:
# Number of distinct words learned by the tokenizer.
# NOTE: the name is a misspelling of "vocabulary"; it is kept because the
# model-definition cell refers to it by this name.
vocalbary=len(tokenizer.word_index)
vocalbary

72

In [17]:
# Word -> integer index mapping built by tokenizer.fit_on_texts.
token=tokenizer.word_index
token

{'a': 8,
 'accept': 47,
 'also': 69,
 'and': 64,
 'anyone': 29,
 'are': 7,
 'bye': 5,
 'can': 12,
 'cards': 46,
 'cash': 52,
 'credit': 45,
 'day': 32,
 'delivery': 19,
 'do': 2,
 'does': 18,
 'favourite': 58,
 'food': 59,
 'funny': 71,
 'get': 56,
 'glad': 24,
 'good': 31,
 'goodbye': 35,
 'have': 42,
 'hello': 30,
 'helpful': 39,
 'hey': 28,
 'hi': 27,
 'how': 6,
 'i': 10,
 'is': 4,
 'it': 61,
 "it's": 65,
 'items': 15,
 'joke': 22,
 'kinds': 43,
 'know': 72,
 'later': 34,
 'like': 60,
 'long': 17,
 'lot': 41,
 'mastercard': 48,
 'me': 21,
 'meet': 26,
 'much': 63,
 'my': 57,
 'name': 23,
 'of': 16,
 'only': 53,
 'our': 67,
 'pay': 49,
 'paypal': 51,
 'see': 33,
 'sell': 44,
 'shipping': 54,
 'something': 70,
 'speciality': 66,
 'store': 68,
 'take': 9,
 'tell': 20,
 'thank': 37,
 "thank's": 40,
 'thanks': 36,
 "that's": 38,
 'there': 13,
 'to': 25,
 'too': 62,
 'what': 3,
 'when': 55,
 'which': 14,
 'with': 50,
 'you': 1,
 'your': 11}

In [19]:
# Tag names known to the label encoder; the position of a name in this array
# is the integer id used in y_train.
output=le.classes_
output

array(['delivery', 'food', 'funny', 'goodbye', 'greeting', 'intro',
       'items', 'payments', 'thanks'], dtype=object)

In [20]:
# Number of intent classes; used as the size of the model's softmax layer.
output_len=len(le.classes_)
output_len

9

In [24]:
# Intent classifier: embedding -> LSTM (one output per time step) -> flatten
# -> softmax over the intent tags.
inputs=Input(shape=(input_shape,))
# vocalbary+1 embedding rows: word indices start at 1 and 0 is the padding value.
embedded=Embedding(input_dim=vocalbary+1,output_dim=20)(inputs)
sequence_features=LSTM(units=20,return_sequences=True)(embedded)
flattened=Flatten()(sequence_features)
probabilities=Dense(units=output_len,activation="softmax")(flattened)
model=Model(inputs=inputs,outputs=probabilities)

In [25]:
# Labels are plain integer class ids, hence the sparse categorical loss.
model.compile(
  optimizer="adam",
  loss="sparse_categorical_crossentropy",
  metrics=["accuracy"],
)

In [27]:
# Train on the full pattern set for 200 epochs (no validation data is passed).
model.fit(x=X_train,y=y_train,epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f3499a0f950>

In [None]:
# Interactive chat loop: classify each user message into an intent tag and
# answer with a random reply registered for that tag.

# Characters removed from the user's message before tokenizing.
punctuation=['+','^','&',':',';','?','$']

while True:
  user_text=input("User : ")

  # Lower-case the message and drop the characters listed in `punctuation`.
  cleaned=''.join(ch.lower() for ch in user_text if ch not in punctuation)

  # Convert the message to word indices with the fitted tokenizer, then pad it
  # to the same width as the training sequences.
  sequences=tokenizer.texts_to_sequences([cleaned])
  padded=pad_sequences(sequences,input_shape)

  # Index of the class with the highest predicted probability.
  class_index=model.predict(padded).argmax()

  tag=le.inverse_transform([class_index])[0]
  reply=random.choice(responses[tag])
  print("Margreta:",reply)

  # End the conversation on a farewell. `tag` holds an intent name, so the
  # farewell is detected through the "goodbye" intent or the "Sayonara" reply
  # text rather than by comparing the tag with a reply string.
  if tag=="goodbye" or reply=="Sayonara":
    break