<a href="https://colab.research.google.com/github/21Akame03/nlp_framework/blob/main/contextful_nlp_chatbot_trial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## JSON file for testing (copy everything below into a file named `intents.json`)

{"intents": [
        {"tag": "greeting",
         "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day"],
         "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"],
         "context_set": ""
        },
        {"tag": "goodbye",
         "patterns": ["Bye", "See you later", "Goodbye"],
         "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."]
        },
        {"tag": "thanks",
         "patterns": ["Thanks", "Thank you", "That's helpful"],
         "responses": ["Happy to help!", "Any time!", "My pleasure"]
        },
        {"tag": "hours",
         "patterns": ["What hours are you open?", "What are your hours?", "When are you open?" ],
         "responses": ["We're open every day 9am-9pm", "Our hours are 9am-9pm every day"]
        },
        {"tag": "mopeds",
         "patterns": ["Which mopeds do you have?", "What kinds of mopeds are there?", "What do you rent?" ],
         "responses": ["We rent Yamaha, Piaggio and Vespa mopeds", "We have Piaggio, Vespa and Yamaha mopeds"]
        },
        {"tag": "payments",
         "patterns": ["Do you take credit cards?", "Do you accept Mastercard?", "Are you cash only?" ],
         "responses": ["We accept VISA, Mastercard and AMEX", "We accept most major credit cards"]
        },
        {"tag": "opentoday",
         "patterns": ["Are you open today?", "When do you open today?", "What are your hours today?"],
         "responses": ["We're open every day from 9am-9pm", "Our hours are 9am-9pm every day"]
        },
        {"tag": "rental",
         "patterns": ["Can we rent a moped?", "I'd like to rent a moped", "How does this work?" ],
         "responses": ["Are you looking to rent today or later this week?"],
         "context_set": "rentalday"
        },
        {"tag": "today",
         "patterns": ["today"],
         "responses": ["For rentals today please call 1-800-MYMOPED", "Same-day rentals please call 1-800-MYMOPED"],
         "context_filter": "rentalday"
        }
   ]
}


Each intent in the file contains:

1. a tag (a unique name)
2. patterns (example sentences used to train the neural-network text classifier)
3. responses (one of them is picked at random as the reply)





In [1]:
!pip install tflearn

import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
nltk.download('punkt')

import numpy as np
import tflearn
import tensorflow as tf
import random

Collecting tflearn
  Downloading tflearn-0.5.0.tar.gz (107 kB)
[?25l[K     |███                             | 10 kB 16.4 MB/s eta 0:00:01[K     |██████                          | 20 kB 22.3 MB/s eta 0:00:01[K     |█████████▏                      | 30 kB 14.0 MB/s eta 0:00:01[K     |████████████▏                   | 40 kB 9.9 MB/s eta 0:00:01[K     |███████████████▎                | 51 kB 4.6 MB/s eta 0:00:01[K     |██████████████████▎             | 61 kB 4.9 MB/s eta 0:00:01[K     |█████████████████████▍          | 71 kB 4.6 MB/s eta 0:00:01[K     |████████████████████████▍       | 81 kB 5.1 MB/s eta 0:00:01[K     |███████████████████████████▌    | 92 kB 5.0 MB/s eta 0:00:01[K     |██████████████████████████████▌ | 102 kB 5.3 MB/s eta 0:00:01[K     |████████████████████████████████| 107 kB 5.3 MB/s 
Building wheels for collected packages: tflearn
  Building wheel for tflearn (setup.py) ... [?25l[?25hdone
  Created wheel for tflearn: filename=tflearn-0.5.0-py3-n

In [2]:
# Load the intents file used for training.
# The path is relative (not '/content/intents.json') so the notebook is not
# tied to Colab's working directory; the inference section opens the same
# 'intents.json' relative path.
import json
with open('intents.json') as json_data:
  intents = json.load(json_data)
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'How are you', 'Hello', 'Good morning'], 'responses': ['Hello sir, good to see you again', 'Good to see you again, sir', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye'], 'responses': ['See you later, sir', 'Have a nice day ahead, sir ', 'Bye!.']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful"], 'responses': ['Happy to help!', 'Any time!', 'It was my pleasure']}]}


## Organise the data

In [3]:
# symbols that should not become part of the vocabulary
ignore_words = ['?']

# one (tokens, tag) document per training pattern, in file order
documents = [
  (nltk.word_tokenize(pattern), intent['tag'])
  for intent in intents['intents']
  for pattern in intent['patterns']
]

# vocabulary: lower-cased, stemmed tokens, ignored symbols dropped,
# duplicates removed, sorted
words = sorted({
  stemmer.stem(token.lower())
  for tokens, _ in documents
  for token in tokens
  if token not in ignore_words
})

# intent labels: unique tags, sorted
classes = sorted({tag for _, tag in documents})


print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

10 documents
3 classes ['goodbye', 'greeting', 'thanks']
15 unique stemmed words ["'s", 'ar', 'bye', 'good', 'goodby', 'hello', 'help', 'hi', 'how', 'lat', 'morn', 'see', 'thank', 'that', 'you']


In [4]:
# create training data
# Each sample pairs a bag-of-words vector (one slot per entry of `words`)
# with a one-hot label vector (one slot per entry of `classes`).
training = []
output = []

# empty array for output
output_empty = [0] * len(classes)

# training set, bag of words for each sentence
for doc in documents:
  # stem each token of the pattern; a set makes the membership test cheap
  pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}
  # 1 where the vocabulary word occurs in the pattern, 0 otherwise
  bag = [1 if w in pattern_words else 0 for w in words]

  # output is '0' for each tag and '1' for current tag
  output_row = list(output_empty)
  output_row[classes.index(doc[1])] = 1

  training.append([bag, output_row])


# shuffle the samples (features and labels stay paired)
random.shuffle(training)
# bag and output_row generally have different lengths, so the nested list is
# ragged; dtype=object is required or NumPy >= 1.24 raises ValueError
training = np.array(training, dtype=object)

# split into feature rows and label rows
train_x = list(training[:,0])
train_y = list(training[:,1])



In [5]:

# layer sizes are derived from the training data
n_features = len(train_x[0])
n_classes = len(train_y[0])

# network: input -> two fully connected layers of 8 units -> softmax output
net = tflearn.input_data(shape=[None, n_features])
for hidden_units in (8, 8):
  net = tflearn.fully_connected(net, hidden_units)
net = tflearn.fully_connected(net, n_classes, activation='softmax')
net = tflearn.regression(net)

# wrap the network in a model; tensorboard logs go to 'tflearn_logs'
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

# train on the prepared samples, then save the result to disk
model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True)
model.save('model.tflearn')

Training Step: 1999  | total loss: [1m[32m0.22623[0m[0m | time: 0.003s
| Adam | epoch: 1000 | loss: 0.22623 - acc: 0.9535 -- iter: 08/10
Training Step: 2000  | total loss: [1m[32m0.20561[0m[0m | time: 0.010s
| Adam | epoch: 1000 | loss: 0.20561 - acc: 0.9582 -- iter: 10/10
--
INFO:tensorflow:/content/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [6]:
# save all of our data structures
import pickle
# a context manager guarantees the file is flushed and closed after writing
with open("training_data", "wb") as training_file:
  pickle.dump(
    {'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y},
    training_file,
  )

## INFERENCE

In [7]:
# restore all of our data structures
import pickle
# NOTE: unpickling can execute arbitrary code; only load a "training_data"
# file that this notebook wrote itself.
# The context manager closes the file handle once loading is done.
with open("training_data", "rb") as training_file:
  data = pickle.load(training_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# import our chat-bot intents file
import json
with open('intents.json') as json_data:
    intents = json.load(json_data)

In [8]:

# load our saved model
# Reads back the file written by model.save('model.tflearn') into the
# existing `model` object, so the cell that builds `model` must run first.
model.load('./model.tflearn')

INFO:tensorflow:Restoring parameters from /content/model.tflearn


In [9]:
def clean_up_sentence(sentence):
  """Tokenise `sentence` and return its tokens lower-cased and stemmed, as a list."""
  return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exist in sentence (boolean like)
def bow(sentence, words, show_details=True):
  """Encode `sentence` as a bag-of-words vector over `words`.

  Returns a NumPy array with one entry per element of `words`: 1 if that
  word equals one of the sentence's stemmed tokens, otherwise 0.
  When `show_details` is true, every match is printed as it is found.
  """
  tokens = clean_up_sentence(sentence)

  bag = [0 for _ in words]
  for token in tokens:
    for idx, vocab_word in enumerate(words):
      if vocab_word != token:
        continue
      bag[idx] = 1
      if show_details:
        print(f"Found in bag: {vocab_word}")

  return np.array(bag)

In [10]:
ERROR_threshold = 0.25
def classify(sentence):
  """Return the intents predicted for `sentence`, most probable first.

  The result is a list of (tag, probability) tuples containing only the
  classes whose predicted probability is above ERROR_threshold.
  """
  # model output for the single encoded sentence
  probabilities = model.predict([bow(sentence, words)])[0]

  # keep predictions above the threshold, highest probability first
  ranked = sorted(
    ([idx, prob] for idx, prob in enumerate(probabilities) if prob > ERROR_threshold),
    key=lambda pair: pair[1],
    reverse=True,
  )

  # translate class indices into tag names
  return [(classes[idx], prob) for idx, prob in ranked]

# find a response 
def response(sentence, user="123", show_details=True):
  """Print a random response for the best-matching intent of `sentence`.

  Classification results are tried from most to least probable; the first
  one whose tag exists in `intents` has one of its responses printed.
  Nothing is printed when no result matches. Always returns None.
  `user` and `show_details` are accepted but not used here.
  """
  for tag, _probability in classify(sentence):
    for intent in intents['intents']:
      if intent['tag'] == tag:
        # a random response from the matching intent
        print(random.choice(intent['responses']))
        return None
  return None

## Contextualised version of response

In [12]:
# NOTE(review): disabled draft of a context-aware response(). As indented
# below, the context_filter check sits inside the `'context_set' in i`
# branch, so an intent without a 'context_set' key would never be answered;
# dedent that check one level before re-enabling this cell.
# # dict data structure
# context = {}

# def response(sentence, user="123", show_details=True):
#   results = classify(sentence)
#   # if we have a classification then find the matching intent flag
#   if results:
#     # loop as long as there are matches to process
#     while results:
#       for i in intents['intents']:
#         # find tag matching the first result
#         if i['tag'] == results[0][0]:
#           # set context for this intent if necessary
#           if 'context_set' in i:
#             if show_details: print('context: ', i['context_set'])
#             context[user] = i['context_set']

#             # check if this intent is contextual and applies to this user's conversation
#             if not 'context_filter' in i or \
#               (user in context and 'context_filter' in i and i['context_filter'] == context[user]):
#               if show_details: print('tag: ', i['tag'])
#               # a random response 
#               return print(random.choice(i['responses']))


#       results.pop(0)

In [24]:
# sample message to try the bot with
text = "thanks"
# classify(text)
# response() prints the chosen reply
response(text)

Found in bag: thank
It was my pleasure
