In [0]:
import nltk
from nltk.stem.lancaster import LancasterStemmer

stemmer = LancasterStemmer()

# Import package

import tensorflow as tf
import json
import tflearn
import numpy as np 
import random
import pickle

In [136]:
# Load the intent definitions (tags, example patterns and canned responses)
# that every later cell reads through `data`.
# The encoding is pinned to UTF-8 so the file parses the same way on every
# platform instead of depending on the locale's default text encoding.
with open('intents.json', encoding='utf-8') as json_file:
  data = json.load(json_file)

print(data['intents'])

[{'tag': 'greeting', 'patterns': ['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Whats up'], 'responses': ['Hello!', 'Good to see you again!', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'action and advanture', 'patterns': [' Elements of action/adventure (car chases, shootouts, explosions) and thriller', 'Combines action set-pieces with serious themes, character insight and/or emotional power', 'I like action movies', 'i like fighting movies', 'action sequences, such as fighting, stunts, car chases or explosions', 'Fighting is really awesome'], 'responses': ['I also like action movies :)', 'ohh nice', 'yeah action movie is awesome'], 'context_set': ''}, {'tag': 'comedy', 'patterns': ['These films are designed to make the audience laugh through amusement', 'very first silent movies were comedies, as slapstick comedy often relies on visual depictions', 'with many former stand-up comics transitioning to the film industry', 'satirical comedy-drama & the plot i

In [0]:
# Build (or reload) the artefacts the network is trained on:
#   words    - sorted, de-duplicated stems of every token in the patterns
#   labels   - sorted list of intent tags
#   training - one 0/1 bag-of-words row per pattern, aligned with `words`
#   output   - one one-hot row per pattern, aligned with `labels`
#
# NOTE(review): pickle.load can execute arbitrary code, so only keep a
# data.pickle that this notebook wrote itself. The cache is also never
# invalidated here: delete data.pickle after editing intents.json.
try:
  with open('data.pickle', "rb") as f:
    words, labels, training, output = pickle.load(f)
except Exception:
  # Missing, truncated or incompatible cache: rebuild everything from `data`.
  # `Exception` rather than a bare `except` so that KeyboardInterrupt and
  # SystemExit are no longer swallowed.
  words = []
  labels = []
  # docs_x holds the token list of each pattern
  docs_x = []
  # docs_y holds the tag of the pattern at the same position in docs_x
  docs_y = []

  for intent in data["intents"]:
    for pattern in intent["patterns"]:
      # Split the sentence into individual tokens.
      # Requires the tokenizer data: nltk.download('punkt')
      wrds = nltk.word_tokenize(pattern)
      words.extend(wrds)

      docs_x.append(wrds)
      docs_y.append(intent["tag"])

    # Record each tag once, in first-seen order (sorted further down).
    if intent["tag"] not in labels:
      labels.append(intent["tag"])

  print(labels)
  print(docs_x)
  print(docs_y)

  # Drop punctuation tokens, then lower-case and stem so that upper- and
  # lower-case spellings of a word share one vocabulary entry.
  unvalid_data = ['?', ')', '(', ',', '.', '&']
  words = [stemmer.stem(w.lower()) for w in words if w not in unvalid_data]
  print(words)

  # Remove duplicates and sort.
  words = sorted(set(words))
  print(words)

  labels = sorted(labels)
  print(labels)

  # The network only consumes numbers, so every pattern becomes a vector
  # with a 1 for each vocabulary stem it contains and 0 elsewhere, and every
  # tag becomes a one-hot vector over `labels`.
  training = []
  output = []

  out_empty = [0] * len(labels)
  print(out_empty)
  for doc, tag in zip(docs_x, docs_y):
    # Normalise exactly like the vocabulary above (lower-case, then stem);
    # a set makes the membership test below a constant-time lookup.
    doc_stems = {stemmer.stem(w.lower()) for w in doc}
    bag = [1 if w in doc_stems else 0 for w in words]

    output_row = out_empty[:]
    output_row[labels.index(tag)] = 1

    training.append(bag)
    output.append(output_row)

  training = np.array(training)
  output = np.array(output)

  # Cache the result so the next run can skip the preprocessing.
  with open('data.pickle', "wb") as f:
    pickle.dump((words, labels, training, output), f)


**TensorFlow**

In [0]:
# Silence Python warnings from here on.
import warnings
warnings.simplefilter('ignore')

# Clear the default TensorFlow graph before building the network.
tf.reset_default_graph()

# Every bag-of-words row has the same length, so the first row gives the
# input width; likewise the first one-hot row gives the number of tags.
n_inputs = len(training[0])
n_tags = len(output[0])

# Architecture:
#   input (n_inputs) -> dense 8 -> dense 8 -> dense n_tags with softmax
net = tflearn.input_data(shape=[None, n_inputs])

# Two fully connected hidden layers of 8 units each.
for hidden_units in (8, 8):
  net = tflearn.fully_connected(net, hidden_units)

# Softmax yields one probability per tag; the highest one selects the reply.
net = tflearn.fully_connected(net, n_tags, activation="softmax")

# Attach the training op with tflearn's default settings and wrap the graph
# in a trainable model object.
net = tflearn.regression(net)
model = tflearn.DNN(net)

In [139]:
# n_epoch is the number of full passes over the training data; batch_size is
# the number of rows fed per step; show_metric adds accuracy to the log.
fit_options = {"n_epoch": 1000, "batch_size": 8, "show_metric": True}
model.fit(training, output, **fit_options)

# Persist the trained weights next to the notebook.
model.save("model.tflearn")

Training Step: 4999  | total loss: [1m[32m0.01296[0m[0m | time: 0.016s
| Adam | epoch: 1000 | loss: 0.01296 - acc: 1.0000 -- iter: 32/39
Training Step: 5000  | total loss: [1m[32m0.01272[0m[0m | time: 0.019s
| Adam | epoch: 1000 | loss: 0.01272 - acc: 1.0000 -- iter: 39/39
--
INFO:tensorflow:/content/model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


**Start prediction**

In [0]:
def beg_of_words(s, words):
  """Encode sentence `s` as a 0/1 vector aligned with the vocabulary `words`.

  The sentence is tokenized with nltk, and each token is lower-cased and
  stemmed with the module-level `stemmer`. Position i of the result is 1
  when `words[i]` equals one of those stems and 0 otherwise.

  Args:
    s: the raw sentence to encode.
    words: the vocabulary (sequence of stems) defining the vector layout.

  Returns:
    A numpy array with one entry per element of `words`.
  """
  sentence_stems = {stemmer.stem(token.lower()) for token in nltk.word_tokenize(s)}
  return np.array([1 if vocab_word in sentence_stems else 0 for vocab_word in words])

**Chat Response**

In [0]:
def chat():
  """Run an interactive console chat until the user types 'quit'.

  Each turn reads a line with input(), encodes it with beg_of_words()
  against the module-level `words`, asks the module-level `model` for a
  prediction, prints the tag from `labels` with the highest score, and then
  prints a randomly chosen response for that tag from `data['intents']`.

  If no intent carries the predicted tag, or its response list is empty, a
  fallback line is printed instead of raising or repeating the previous
  turn's reply.
  """
  print("start talking with the bot (type 'quit' to exit) ")
  print("\n")
  while True:
    user_input = input("Type something 😃 : ")

    if user_input.lower() == 'quit':
      break

    # The model takes a batch, so wrap the single encoded sentence in a list
    # and read the scores back from the first (only) row.
    result = model.predict([beg_of_words(user_input, words)])

    # Index of the highest score.
    result_index = np.argmax(result[0])

    tag = labels[result_index]
    print("Movie Genre is {}".format(tag))

    # Reset on every turn so a tag without a matching intent can neither
    # hit an unbound variable nor reuse the replies of the previous turn.
    responses = []
    for intent in data['intents']:
      if tag == intent['tag']:
        responses = intent['responses']

    if responses:
      print("🤖 : {}".format(random.choice(responses)))
    else:
      print("🤖 : Sorry, I don't have a reply for that yet.")
    print("\n")


In [154]:
# Start the interactive session; it keeps prompting until the user types 'quit'.
chat()

start talking with the bot (type 'quit' to exit) 


Type something 😃 : Hello
Movie Genre is greeting
🤖 : Hi there, how can I help?


Type something 😃 : I love romantic movie
Movie Genre is Romance
🤖 : Yeahh i like true love movies


Type something 😃 : i fear watching horror movie
Movie Genre is Horror
🤖 : Hmm horror movies are so thriilled


Type something 😃 : i like fighting and stunts movies
Movie Genre is action and advanture
🤖 : ohh nice


Type something 😃 : i love to watch comedy movies
Movie Genre is comedy
🤖 : ohh nice


Type something 😃 : quit
