1. If spaCy cannot load the English model, run the following command in your Python environment or at a command prompt to download it, then run the first block again:
	
	python -m spacy download en_core_web_sm


2. There are two ways to load the model in the first block (where the libraries are imported); option a worked on one of our laptops and option b on the other:

	a. import en_core_web_sm
	   nlp = en_core_web_sm.load()

	b. nlp = spacy.load("en_core_web_sm")


3. The rest of the program runs as is. The output is served on localhost; the test cases are provided in the accompanying Word document.

In [1]:
import datetime
import os
import json
import spacy
import numpy as np
import time
import en_core_web_sm
nlp = en_core_web_sm.load()

In [None]:
# Read the labelled training sentences from the JSON file.
training_data = []
training_data_file = 'training_data.json'
with open(training_data_file) as data_file:
    loaded_sentences = json.load(data_file)
    training_data = loaded_sentences
print ("%s sentences in training data" % len(training_data))

In [None]:
# Build the vocabulary (`words`), the label list (`classes`) and the
# per-sentence records (`documents`) from the loaded training data.
words = []
classes = []
documents = []
# Lemmas that are left out of the vocabulary.
# NOTE(review): '-PRON-' is presumably the placeholder lemma spaCy assigns to
# pronouns -- verify against the installed spaCy version.
ignore_words = ['?', '-PRON-']

for pattern in training_data:
    # Tokenize and lemmatize the sentence with spaCy.
    parsed = nlp(pattern['sentence'])
    lemmas = [token.lemma_ for token in parsed if token.lemma_ not in ignore_words]
    words.extend(lemmas)
    # Each document is (original tokens, kept lemmas, class label).
    documents.append(([token.orth_ for token in parsed], lemmas, pattern['class']))
    # Labels are collected once each, in first-seen order.
    if pattern['class'] not in classes:
        classes.append(pattern['class'])

# De-duplicate the vocabulary in sorted order. A bare list(set(...)) orders
# strings by their (per-process randomized) hash, which would shuffle the
# feature columns from one run to the next.
words = sorted(set(words))

# `classes` is already duplicate-free; keeping its first-seen order (instead of
# passing it through set()) makes the class indices reproducible as well.

print (len(documents), "documents")
print (len(classes), "classes", classes)
print (len(words), "unique lemmatized words", words)

In [None]:
# Turn every document into a bag-of-words row and a one-hot class row.
training = []
output = []
# Template row for the one-hot class encoding: one zero per class.
output_empty = [0] * len(classes)

for doc in documents:
    # Lemmas kept for this sentence.
    pattern_words = doc[1]
    # 1 where the vocabulary word occurs in the sentence, 0 otherwise.
    bag = [1 if vocab_word in pattern_words else 0 for vocab_word in words]
    training.append(bag)

    # Copy the template and switch on the position of this document's class.
    output_row = list(output_empty)
    class_position = classes.index(doc[2])
    output_row[class_position] = 1
    output.append(output_row)

# Show the first sample: its lemmas, its bag-of-words row and its class row.
i = 0
w = documents[i][1]
print (w)
print (training[i])
print (output[i])

In [None]:
import numpy as np
import time

# compute sigmoid nonlinearity
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar or NumPy array."""
    return 1 / (1 + np.exp(-x))

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    """Return the sigmoid's slope given its *output* value: output * (1 - output)."""
    complement = 1 - output
    return output * complement
 
def clean_up_sentence(sentence):
    """Run *sentence* through the spaCy pipeline and return the lemma of each token."""
    lemmas = []
    for token in nlp(sentence):
        lemmas.append(token.lemma_)
    return lemmas

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    """Encode *sentence* as a bag-of-words vector over *words*.

    Returns a NumPy array with one entry per vocabulary word: 1 when that
    word equals one of the sentence's lemmas, 0 otherwise. With
    *show_details* set, every match is printed.
    """
    lemmas = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for lemma in lemmas:
        for position, vocab_word in enumerate(words):
            if vocab_word != lemma:
                continue
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(bag)

def think(sentence, show_details=False):
    """Feed *sentence* forward through the network and return the output layer.

    Uses the module-level `words`, `synapse_0` and `synapse_1`.
    """
    features = bow(sentence, words, show_details)
    if show_details:
        print ("sentence:", sentence, "\n bow:", features)
    # Input (bag of words) -> hidden layer -> output layer, sigmoid at each step.
    hidden = sigmoid(np.dot(features, synapse_0))
    return sigmoid(np.dot(hidden, synapse_1))

In [None]:
def train(X, y, hidden_neurons=10, alpha=1, epochs=50000, dropout=False, dropout_percent=0.5):
    """Train a two-layer sigmoid network on X/y and save the weights to synapses.json.

    Reads the module-level `classes` (output width, also saved) and `words`
    (saved alongside the weights).

    Args:
        X: training inputs; the length of X[0] sets the input width.
        y: target rows; the network output is subtracted from it element-wise.
        hidden_neurons: width of the hidden layer.
        alpha: factor applied to each weight update.
        epochs: the loop runs epochs + 1 iterations unless it stops early.
        dropout: when true, hidden activations are randomly masked each iteration.
        dropout_percent: fraction of hidden activations dropped when dropout is on.

    Returns:
        None. The trained weights are written to "synapses.json".
    """

    print ("Training with %s neurons, alpha:%s, dropout:%s %s" % (hidden_neurons, str(alpha), dropout, dropout_percent if dropout else '') )
    print ("Input matrix: %sx%s    Output matrix: %sx%s" % (len(X),len(X[0]),1, len(classes)) )
    # fixed seed so the initial weights are the same on every call
    np.random.seed(1)

    last_mean_error = 1
    # randomly initialize our weights with mean 0
    synapse_0 = 2*np.random.random((len(X[0]), hidden_neurons)) - 1
    synapse_1 = 2*np.random.random((hidden_neurons, len(classes))) - 1

    prev_synapse_0_weight_update = np.zeros_like(synapse_0)
    prev_synapse_1_weight_update = np.zeros_like(synapse_1)

    # NOTE(review): the direction counts below are accumulated but never read
    # anywhere in this function.
    synapse_0_direction_count = np.zeros_like(synapse_0)
    synapse_1_direction_count = np.zeros_like(synapse_1)
        
    for j in iter(range(epochs+1)):

        # Feed forward through layers 0, 1, and 2
        layer_0 = X
        layer_1 = sigmoid(np.dot(layer_0, synapse_0))
                
        if(dropout):
            # zero a random subset of hidden activations and rescale the rest by 1/(1-dropout_percent)
            layer_1 *= np.random.binomial([np.ones((len(X),hidden_neurons))],1-dropout_percent)[0] * (1.0/(1-dropout_percent))

        layer_2 = sigmoid(np.dot(layer_1, synapse_1))

        # how much did we miss the target value?
        layer_2_error = y - layer_2

        # checked at j = 10000, 20000, ... (multiples of 10000 above 5000)
        if (j% 10000) == 0 and j > 5000:
            # if this 10k iteration's error is greater than the last iteration, break out
            if np.mean(np.abs(layer_2_error)) < last_mean_error:
                print ("delta after "+str(j)+" iterations:" + str(np.mean(np.abs(layer_2_error))) )
                last_mean_error = np.mean(np.abs(layer_2_error))
            else:
                print ("break:", np.mean(np.abs(layer_2_error)), ">", last_mean_error )
                break
                
        # in what direction is the target value?
        # were we really sure? if so, don't change too much.
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)

        # how much did each l1 value contribute to the l2 error (according to the weights)?
        layer_1_error = layer_2_delta.dot(synapse_1.T)

        # in what direction is the target l1?
        # were we really sure? if so, don't change too much.
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
        
        synapse_1_weight_update = (layer_1.T.dot(layer_2_delta))
        synapse_0_weight_update = (layer_0.T.dot(layer_1_delta))
        
        if(j > 0):
            # count, per weight, how often the sign of the update differs from the previous iteration
            synapse_0_direction_count += np.abs(((synapse_0_weight_update > 0)+0) - ((prev_synapse_0_weight_update > 0) + 0))
            synapse_1_direction_count += np.abs(((synapse_1_weight_update > 0)+0) - ((prev_synapse_1_weight_update > 0) + 0))        
        
        # apply the updates, scaled by alpha
        synapse_1 += alpha * synapse_1_weight_update
        synapse_0 += alpha * synapse_0_weight_update
        
        prev_synapse_0_weight_update = synapse_0_weight_update
        prev_synapse_1_weight_update = synapse_1_weight_update

    now = datetime.datetime.now()

    # persist synapses
    synapse = {'synapse0': synapse_0.tolist(), 'synapse1': synapse_1.tolist(),
               'datetime': now.strftime("%Y-%m-%d %H:%M"),
               'words': words,
               'classes': classes
              }
    synapse_file = "synapses.json"

    with open(synapse_file, 'w') as outfile:
        json.dump(synapse, outfile, indent=4, sort_keys=True)
    print ("saved synapses to:", synapse_file)

In [None]:
# Convert the Python lists to arrays, train, and report how long training took.
X = np.array(training)
y = np.array(output)

start_time = time.time()

train(
    X,
    y,
    hidden_neurons=20,
    alpha=0.1,
    epochs=100000,
    dropout=False,
    dropout_percent=0.2,
)

elapsed_time = time.time() - start_time
print ("processing time:", elapsed_time, "seconds")

In [None]:
# Minimum output score a class needs to be reported by classify().
ERROR_THRESHOLD = 0.2
# Read the saved weights back from disk.
synapse_file = 'synapses.json'
with open(synapse_file) as data_file:
    synapse = json.load(data_file)
# Convert the stored nested lists back into arrays.
synapse_0 = np.asarray(synapse['synapse0'])
synapse_1 = np.asarray(synapse['synapse1'])

def classify(sentence, show_details=False):
    """Classify *sentence* and return [[class, score], ...], best score first.

    Only classes scoring above the module-level ERROR_THRESHOLD are kept.
    The classification and the entity type of every token are printed.
    """
    scores = think(sentence, show_details)

    # Keep (index, score) pairs above the threshold, highest score first.
    ranked = sorted(
        ([position, score] for position, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return_results = [[classes[position], score] for position, score in ranked]
    print ("%s \n classification: %s" % (sentence, return_results))

    entities = [token.ent_type_ for token in nlp(sentence)]
    print(entities)
    return return_results

In [None]:
# Try the classifier on a handful of sample sentences, separated by blank output.
sample_sentences = [
    "how are you today?",
    "How is it going?",
    "What are Off-road motorcycles?",
    "what is Panigale?",
    "What is your favorite bike?",
    "What is a piston?",
    "What are valves?",
    "How do you make a turbocharger?",
    "What is an exhaust system",
    "See you tomorrow",
]
*leading_sentences, last_sentence = sample_sentences
for sample in leading_sentences:
    classify(sample)
    print("\n")
# Kept as the final bare expression, matching the original cell layout.
classify(last_sentence)

In [None]:
# compute sigmoid nonlinearity
def sigmoid(x):
    """Squash *x* (scalar or NumPy array) into (0, 1) with the logistic function."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    """Given a sigmoid *output* value, return the sigmoid's derivative at that point."""
    complement = 1 - output
    return output * complement
 
def clean_up_sentence(sentence):
    """Return the list of token lemmas spaCy produces for *sentence*."""
    parsed = nlp(sentence)
    lemmas = [token.lemma_ for token in parsed]
    return lemmas

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    """Return a 0/1 NumPy vector marking which entries of *words* occur in *sentence*.

    The sentence is lemmatized first; entry i is 1 when words[i] equals one of
    the lemmas. With *show_details* set, each match is printed as it is found.
    """
    lemmas = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for lemma in lemmas:
        for position, vocab_word in enumerate(words):
            if vocab_word != lemma:
                continue
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(bag)

def think(sentence, words, synapse_0, synapse_1, show_details=False):
    """Feed *sentence* forward through the two weight matrices.

    The sentence is encoded as a bag of words over *words*, multiplied by
    *synapse_0* and then *synapse_1*, with a sigmoid after each product.
    Returns the output-layer activations.
    """
    features = bow(sentence, words, show_details)
    if show_details:
        print ("sentence:", sentence, "\n bow:", features)
    hidden = sigmoid(np.dot(features, synapse_0))
    return sigmoid(np.dot(hidden, synapse_1))

def classify(sentence, synapse_0, synapse_1, words, classes, ERROR_THRESHOLD = 0.2, show_details=False):
    """Classify *sentence* and extract its entities and direct objects.

    Returns a dict with three keys:
      "probability": [[class, score], ...] for scores above ERROR_THRESHOLD,
                     highest score first;
      "entities":    [[token text, entity type], ...] for tokens with an entity type;
      "variables":   [[token text, dependency label], ...] for tokens whose
                     dependency label is 'dobj'.
    """
    scores = think(sentence, words, synapse_0, synapse_1, show_details)

    # Keep (index, score) pairs above the threshold, highest score first.
    ranked = sorted(
        ([position, score] for position, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    probability = [[classes[position], score] for position, score in ranked]

    entities = []
    variables = []
    for token in nlp(sentence):
        if token.ent_type_:
            entities.append([token.orth_, token.ent_type_])
        if token.dep_ == 'dobj':
            variables.append([token.orth_, token.dep_])

    # The caller turns this analysis into a reply; no reply is built here.
    return {"probability": probability, "entities": entities, "variables": variables}

In [None]:
# Load the labelled sentences.
training_data = []
training_data_file = 'training_data.json'
# Alternative data set: 'training_data_specific_domain.json'
with open(training_data_file) as data_file:
    training_data = json.load(data_file)

# Build the vocabulary (`words`), the label list (`classes`) and the
# per-sentence records (`documents`).
words = []
classes = []
documents = []
# Lemmas that are left out of the vocabulary.
# NOTE(review): '-PRON-' is presumably the placeholder lemma spaCy assigns to
# pronouns -- verify against the installed spaCy version.
ignore_words = ['?', '-PRON-']
for pattern in training_data:
    # Tokenize and lemmatize the sentence with spaCy.
    parsed = nlp(pattern['sentence'])
    lemmas = [token.lemma_ for token in parsed if token.lemma_ not in ignore_words]
    words.extend(lemmas)
    # Each document is (original tokens, kept lemmas, class label).
    documents.append(([token.orth_ for token in parsed], lemmas, pattern['class']))
    # Labels are collected once each, in first-seen order.
    if pattern['class'] not in classes:
        classes.append(pattern['class'])

# De-duplicate the vocabulary in sorted order. A bare list(set(...)) orders
# strings by their (per-process randomized) hash, which would shuffle the
# feature columns from one run to the next.
words = sorted(set(words))

# `classes` is already duplicate-free; keeping its first-seen order (instead of
# passing it through set()) makes the class indices reproducible as well.

# Turn every document into a bag-of-words row and a one-hot class row.
training = []
output = []
# Template row for the one-hot class encoding: one zero per class.
output_empty = [0] * len(classes)

for doc in documents:
    # Lemmas kept for this sentence.
    pattern_words = doc[1]
    # 1 where the vocabulary word occurs in the sentence, 0 otherwise.
    bag = [1 if vocab_word in pattern_words else 0 for vocab_word in words]
    training.append(bag)

    # Copy the template and switch on the position of this document's class.
    output_row = list(output_empty)
    output_row[classes.index(doc[2])] = 1
    output.append(output_row)

X = np.array(training)
y = np.array(output)

# NOTE(review): nothing executes between these two timestamps, so the time
# reported here is close to zero; the train() call happens later in this cell.
start_time = time.time()

elapsed_time = time.time() - start_time
print ("processing time:", elapsed_time, "seconds")


def train(X, y, classes, words, hidden_neurons=10, alpha=1, epochs=50000, dropout=False, dropout_percent=0.5):
    """Train a two-layer sigmoid network on X/y and save the weights to synapses.json.

    Every iteration feeds the whole of X forward, computes the error against
    y, and adjusts both weight matrices by alpha times the back-propagated
    update. At iterations 10000, 20000, ... the mean absolute output error is
    compared with the value from the previous check; training stops early as
    soon as it has not decreased.

    Args:
        X: training inputs; the length of X[0] sets the input width.
        y: target rows; the network output is subtracted from it element-wise.
        classes: class labels; their count sets the output width. Saved with the weights.
        words: vocabulary; saved with the weights.
        hidden_neurons: width of the hidden layer.
        alpha: factor applied to each weight update.
        epochs: the loop runs epochs + 1 iterations unless it stops early.
        dropout: when true, hidden activations are randomly masked each iteration.
        dropout_percent: fraction of hidden activations dropped when dropout is on.

    Returns:
        None. The weights, a timestamp, `words` and `classes` are written to
        "synapses.json" in the current working directory.
    """
    print ("Training with %s neurons, alpha:%s, dropout:%s %s" % (hidden_neurons, str(alpha), dropout, dropout_percent if dropout else '') )
    print ("Input matrix: %sx%s    Output matrix: %sx%s" % (len(X),len(X[0]),1, len(classes)) )
    # Fixed seed so the initial weights are the same on every call.
    np.random.seed(1)

    last_mean_error = 1
    # Randomly initialize the weights in [-1, 1) (mean 0).
    synapse_0 = 2*np.random.random((len(X[0]), hidden_neurons)) - 1
    synapse_1 = 2*np.random.random((hidden_neurons, len(classes))) - 1

    for j in range(epochs + 1):

        # Feed forward through layers 0, 1, and 2.
        layer_0 = X
        layer_1 = sigmoid(np.dot(layer_0, synapse_0))

        if dropout:
            # Zero a random subset of hidden activations and rescale the
            # survivors by 1/(1-dropout_percent).
            layer_1 *= np.random.binomial([np.ones((len(X),hidden_neurons))],1-dropout_percent)[0] * (1.0/(1-dropout_percent))

        layer_2 = sigmoid(np.dot(layer_1, synapse_1))

        # How far is the output from the target?
        layer_2_error = y - layer_2

        if (j % 10000) == 0 and j > 5000:
            # Stop as soon as the error at this checkpoint is not lower than
            # at the previous one.
            mean_error = np.mean(np.abs(layer_2_error))
            if mean_error < last_mean_error:
                print ("delta after "+str(j)+" iterations:" + str(mean_error) )
                last_mean_error = mean_error
            else:
                print ("break:", mean_error, ">", last_mean_error )
                break

        # Scale the error by the sigmoid slope: confident outputs change less.
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)

        # How much did each hidden value contribute to the output error
        # (according to the weights)?
        layer_1_error = layer_2_delta.dot(synapse_1.T)
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)

        synapse_1_weight_update = layer_1.T.dot(layer_2_delta)
        synapse_0_weight_update = layer_0.T.dot(layer_1_delta)

        synapse_1 += alpha * synapse_1_weight_update
        synapse_0 += alpha * synapse_0_weight_update

    now = datetime.datetime.now()

    # Persist the weights together with the vocabulary and labels they were
    # trained with.
    synapse = {'synapse0': synapse_0.tolist(), 'synapse1': synapse_1.tolist(),
               'datetime': now.strftime("%Y-%m-%d %H:%M"),
               'words': words,
               'classes': classes
              }
    synapse_file = "synapses.json"
    # Alternative output file: "synapses_specific.json"

    with open(synapse_file, 'w') as outfile:
        json.dump(synapse, outfile, indent=4, sort_keys=True)
    print ("saved synapses to:", synapse_file)

# Train on the prepared matrices; train() writes the weights to disk.
train(
    X,
    y,
    classes,
    words,
    hidden_neurons=20,
    alpha=0.1,
    epochs=200000,
    dropout=False,
    dropout_percent=0.2,
)

In [None]:
def bot_response(result):
    """Return the bot's reply for a classification *result*.

    Args:
        result: dict whose 'probability' entry is a list of [class, score]
            pairs, best match first.

    Returns:
        The canned reply for the best-matching class, or a fallback reply
        when no class was detected or the class has no canned reply.
    """
    replies = {
        'Welcome': 'Hello. How may I help you',
        'Bye': 'See you soon.',
        'motorbike': 'How can I help you with Bikes today',
        'engine': 'engine is the brain of bikes',
        'weather': 'It has a great weather',
    }
    fallback = 'Sorry, I did not understand that.'

    data = result.get('probability')
    # No class cleared the threshold (empty list) or the key is missing.
    if not data:
        return fallback
    return replies.get(data[0][0], fallback)

In [None]:
from flask import Flask, request, jsonify
import json
import numpy as np
app = Flask(__name__)

# Read the trained weights together with the vocabulary and class labels that
# were saved with them.
# (The probability threshold is left at classify()'s default.)
synapse_file = 'synapses.json'
# Alternative weights file: 'synapses_specific.json'
with open(synapse_file) as data_file:
    synapse = json.load(data_file)
synapse_0 = np.asarray(synapse['synapse0'])
synapse_1 = np.asarray(synapse['synapse1'])
words = synapse['words']
classes = synapse['classes']

@app.route('/', methods = ['GET', 'POST'])
def bot_request():
    """Handle requests to '/'.

    GET returns a fixed greeting. POST reads the sentence from the 'avatar'
    query parameter ('*' when absent), classifies it, and returns a JSON
    object with the sentence, the classification results and the bot's reply.
    """
    if request.method == 'GET':
        return 'Hello, World!'

    if request.method == 'POST':
        # The sentence comes from the query string, not from the request body.
        sentence = request.args.get('avatar', default='*', type=str)
        print("content: ", sentence)
        analysis = classify(sentence, synapse_0, synapse_1, words, classes)
        print("result", analysis)
        # Turn the classification into the reply text.
        reply = bot_response(analysis)
        print("results: ", analysis)
        print("response: ", reply)
        payload = {'sentence': sentence, 'results': analysis, 'response': reply}
        json_data = jsonify(payload)
        print("json data: ", json_data)
        return json_data

if __name__ == '__main__':
    # Serve on the loopback interface, port 5010, with debug mode off.
    app.run(
        host="127.0.0.1",
        port=5010,
        debug=False,
    )