## Imports

In [1]:
import random
import numpy as np
from string import punctuation

import nltk
from nltk.stem.snowball import SnowballStemmer as stemmer_fn

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras import utils
from tensorflow.keras import layers

## Load data & configure NLTK

In [2]:
LANG = 'english'

stemmer = stemmer_fn(LANG)
nltk.download('punkt')

import json
with open('intents.json') as json_data:
    intents = json.load(json_data)

[nltk_data] Downloading package punkt to /home/kuba/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Extract corpus & intent from json

In [3]:
words = []
documents = []
classes = sorted(list(set([intent['tag'] for intent in intents['intents']])))
stop_words = set(list(punctuation))

def pre_process_words(wrds, stop):
    return [stemmer.stem(w.lower()) for w in wrds if w not in stop]

def pre_process_sentence(sentence, stop):
    wrds = nltk.word_tokenize(sentence)
    return [stemmer.stem(w.lower()) for w in wrds if w not in stop]

def bow_fn(sentence, words):
    wrds = pre_process_sentence(sentence, stop_words)
    bag = np.zeros((num_words))

    for i,w in enumerate(words):
        if w in wrds:
            bag[i] = 1
            
    return bag
            
# Go over the intents and their respective patterns
for intent in intents['intents']:
    for pattern in intent['patterns']:

        # tokenize patterns & skip stop words
        wrds = nltk.word_tokenize(pattern)
        words.extend(wrds)

        # create pairs (tokenized sentence, intent)
        documents.append((wrds, intent['tag']))

'''
dictionary of words
- stemmed
- lowercase
- not in stop_words list
'''
words = pre_process_words(words, stop_words)
words = sorted(list(set(words)))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

33 documents
8 classes ['More education', 'More experience', 'education', 'experience', 'goodbye', 'greeting', 'noanswer', 'thanks']
48 unique stemmed words ["'s", 'about', 'anyon', 'are', 'befor', 'bye', 'cheer', 'day', 'degre', 'did', 'do', 'educ', 'elabor', 'exact', 'experi', 'good', 'goodby', 'have', 'hello', 'help', 'hey', 'hi', 'how', 'is', 'it', 'later', 'me', 'more', 'now', 'obtain', 'on', 'project', 'right', 'see', 'so', 'someth', 'somewher', 'studi', 'tell', 'thank', 'that', 'the', 'there', 'what', 'where', 'work', 'you', 'your']


## Create training data

In [35]:
num_documents = len(documents)
num_classes = len(classes)
num_words = len(words)
num_classes = len(classes)

X = np.zeros((num_documents, num_words))
y = np.zeros((num_documents, num_classes))

# training set, bag of words for each sentence
for j,doc in enumerate(documents):
    wrds, intent = doc
    wrds = pre_process_words(wrds, stop_words)
    
    for i,w in enumerate(words):
        if w in wrds:
            X[j,i] = 1

    y[j,classes.index(intent)] = 1

## Build & train simple model 

Here I actually don't mind the model overfitting the data

In [62]:
idx = np.arange(num_documents)
random.shuffle(idx)

X = X[idx]
y = y[idx]

num_neurons = 10

model = Sequential()
model.add(Dense(num_neurons, input_shape=(X.shape[1],)))
model.add(Dense(num_neurons))
model.add(Dense(num_neurons))
model.add(Dense(num_classes, activation='softmax'))

model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
history = model.fit(np.array(X), np.array(y), epochs=50, batch_size=8)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 10)                490       
_________________________________________________________________
dense_17 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_18 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_19 (Dense)             (None, 8)                 88        
Total params: 798
Trainable params: 798
Non-trainable params: 0
_________________________________________________________________
Train on 33 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epo

In [43]:
context = {}

def inference(sentence, threshold, show_details=False):
    p = bow_fn(sentence, words)
    p = np.expand_dims(p,axis=0)

    results = model.predict(p)[0]
    y_pred = np.argmax(results)
    
    if show_details:
        print(results, y_pred)
    
    if results[y_pred] > threshold:
        return y_pred
    else:
        return None

In [52]:
text = 'Talk to you later jakub'

threshold = 0.3
t = inference(text, threshold)
print(classes[t])

goodbye


In [53]:
def response(sentence, userID='user_ID', show_details=False):
    results = inference(sentence, threshold, show_details)

    if results is not None:
        intent_pred = classes[results]

        for intent in intents['intents']:
            if intent['tag'] == intent_pred:
                if 'context_set' in intent:
                    context[userID] = intent['context_set']

                    if show_details: 
                        print ('context:', intent['context_set'])

                # check if this intent is contextual and applies to this user's conversation
                if not 'context_filter' in intent or \
                    (userID in context and 'context_filter' in intent and intent['context_filter'] == context[userID]):
                    if show_details: 
                        print('tag:', intent['tag'])

                    return print(random.choice(intent['responses']))

In [56]:
print(classes)

context = {}
print('\n\n',context)
response("What's your education", userID='123', show_details=True)
print('\n\n',context)
response("tell me more about it", userID='123', show_details=True)
response('alright thanks', userID='123', show_details=True)

['More education', 'More experience', 'education', 'experience', 'goodbye', 'greeting', 'noanswer', 'thanks']


 {}
[1.5713117e-03 1.0105671e-06 9.8102254e-01 1.7396430e-02 8.7539702e-06
 1.1174781e-14 2.5669622e-16 2.9150478e-12] 2
context: more_education
tag: education
I recently graduated from master degree.


 {'123': 'more_education'}
[6.0428804e-01 3.9481246e-01 4.5078650e-06 3.7309271e-04 5.2438446e-07
 3.2260607e-07 6.3809952e-05 4.5733442e-04] 0
tag: More education
Bachelor in Data Science & master in Mathematical Modelling and Computation
[2.7577818e-07 4.2676228e-07 1.8495129e-12 2.7759708e-16 8.0395889e-16
 5.5385695e-05 5.4139377e-09 9.9994397e-01] 7
context: 
tag: thanks
Any time!


In [61]:
context = {}
response("What's your education", userID='12345')
response("tell me more", userID='12345')
response("tell me more", userID='12345')
response('alright thanks', userID='12345')
response("cheers")
response("bye")

print('\n\n')
response("Where do you work", userID='15')
response("tell me more", userID='15')
response("tell me more", userID='15')
response('alright thanks', userID='15')
response("cheers")
response("bye")

I hold BSc & MSc
My pleasure
You're welcome!
Have a nice day



I worked as ML engineer.
I worked on medical image segmentation but also news article classification.
Sample projects are listed on my portfolio website: https://jakubczerny.wixsite.com/portfolio
You're welcome!
You're welcome!
Have a nice day
