## Imports

In [1]:
import random
import numpy as np
from string import punctuation

import nltk
from nltk.stem.snowball import SnowballStemmer as stemmer_fn

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras import utils
from tensorflow.keras import layers

## Load data & configure NLTK

In [2]:
import json

# Language used by the Snowball stemmer.
LANG = 'english'

stemmer = stemmer_fn(LANG)
# Fetch the 'punkt' package (the download log shows it is skipped when already up to date).
nltk.download('punkt')

# JSON is UTF-8 by specification; pass the encoding explicitly so that
# loading does not depend on the platform's default locale.
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

[nltk_data] Downloading package punkt to /home/kuba/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Extract corpus & intent from json

In [3]:
# words: every token seen in the patterns; documents: (tokens, tag) pairs.
words, documents = [], []
# Unique intent tags in sorted order; a tag's position is its class index.
classes = sorted({intent['tag'] for intent in intents['intents']})
# Single punctuation characters are the only tokens treated as stop words.
stop_words = set(punctuation)

def pre_process_words(wrds, stop):
    """Drop stop tokens, then lower-case and stem the remaining ones.

    Args:
        wrds: iterable of string tokens.
        stop: container of tokens to discard; membership is tested on the
            token as given, before it is lower-cased.

    Returns:
        A list with one stemmed, lower-cased entry per kept token, in the
        original order (duplicates are preserved).
    """
    stems = []
    for token in wrds:
        if token in stop:
            continue
        stems.append(stemmer.stem(token.lower()))
    return stems

def pre_process_sentence(sentence, stop):
    """Tokenize a raw sentence and normalise its tokens.

    The sentence is split with ``nltk.word_tokenize`` and the tokens are
    handed to ``pre_process_words`` so that sentences and pre-tokenized
    patterns go through exactly the same filtering and stemming.

    Args:
        sentence: raw input string.
        stop: container of tokens to discard.

    Returns:
        A list of stemmed, lower-cased tokens with stop tokens removed.
    """
    return pre_process_words(nltk.word_tokenize(sentence), stop)

def bow_fn(sentence, words):
    """Encode a sentence as a binary bag-of-words vector.

    Args:
        sentence: raw input string; it is tokenized and stemmed with
            ``pre_process_sentence`` using the module-level ``stop_words``.
        words: vocabulary, a sequence of stems; position ``i`` of the result
            corresponds to ``words[i]``.

    Returns:
        A 1-D float array of length ``len(words)`` holding 1 where the
        vocabulary stem occurs in the sentence and 0 elsewhere.
    """
    # A set makes each membership test O(1) instead of a scan over the tokens.
    present = set(pre_process_sentence(sentence, stop_words))

    # Size the vector from the vocabulary actually passed in, not from a
    # global that is only defined by a later cell.
    bag = np.zeros(len(words))

    for i, w in enumerate(words):
        if w in present:
            bag[i] = 1

    return bag
            
# Walk every intent and each of its example patterns.
for intent in intents['intents']:
    for pattern in intent['patterns']:

        # Split the pattern into raw tokens (no filtering or stemming yet).
        wrds = nltk.word_tokenize(pattern)

        # Remember the (tokenized sentence, intent tag) pair ...
        documents.append((wrds, intent['tag']))

        # ... and add the tokens to the running list of all tokens.
        words += wrds

# Dictionary of words:
# - stemmed
# - lowercase
# - not in the stop_words set
# De-duplicated and sorted, so each stem has a fixed position.
words = sorted(set(pre_process_words(words, stop_words)))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

33 documents
8 classes ['More education', 'More experience', 'education', 'experience', 'goodbye', 'greeting', 'noanswer', 'thanks']
48 unique stemmed words ["'s", 'about', 'anyon', 'are', 'befor', 'bye', 'cheer', 'day', 'degre', 'did', 'do', 'educ', 'elabor', 'exact', 'experi', 'good', 'goodby', 'have', 'hello', 'help', 'hey', 'hi', 'how', 'is', 'it', 'later', 'me', 'more', 'now', 'obtain', 'on', 'project', 'right', 'see', 'so', 'someth', 'somewher', 'studi', 'tell', 'thank', 'that', 'the', 'there', 'what', 'where', 'work', 'you', 'your']


## Create training data

In [35]:
num_documents = len(documents)
num_classes = len(classes)
num_words = len(words)

# X: one binary bag-of-words row per document; y: one-hot intent labels.
X = np.zeros((num_documents, num_words))
y = np.zeros((num_documents, num_classes))

# training set, bag of words for each sentence
for j, (doc_tokens, tag) in enumerate(documents):
    # Stem once per document and keep a set for O(1) membership tests.
    stems = set(pre_process_words(doc_tokens, stop_words))

    for i, w in enumerate(words):
        if w in stems:
            X[j, i] = 1

    # The column index of the label is the tag's position in `classes`.
    y[j, classes.index(tag)] = 1

## Build & train simple model 

Note: I don't mind the model overfitting here. The corpus is tiny (33 example patterns) and the model is fitted without a validation split.

In [68]:
# Shuffle samples and labels with one shared permutation so they stay aligned.
idx = np.random.permutation(num_documents)

X = X[idx]
y = y[idx]

num_neurons = 10

# Dense layers are linear unless an activation is given; without a
# non-linearity the three hidden layers collapse into a single linear map,
# so each hidden layer gets a ReLU.
model = Sequential()
model.add(Dense(num_neurons, activation='relu', input_shape=(X.shape[1],)))
model.add(Dense(num_neurons, activation='relu'))
model.add(Dense(num_neurons, activation='relu'))
# One output unit per intent class; softmax yields a probability per class.
model.add(Dense(num_classes, activation='softmax'))

model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# X and y are already NumPy arrays, so no extra copy is needed.
history = model.fit(X, y, epochs=500, batch_size=8)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 10)                490       
_________________________________________________________________
dense_21 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_22 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_23 (Dense)             (None, 8)                 88        
Total params: 798
Trainable params: 798
Non-trainable params: 0
_________________________________________________________________
Train on 33 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/

Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epo

Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 

Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 

Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 

Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 

Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [69]:
# Conversation context per user: maps a user ID to the last 'context_set' value.
context = {}

def inference(sentence, threshold, show_details=False):
    """Predict the intent class index for a sentence.

    Args:
        sentence: raw input string, encoded with ``bow_fn`` against the
            module-level vocabulary ``words``.
        threshold: the top score must be strictly greater than this value
            for a prediction to be returned.
        show_details: when true, print the score vector and the index of
            its maximum.

    Returns:
        The index of the highest-scoring class, or ``None`` when that score
        does not exceed ``threshold``.
    """
    # The model expects a batch, so add a leading axis of size one.
    batch = bow_fn(sentence, words)[np.newaxis, :]

    results = model.predict(batch)[0]
    y_pred = np.argmax(results)

    if show_details:
        print(results, y_pred)

    return y_pred if results[y_pred] > threshold else None

In [70]:
text = 'Talk to you later jakub'

# Minimum score the top class must exceed; also read by `response`.
threshold = 0.3
t = inference(text, threshold)

# `inference` returns None when no class clears the threshold; indexing
# `classes` with None would raise a TypeError.
if t is None:
    print('no intent above threshold')
else:
    print(classes[t])

goodbye


In [71]:
def response(sentence, userID='user_ID', show_details=False):
    """Print a reply to `sentence`, taking the user's context into account.

    The sentence is classified with ``inference`` using the module-level
    ``threshold``. For the first intent whose tag matches the prediction and
    whose optional 'context_filter' equals the context stored for `userID`,
    one of its 'responses' is chosen at random and printed.

    Args:
        sentence: raw user input.
        userID: key under which this user's context is kept in the
            module-level ``context`` dict.
        show_details: when true, also print the scores, the context that was
            set and the matched tag.

    Returns:
        None. Nothing is printed when no class clears the threshold or when
        no matching intent passes its context filter.
    """
    results = inference(sentence, threshold, show_details)
    if results is None:
        return None

    intent_pred = classes[results]

    for intent in intents['intents']:
        if intent['tag'] != intent_pred:
            continue

        # NOTE(review): the context is updated before the filter check below,
        # so an intent that is then rejected by its filter still overwrites
        # the user's context. Confirm this is intended.
        if 'context_set' in intent:
            context[userID] = intent['context_set']

            if show_details:
                print ('context:', intent['context_set'])

        # A contextual intent applies only if this user's stored context
        # equals its filter; intents without a filter always apply.
        blocked = 'context_filter' in intent and (
            userID not in context or context[userID] != intent['context_filter'])
        if blocked:
            continue

        if show_details:
            print('tag:', intent['tag'])

        print(random.choice(intent['responses']))
        return None

    return None

In [72]:
print(classes)

# Start from an empty context and show how it changes after the first reply.
context = {}
demo_user = '123'

print('\n\n', context)
response("What's your education", userID=demo_user, show_details=True)
print('\n\n', context)

# Follow-up turns for the same user.
for sentence in ("tell me more about it", 'alright thanks'):
    response(sentence, userID=demo_user, show_details=True)

['More education', 'More experience', 'education', 'experience', 'goodbye', 'greeting', 'noanswer', 'thanks']


 {}
[2.1008150e-06 1.6923414e-08 9.9464875e-01 3.8202158e-03 5.4121990e-04
 5.1938791e-09 1.0431967e-07 9.8760403e-04] 2
context: more_education
tag: education
I recently graduated from master degree.


 {'123': 'more_education'}
[4.9114689e-01 5.0858384e-01 1.0470107e-05 4.3474390e-07 9.5000200e-05
 1.2722837e-04 3.3919070e-05 2.2319523e-06] 1
[7.2793860e-05 8.2190958e-08 1.1453610e-06 1.8612598e-08 1.1317981e-04
 5.7426109e-06 3.6355166e-03 9.9617153e-01] 7
context: 
tag: thanks
My pleasure


In [73]:
def _demo_conversation(opening, userID):
    """Play the scripted demo dialogue that starts with `opening`.

    The first four turns are sent as `userID`; the closing "cheers" and
    "bye" are sent with the default user ID of `response`.
    """
    response(opening, userID=userID)
    response("tell me more", userID=userID)
    response("tell me more", userID=userID)
    response('alright thanks', userID=userID)
    response("cheers")
    response("bye")


context = {}
_demo_conversation("What's your education", '12345')

print('\n\n')
_demo_conversation("Where do you work", '15')

I recently graduated from master degree.
You're welcome!
You're welcome!
See you later, thanks for visiting



I did some projects within CV and NLP
My career started as web developer, but I always wanted to work with ML.
Sample projects are listed on my portfolio website: https://jakubczerny.wixsite.com/portfolio
You're welcome!
My pleasure
Bye! Come back again soon.
