# Revision of Code

- use bag of words instead of tfidf
- jaccard for tokens 

In [1]:
import nltk
from nltk.stem import WordNetLemmatizer, PorterStemmer
import json
import numpy as np
import spacy

In [282]:
# Load intents data
with open('../intents.json', 'r') as file:
    data = json.load(file)

intents = data['intents']

In [150]:
intents

[{'tag': 'goodbye',
  'patterns': ['Goodbye',
   'Bye',
   'See you later',
   'Take care',
   'Until next time',
   'So long',
   'Farewell',
   'Adios',
   'Catch you later',
   'Have a great day',
   'Bye for now',
   'See you soon',
   'Until we meet again',
   'Goodnight',
   'Take it easy',
   'Peace out',
   'Later gator',
   'Keep in touch',
   'Have a good one',
   'Signing off'],
  'responses': ['See you later, thanks for visiting',
   'Goodbye! If you have any more questions, feel free to ask.'],
  'context_set': ''},
 {'tag': 'developers',
  'patterns': ['what is the name of your developers',
   'what is the name of your creators',
   'what is the name of the developers',
   'what is the name of the creators',
   'who created you',
   'your developers',
   'your creators',
   'who are your developers',
   'developers',
   'you are made by',
   'you are made by whom',
   'who created you',
   'who create you',
   'creators',
   'who made you',
   'Who created you?',
   'Who 

In [151]:
# Prepare training data
patterns = []
tags = []

for intent in intents:
    for pattern in intent['patterns']:
        patterns.append(pattern)
        tags.append(intent['tag'])

In [152]:
print(len(patterns), patterns)
print(len(tags), tags)

2876 ['Goodbye', 'Bye', 'See you later', 'Take care', 'Until next time', 'So long', 'Farewell', 'Adios', 'Catch you later', 'Have a great day', 'Bye for now', 'See you soon', 'Until we meet again', 'Goodnight', 'Take it easy', 'Peace out', 'Later gator', 'Keep in touch', 'Have a good one', 'Signing off', 'what is the name of your developers', 'what is the name of your creators', 'what is the name of the developers', 'what is the name of the creators', 'who created you', 'your developers', 'your creators', 'who are your developers', 'developers', 'you are made by', 'you are made by whom', 'who created you', 'who create you', 'creators', 'who made you', 'Who created you?', 'Who developed you?', 'Who is your creator?', 'Who made you?', 'Who are you?', 'Who are your developers?', 'Who built you?', 'Who programmed you?', 'Who is responsible for your creation?', 'Who designed you?', 'Who are the minds behind you?', 'name', 'your name', 'do you have a name', 'what are you called', 'what is yo

In [153]:
intents[5]['patterns']

['What tasks can you help with?',
 'What can you do?',
 'How can you assist me?',
 'What are your capabilities?',
 'Can you perform tasks?',
 'List of tasks',
 'What are your abilities?',
 'What tasks are you capable of?',
 'Do you have any functions?',
 'What tasks do you offer?']

In [7]:
from spacy.lang.en.stop_words import STOP_WORDS

source_nlp = spacy.load("en_core_web_sm")
source_nlp.pipe_names


['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [8]:
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

def preprocess_input(corpus):
    tokens_no_stopwords = []
    for sents in corpus:
        # print(sents, end=" ")
        tokens = source_nlp(sents)
        tokens_no_stopwords.append([token for token in tokens if not token.is_stop and not token.is_punct])
        tokens_no_stopwords = [token for token in tokens_no_stopwords if len(token) > 0]
        # tokens_no_stopwords = [words.lower() for token in tokens_no_stopwords for words in token]
        # print(tokens_no_stopwords)
        # token_stem = [stemmer.stem(token) for token in tokens_no_stopwords]  # Stemming
        # print(token, " | ", token.pos_, " | ", token.lemma_, " | ", token_stem)
        # tokens_no_stopwords.append(token)
    
    print(tokens_no_stopwords)
    return tokens_no_stopwords

In [122]:
from spacy import displacy

corpus = intents[9]['patterns']

sentences = []
entities = []
distinct_bow_lemma_stem = set()
bow_lemma = set()
for sents in corpus:
    tokens = source_nlp(sents)
    sentences.append(tokens)
    entities.append(tokens.ents)
    for token in tokens:
        if token.text not in distinct_bow_lemma_stem:
            if not token.is_stop and not token.is_punct:
                token_lemma = token.lemma_
                token_stem = stemmer.stem(token.text)
                distinct_bow_lemma_stem.add((token_lemma.lower(), token_stem.lower()))
                bow_lemma.add(token_lemma.lower())

print(entities)
# displacy.serve(sentences, style='ent')
    # for token in tokens:
    #     if not token.is_stop and not token.is_punct:
    #         print(token)

[(), (), (), (), (), (), (TUP,), (TUP,), (), (), (), (), (), (), (TUP,), (TUP,), (TUP,), (), (Address of TUP,)]


In [11]:
tokens = preprocess_input(intents[5]['patterns'])

# for token in tokens:
#     for words in token:
#         print(words.lower())

[[tasks, help], [assist], [capabilities], [perform, tasks], [List, tasks], [abilities], [tasks, capable], [functions], [tasks, offer]]


In [91]:
bow_lemma

{'address', 'college', 'find', 'locate', 'location', 'reach', 's', 'tup'}

# Bag of Words

In [160]:
# Intent Processing 

def process_intent(intents):
    processed_intent = []
    
    for intent in intents:
        bow_lemma = []
        entities = []
        for sents in intent['patterns']:    
            sent_tokens = source_nlp(sents)
            for token in sent_tokens:
                if token.lemma_ not in bow_lemma:
                    if not token.is_stop and not token.is_punct:
                        bow_lemma.append(token.lemma_.lower())
            for ents in sent_tokens.ents:
                if ents.text not in [text for text, _ in entities]:
                    entities.append((ents.text, ents.label_))
        
        processed_intent.append({
            'tag': intent['tag'],
            'patterns': bow_lemma,
            'entities': entities
        })
    return processed_intent

In [161]:
processed_intent = process_intent(intents)

In [192]:
processed_intent[2]

{'tag': 'name',
 'patterns': ['call',
  's',
  'chat',
  'take',
  'purpose',
  'function',
  'role',
  'introduce',
  'tell',
  'chatbot',
  'know',
  'people'],
 'entities': []}

In [163]:
# Extract patterns for CountVectorizer
all_patterns = []
for intent in processed_intent:
    all_patterns.extend(intent['patterns'])

In [164]:
# Import CountVectorizer
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd

# Use CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(all_patterns)

# Convert bow_matrix into a DataFrame
bow_df = pd.DataFrame(X.toarray())

# Map the column names to vocabulary 
bow_df.columns = vectorizer.vocabulary_.keys()

In [165]:
bow_df

Unnamed: 0,goodbye,bye,later,care,time,long,farewell,adio,catch,great,...,adjustment,instructional,arrangement,consider,condition,happy,enjoy,strength,make,stand
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9433,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9434,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9435,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9436,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [171]:
# Use CountVectorizer
vectorizer2 = CountVectorizer(ngram_range=(1,2))
bow_2_n_gram = vectorizer2.fit_transform(all_patterns)

# Convert bow_matrix into a DataFrame
bow_2_df = pd.DataFrame(bow_2_n_gram.toarray())

# Map the column names to vocabulary 
bow_2_df.columns = vectorizer2.vocabulary_.keys()

bow_2_df

Unnamed: 0,goodbye,bye,later,care,time,long,farewell,adio,catch,great,...,adjustment,instructional,arrangement,consider,condition,happy,enjoy,strength,make,stand
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9433,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9434,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9435,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9436,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [178]:
for intent in intents:
    bow_lemma = []
    embeddings  = []
    for sents in intent['patterns']: 
        pattern_embeddings = []   
        sent_tokens = source_nlp(sents)
        for token in sent_tokens:
                if token.lemma_.lower() not in bow_lemma:
                    if not token.is_stop and not token.is_punct:
                        bow_lemma.append(token.lemma_.lower())
                        pattern_embeddings.append(token.vector)
        embeddings.append(np.mean(pattern_embeddings, axis=0))

print(embeddings)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[array([-0.91703016, -0.33392954, -0.8384266 ,  0.45715132,  0.48987973,
       -0.30894193,  1.0757093 ,  2.0038452 ,  0.26072258, -0.691041  ,
       -0.35900542, -0.7351078 ,  0.5569637 , -0.12620613, -0.13251168,
        0.1338596 , -0.0330262 , -0.06984696, -0.01409504, -0.20126568,
        0.5948569 , -0.6126499 , -0.67373383, -0.6025908 ,  0.25893134,
       -0.3335677 ,  0.11213247,  0.46855444,  1.0493947 , -0.03912246,
       -0.2108601 , -0.316139  ,  0.51059115,  0.02653977, -0.35139507,
       -0.13611898,  0.6287555 ,  1.6392802 , -0.39989877, -0.7597383 ,
       -0.30360436,  0.5680491 ,  1.1156765 , -0.05017847, -0.02460958,
        0.1651906 , -1.2464492 ,  0.01373807,  0.07042554, -0.15405023,
       -0.43712708,  0.17664367,  0.82827586, -1.3109981 ,  0.49664873,
       -0.4506659 , -0.22558354,  0.63083166,  0.34652117,  0.14361465,
        0.01458526, -0.70036066,  0.25485876, -0.55358934,  0.9491464 ,
        0.6181663 ,  0.16735062,  0.02952953, -0.62690103, -0.5

In [92]:
# Import CountVectorizer
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd

vectorizer = CountVectorizer()

bow_matrix = vectorizer.fit_transform(bow_lemma)

# Convert bow_matrix into a DataFrame
bow_df = pd.DataFrame(bow_matrix.toarray())

# Map the column names to vocabulary 
bow_df.columns = vectorizer.vocabulary_.keys()

bow_df

Unnamed: 0,reach,college,tup,find,address,location,locate
0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0
3,0,0,0,0,0,0,1
4,0,0,1,0,0,0,0
5,1,0,0,0,0,0,0
6,0,0,0,0,1,0,0
7,0,0,0,1,0,0,0


In [166]:
corpus = ['The lion is the king of the jungle', 'Lions have lifespans of a decade', 'The lion is an endangered species']

# Create CountVectorizer object
vectorizer = CountVectorizer()

# Generate matrix of word vectors
bow_matrix = vectorizer.fit_transform(corpus)

# Convert bow_matrix into a DataFrame
bow_df = pd.DataFrame(bow_matrix.toarray())

# Map the column names to vocabulary 
bow_df.columns = vectorizer.vocabulary_.keys()

# Print bow_df
bow_df

Unnamed: 0,the,lion,is,king,of,jungle,lions,have,lifespans,decade,an,endangered,species
0,0,0,0,0,1,1,1,0,1,0,1,0,3
1,0,1,0,1,0,0,0,1,0,1,1,0,0
2,1,0,1,0,1,0,0,0,1,0,0,1,1


# Spacy Similarity

In [187]:
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
     ---------------------------------------- 0.0/42.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/42.8 MB ? eta -:--:--
     --------------------------------------- 0.0/42.8 MB 262.6 kB/s eta 0:02:43
     --------------------------------------- 0.0/42.8 MB 219.4 kB/s eta 0:03:15
     --------------------------------------- 0.1/42.8 MB 595.3 kB/s eta 0:01:12
     --------------------------------------- 0.1/42.8 MB 602.4 kB/s eta 0:01:11
     --------------------------------------- 0.2/42.8 MB 573.4 kB/s eta 0:01:15
     --------------------------------------- 0.2/42.8 MB 655.6 kB/s eta 0:01:05
     --------------------------------------- 0.3/42.8 MB 655.4 kB/s eta 0:01:05
     --------------------------------------- 0.3/42.8 MB 632.7 kB/s eta 0:01:08
     -----------------------------------

In [197]:
import spacy

nlp = spacy.load("en_core_web_md")

user = nlp("Hi tekbot")

token_patterns = [
    ['call', 's', 'chat', 'take', 'purpose', 'function', 'role' 'introduce', 'tell', 'chatbot', 'know', 'people'],
    ['hi', 'hello', 'good', 'day', 'ya', 'hey', 'whatsup']
]

In [198]:
for sents in token_patterns[0]:
    pattern = nlp(sents)
    print(user, " <-> ", pattern, user.similarity(pattern))

for sents in token_patterns[1]:
    pattern = nlp(sents)
    print(user, " <-> ", pattern, user.similarity(pattern))

Hi tekbot  <->  call 0.028812958675871428
Hi tekbot  <->  s 0.08718074892233728
Hi tekbot  <->  chat 0.21813749149033165
Hi tekbot  <->  take -0.09301951592240967
Hi tekbot  <->  purpose -0.0959089056453898
Hi tekbot  <->  function -0.07967520386419698
Hi tekbot  <->  roleintroduce 0.0
Hi tekbot  <->  tell 0.11134570051847313
Hi tekbot  <->  chatbot 0.21813749149033165
Hi tekbot  <->  know 0.14119814643824838
Hi tekbot  <->  people -0.1369659877449449
Hi tekbot  <->  hi 0.577788783511133
Hi tekbot  <->  hello 0.5091158413863858
Hi tekbot  <->  good 0.07039120456332658
Hi tekbot  <->  day -0.14136730783564347
Hi tekbot  <->  ya 0.24415735728087545
Hi tekbot  <->  hey 0.4831739779853601
Hi tekbot  <->  whatsup 0.0


  print(user, " <-> ", pattern, user.similarity(pattern))
  print(user, " <-> ", pattern, user.similarity(pattern))


In [280]:
def spacy_similarity(user):
    user = nlp(user)
    similarities = []
    tags = []
    for intent in intents:
        for sents in intent['patterns']:
            pattern = nlp(sents)
            print(user, " <-> ", pattern, user.similarity(pattern))
            similarities.append(user.similarity(pattern))
            tags.append(intent['tag'])
            
    max_similarity = max(similarities)
    max_similarity_index = similarities.index(max_similarity)
    # max_similarity_intent = intents[max_similarity_index]['tag']
    return max_similarity, max_similarity_index, tags

In [272]:
max_similarity, max_similarity_index, tags = spacy_similarity("who is the tup president?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

  similarities.append(user.similarity(pattern))


0.9409467135867351
156
principal


In [273]:
max_similarity, max_similarity_index, tags = spacy_similarity("who is the head of OIC?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

  similarities.append(user.similarity(pattern))


0.8986903642443768
345
officials_academic_affairs


In [266]:
for intent in intents:
    if tags[max_similarity_index] == intent['tag']:
        print(intent['responses'][0])
        break

[{'Office/Unit': 'College of Industrial Technology', 'Designation': 'Dean', 'Name': 'Asst. Prof. Elpidio S. Virrey', 'Email': 'elpidio_virrey@tup.edu.ph, cit@tup.edu.ph'}, {'Office/Unit': 'College of Industrial Technology', 'Designation': 'College Secretary', 'Name': 'Assoc. Prof. Dr. May Ann R. Codera', 'Email': 'mayann_codera@tup.edu.ph, cit@tup.edu.ph'}, {'Office/Unit': 'Basic Industrial Technology', 'Designation': 'Head', 'Name': 'Assoc. Prof. Jose C. Delos Santos Jr.', 'Email': 'jose_delossantos@tup.ed.ph, bit@tup.edu.ph'}, {'Office/Unit': 'Food and Apparel Technology', 'Designation': 'Head', 'Name': 'Assoc. Prof. Bernadeth Gilbor', 'Email': 'bernadeth_gilbor@tup.edu.ph'}, {'Office/Unit': 'Graphic and Arts Department', 'Designation': 'Head', 'Name': 'Asst. Prof. Ms. Lotis Palma-Buco', 'Email': 'lotis_buco@tup.edu.ph'}, {'Office/Unit': 'Mechanical Engineering Technology', 'Designation': 'Head', 'Name': 'Assoc. Prof. Andrew John A. Mabaquiao', 'Email': 'andrewjohn_mabaquiao@tup.edu.

In [285]:
max_similarity, max_similarity_index, tags = spacy_similarity("how to enroll in cos?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

for intent in intents:
    if tags[max_similarity_index] == intent['tag']:
        print(intent['responses'][0])
        break

how to enroll in cos?  <->  Goodbye -0.1483349645944867
how to enroll in cos?  <->  Bye -0.0975015576901771
how to enroll in cos?  <->  See you later 0.450238740479442
how to enroll in cos?  <->  Take care 0.27805680207512534
how to enroll in cos?  <->  Until next time 0.45552156550562684
how to enroll in cos?  <->  So long 0.39483163419446693
how to enroll in cos?  <->  Farewell 0.052654461112242854
how to enroll in cos?  <->  Adios -0.19096510709891626
how to enroll in cos?  <->  Catch you later 0.4935862360578539
how to enroll in cos?  <->  Have a great day 0.3542387371665089
how to enroll in cos?  <->  Bye for now 0.3808261128955816
how to enroll in cos?  <->  See you soon 0.42589875662622395
how to enroll in cos?  <->  Until we meet again 0.5486148393289185
how to enroll in cos?  <->  Goodnight 0.0667483086399369
how to enroll in cos?  <->  Take it easy 0.3762409167139063
how to enroll in cos?  <->  Peace out 0.28040551114891044
how to enroll in cos?  <->  Later gator 0.0733032668

  print(user, " <-> ", pattern, user.similarity(pattern))
  similarities.append(user.similarity(pattern))


how to enroll in cos?  <->  okie -0.04670083385548816
how to enroll in cos?  <->  nice work 0.42768991373164456
how to enroll in cos?  <->  well done 0.3464812529535893
how to enroll in cos?  <->  good job 0.35128709201882957
how to enroll in cos?  <->  thanks for the help 0.5323187685348931
how to enroll in cos?  <->  Thank You 0.28728178402406446
how to enroll in cos?  <->  its ok 0.17430433661632097
how to enroll in cos?  <->  Thanks 0.04791357685917845
how to enroll in cos?  <->  Good work 0.3065064042377957
how to enroll in cos?  <->  k 0.055174822148217416
how to enroll in cos?  <->  ok 0.006579932380505053
how to enroll in cos?  <->  okay 0.2091494684407851
how to enroll in cos?  <->  Hi -0.12607786491578976
how to enroll in cos?  <->  How are you?  0.540578808545919
how to enroll in cos?  <->  Is anyone there? 0.370746692044777
how to enroll in cos?  <->  Hello  -0.030746566334198304
how to enroll in cos?  <->  Good day 0.1293732997087411
how to enroll in cos?  <->  What's up 0

In [278]:
user = nlp("who is the head of OIC?")

for sents in intents[62]['patterns']:
    pattern = nlp(sents)
    print(user, " <-> ", pattern, user.similarity(pattern))

who is the head of OIC?  <->  Who is the head of the admissions office? 0.8608542285791516
who is the head of OIC?  <->  Can you provide information about the admissions office? 0.5485482048865554
who is the head of OIC?  <->  Who oversees admissions at TUP? 0.34247337283907087
who is the head of OIC?  <->  Who is in charge of admissions? 0.7412475253070415
who is the head of OIC?  <->  Who manages the admissions process? 0.6366492466888177
who is the head of OIC?  <->  Who is the head of the admission office? 0.8597035819707299
who is the head of OIC?  <->  Who is responsible for admissions? 0.6373736886575521
who is the head of OIC?  <->  What is the name of the person in charge of admissions? 0.8035832410434001
who is the head of OIC?  <->  Who leads the admissions team? 0.6320031272656981
who is the head of OIC?  <->  Who is the head of the Admissions Office? 0.8495964567814529


In [256]:
user = nlp("who is the department head in cos computer studies?")

users_input = [(user.text, user.ent_type_, user.dep_, spacy.explain(user.dep_)) for user in user]
tokens = [user.text for user in user if not user.is_stop and not user.is_punct]
joined_text = ' '.join(tokens)
print(users_input)
print(joined_text)
print(type(joined_text))

[('who', '', 'attr', 'attribute'), ('is', '', 'ROOT', 'root'), ('the', '', 'det', 'determiner'), ('department', '', 'compound', 'compound'), ('head', '', 'attr', 'attribute'), ('in', '', 'prep', 'prepositional modifier'), ('cos', '', 'prep', 'prepositional modifier'), ('computer', '', 'compound', 'compound'), ('studies', '', 'pobj', 'object of preposition'), ('?', '', 'punct', 'punctuation')]
department head cos computer studies
<class 'str'>


In [None]:
# Compare similarities with tokens no stop words and punctuations

def spacy_similarity(user):
    users = nlp(user)
    user_tokens = [user.text for user in users if not user.is_stop and not user.is_punct]
    users = ' '.join(user_tokens)
    user = nlp(users)
    similarities = []
    tags = []
    for intent in intents:
        for sents in intent['patterns']:
            patterns = nlp(sents)
            pattern_tokens = [pat.text for pat in patterns if not pat.is_stop and not pat.is_punct]
            patterns = ' '.join(pattern_tokens)
            pattern = nlp(patterns)
            # print(user, " <-> ", pattern, user.similarity(pattern))
            similarities.append(user.similarity(pattern))
            tags.append(intent['tag'])
            
    max_similarity = max(similarities)
    max_similarity_index = similarities.index(max_similarity)
    # max_similarity_intent = intents[max_similarity_index]['tag']
    return max_similarity, max_similarity_index, tags

### Conclution for spacy similarity

if I compare the user's input text **as a whole without text processing** it gives a **higher performance**

if I compare the user's input text **with text processing** (*removing the stop words and punctuations*) it gives a **lower performance**

In [303]:
def jaccard_similarity(user):
    users = nlp(user)
    user = set(user.text.lower() for user in users if not user.is_stop and not user.is_punct)
    similarities = []
    tags = []
    for intent in intents:
        for sents in intent['patterns']:
            patterns = nlp(sents)
            pattern = set(pat.text.lower() for pat in patterns if not pat.is_stop and not pat.is_punct)
            print(user, " <-> ", pattern, len(user.intersection(pattern)) / len(user.union(pattern)))
            similarities.append(len(user.intersection(pattern)) / len(user.union(pattern)))
            tags.append(intent['tag'])
            
    max_similarity = max(similarities)
    max_similarity_index = similarities.index(max_similarity)
    return max_similarity, max_similarity_index, tags

In [304]:
max_similarity, max_similarity_index, tags = jaccard_similarity("who is the department head in cos computer studies?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

for intent in intents:
    if tags[max_similarity_index] == intent['tag']:
        print(intent['responses'][0])
        break

{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'goodbye'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'bye'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'later'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'care'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'time'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'long'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'farewell'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'adios'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'later', 'catch'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'day', 'great'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'bye'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'soon'} 0.0
{'head', 'department', 'cos', 'studies', 'computer'}  <->  {'meet'} 0.0
{'head', 'department', 'cos', 'studies'

In [305]:
max_similarity, max_similarity_index, tags = jaccard_similarity("who is the head of OIC?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

for intent in intents:
    if tags[max_similarity_index] == intent['tag']:
        print(intent['responses'][0])
        break

{'head', 'oic'}  <->  {'goodbye'} 0.0
{'head', 'oic'}  <->  {'bye'} 0.0
{'head', 'oic'}  <->  {'later'} 0.0
{'head', 'oic'}  <->  {'care'} 0.0
{'head', 'oic'}  <->  {'time'} 0.0
{'head', 'oic'}  <->  {'long'} 0.0
{'head', 'oic'}  <->  {'farewell'} 0.0
{'head', 'oic'}  <->  {'adios'} 0.0
{'head', 'oic'}  <->  {'later', 'catch'} 0.0
{'head', 'oic'}  <->  {'day', 'great'} 0.0
{'head', 'oic'}  <->  {'bye'} 0.0
{'head', 'oic'}  <->  {'soon'} 0.0
{'head', 'oic'}  <->  {'meet'} 0.0
{'head', 'oic'}  <->  {'goodnight'} 0.0
{'head', 'oic'}  <->  {'easy'} 0.0
{'head', 'oic'}  <->  {'peace'} 0.0
{'head', 'oic'}  <->  {'later', 'gator'} 0.0
{'head', 'oic'}  <->  {'touch'} 0.0
{'head', 'oic'}  <->  {'good'} 0.0
{'head', 'oic'}  <->  {'signing'} 0.0
{'head', 'oic'}  <->  {'developers'} 0.0
{'head', 'oic'}  <->  {'creators'} 0.0
{'head', 'oic'}  <->  {'developers'} 0.0
{'head', 'oic'}  <->  {'creators'} 0.0
{'head', 'oic'}  <->  {'created'} 0.0
{'head', 'oic'}  <->  {'developers'} 0.0
{'head', 'oic'} 

In [299]:
max_similarity, max_similarity_index, tags = jaccard_similarity("who is the dean of cos?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

for intent in intents:
    if max_similarity == 0.0:
        print("Im Sorry, i did not understand what you said")
        break
    if tags[max_similarity_index] == intent['tag']:
        print(intent['responses'][0])
        break

{'dean', 'cos'}  <->  {'Goodbye'} 0.0
{'dean', 'cos'}  <->  {'Bye'} 0.0
{'dean', 'cos'}  <->  {'later'} 0.0
{'dean', 'cos'}  <->  {'care'} 0.0
{'dean', 'cos'}  <->  {'time'} 0.0
{'dean', 'cos'}  <->  {'long'} 0.0
{'dean', 'cos'}  <->  {'Farewell'} 0.0
{'dean', 'cos'}  <->  {'Adios'} 0.0
{'dean', 'cos'}  <->  {'later', 'Catch'} 0.0
{'dean', 'cos'}  <->  {'day', 'great'} 0.0
{'dean', 'cos'}  <->  {'Bye'} 0.0
{'dean', 'cos'}  <->  {'soon'} 0.0
{'dean', 'cos'}  <->  {'meet'} 0.0
{'dean', 'cos'}  <->  {'Goodnight'} 0.0
{'dean', 'cos'}  <->  {'easy'} 0.0
{'dean', 'cos'}  <->  {'Peace'} 0.0
{'dean', 'cos'}  <->  {'gator', 'Later'} 0.0
{'dean', 'cos'}  <->  {'touch'} 0.0
{'dean', 'cos'}  <->  {'good'} 0.0
{'dean', 'cos'}  <->  {'Signing'} 0.0
{'dean', 'cos'}  <->  {'developers'} 0.0
{'dean', 'cos'}  <->  {'creators'} 0.0
{'dean', 'cos'}  <->  {'developers'} 0.0
{'dean', 'cos'}  <->  {'creators'} 0.0
{'dean', 'cos'}  <->  {'created'} 0.0
{'dean', 'cos'}  <->  {'developers'} 0.0
{'dean', 'cos'} 

In [306]:
max_similarity, max_similarity_index, tags = jaccard_similarity("who is the tup president?")

print(max_similarity)
print(max_similarity_index)
print(tags[max_similarity_index])

for intent in intents:
    if max_similarity <= 0.60:
        print("Im Sorry, i did not understand what you said")
        break
    if tags[max_similarity_index] == intent['tag']:
        print(intent['responses'][0])
        break

{'tup', 'president'}  <->  {'goodbye'} 0.0
{'tup', 'president'}  <->  {'bye'} 0.0
{'tup', 'president'}  <->  {'later'} 0.0
{'tup', 'president'}  <->  {'care'} 0.0
{'tup', 'president'}  <->  {'time'} 0.0
{'tup', 'president'}  <->  {'long'} 0.0
{'tup', 'president'}  <->  {'farewell'} 0.0
{'tup', 'president'}  <->  {'adios'} 0.0
{'tup', 'president'}  <->  {'later', 'catch'} 0.0
{'tup', 'president'}  <->  {'day', 'great'} 0.0
{'tup', 'president'}  <->  {'bye'} 0.0
{'tup', 'president'}  <->  {'soon'} 0.0
{'tup', 'president'}  <->  {'meet'} 0.0
{'tup', 'president'}  <->  {'goodnight'} 0.0
{'tup', 'president'}  <->  {'easy'} 0.0
{'tup', 'president'}  <->  {'peace'} 0.0
{'tup', 'president'}  <->  {'later', 'gator'} 0.0
{'tup', 'president'}  <->  {'touch'} 0.0
{'tup', 'president'}  <->  {'good'} 0.0
{'tup', 'president'}  <->  {'signing'} 0.0
{'tup', 'president'}  <->  {'developers'} 0.0
{'tup', 'president'}  <->  {'creators'} 0.0
{'tup', 'president'}  <->  {'developers'} 0.0
{'tup', 'president'

In [300]:
users = nlp("list ")
user = set(user.text.lower() for user in users if not user.is_stop and not user.is_punct)

for sents in intents[4]['patterns']:
    patterns = nlp(sents)
    pattern = set(pat.text.lower() for pat in patterns if not pat.is_stop and not pat.is_punct)
    print(user, " <-> ", pattern, len(user.intersection(pattern)) / len(user.union(pattern)))

{'hi'}  <->  {'hi'} 1.0
{'hi'}  <->  set() 0.0
{'hi'}  <->  set() 0.0
{'hi'}  <->  {'hello'} 0.0
{'hi'}  <->  {'good', 'day'} 0.0
{'hi'}  <->  set() 0.0
{'hi'}  <->  {'ya'} 0.0
{'hi'}  <->  {'hey'} 0.0
{'hi'}  <->  {'whatsup'} 0.0
