# Intent Identification using FastText Word Embedding and Cosine Similarity

In [None]:
# Data

# Lookup table from example phrase to intent label. The examples are listed
# grouped by intent and flattened in order, so iteration order over the
# resulting dict follows the listing below.
phrase2intent = {
    phrase: intent
    for intent, phrases in (
        ("intent_greet", (
            "Hi",
            "Hello",
            "Hi, there!",
        )),
        ("intent_asking_name", (
            "What is your name?",
            "Who are you?",
            "May I know to whom am I talking to?",
        )),
        ("intent_directions", (
            "Please can you tell me where is the Dean's office?",
            "directions for washroom",
            "Where is the engineering block",
        )),
        ("intent_book_appointment", (
            "Can you please fix an appointment with the Dean",
            "I want to meet the Cutural Head",
            "I wish to have a conversation with the Hostel Warden",
        )),
    )
    for phrase in phrases
}

## Preprocessing data to be fed into the fasttext model

In [None]:
# Split every example phrase on whitespace; each resulting token list is one
# training sentence. Case and punctuation are left untouched (e.g. 'Hi,').
text = [phrase.split() for phrase in phrase2intent]

print(text)
'''
[['Hi'],
 ['Hello'],
 ['Hi,', 'there!'],
 ['What', 'is', 'your', 'name?'],
 ['Who', 'are', 'you?'],
 ['May', 'I', 'know', 'to', 'whom', 'am', 'I', 'talking', 'to?'],
 ['Please',
  'can',
  'you',
  'tell',
  'me',
  'where',
  'is',
  'the',
  "Dean's",
  'office?'],
 ['directions', 'for', 'washroom'],
 ['Where', 'is', 'the', 'engineering', 'block'],
 ['Can', 'you', 'please', 'fix', 'an', 'appointment', 'with', 'the', 'Dean'],
 ['I', 'want', 'to', 'meet', 'the', 'Cutural', 'Head'],
 ['I',
  'wish',
  'to',
  'have',
  'a',
  'conversation',
  'with',
  'the',
  'Hostel',
  'Warden']]
'''


## OPTION 1: Training a fasttext model from scratch

In [None]:
from gensim.models import fasttext

# min_count=1 keeps every token. gensim's default (min_count=5) would discard
# almost the entire vocabulary of this 12-phrase corpus, because hardly any
# token occurs five times, leaving the model with nothing meaningful to learn.
# All other hyperparameters (vector dimensionality, window, ...) stay at their
# defaults; see https://radimrehurek.com/gensim/models/fasttext.html
model = fasttext.FastText(min_count=1)
model.build_vocab(text)
model.train(text, total_examples=len(text), epochs=20)  # You can play around with the number of epochs.

## OPTION 2: Loading a pretrained fasttext model

In [None]:
from gensim.models import fasttext

# Prerequisite: download and unzip
# https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M-subword.bin.zip
# then point `path` at the extracted .bin file.
model = fasttext.load_facebook_model(path="wiki-news-300d-1M-subword.bin")

## OPTION 3: Fine-tuning the pretrained fasttext model

In [None]:
# Expects `model` to already exist from one of the earlier option cells.
# First extend its vocabulary with the tokens of the example phrases ...
model.build_vocab(text, update=True)
# ... then continue training on those phrases.
model.train(
    text,
    total_examples=len(text),
    epochs=model.epochs,  # epoch count stored on the model; feel free to tune it
)

# Intent Identification by finding the maximum cosine similarity

In [None]:
user_utterance = "fix a meeting with the principal"
threshold = 0.3  # Minimum similarity needed to accept a match; this can be changed as per the need.

# Scan every example phrase and remember the one most similar to the utterance.
# NOTE(review): each full phrase is passed to `similarity` as a single key rather
# than as a list of tokens; `model.wv.n_similarity(phrase.split(), user_utterance.split())`
# would compare token sets instead - confirm which behaviour is intended.
max_similarity = 0
max_intent = None  # Best-matching example phrase (a key of phrase2intent), if any.
for phrase in phrase2intent:
    score = model.wv.similarity(phrase, user_utterance)  # computed once per phrase
    if score > max_similarity:
        max_similarity = score
        max_intent = phrase

# If no phrase scored above zero, or the best score is below the threshold,
# the utterance is classified as an unknown intent. The explicit `None` check
# keeps this safe even when the threshold is lowered to zero or below.
if max_intent is None or max_similarity < threshold:
    print("Didn't get it. Can you please type it again?")
else:
    print("The detected intent is:", phrase2intent.get(max_intent))

'''
The detected intent is: intent_book_appointment
'''