In [1]:
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# 1. Load the intent definitions from disk.
# The encoding is passed explicitly: without it, open() decodes with the
# platform's locale encoding, which can mis-decode (or fail on) non-ASCII
# utterances on systems whose default is not UTF-8.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents_data = json.load(f)

In [4]:
# 2. Flatten the intents into two parallel lists: corpus[i] is an example
#    utterance and intent_mapping[i] is the name of the intent it came from.
corpus, intent_mapping = [], []

for intent in intents_data:
    examples = intent["examples"]
    corpus.extend(examples)
    # One label per example keeps the two lists index-aligned.
    intent_mapping.extend(intent["name"] for _ in examples)


In [10]:
# 3. Fit a TF-IDF model on the example utterances (English stop words are
#    dropped) and keep the resulting matrix, one row per corpus entry.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(raw_documents=corpus)

In [14]:
# 4. Intent matching via TF-IDF similarity search (lexical, not semantic:
#    only words shared with the example utterances contribute to the score)
def match_intent(utterance, min_similarity=0.0, fallback=None):
    """Return the intent of the corpus utterance most similar to ``utterance``.

    Parameters
    ----------
    utterance : str
        Free-text user input to classify.
    min_similarity : float, optional
        The best score must be strictly greater than this value to count as
        a match. The default of 0.0 rejects only utterances that share no
        vocabulary term with any example.
    fallback : object, optional
        Value returned when no example scores above ``min_similarity``.

    Returns
    -------
    The entry of ``intent_mapping`` aligned with the best-scoring corpus
    utterance, or ``fallback`` when there is no match.
    """
    utterance_vec = vectorizer.transform([utterance])
    # The vectorizer uses its default L2 row normalisation, so the plain dot
    # product computed by linear_kernel is the cosine similarity.
    cosine_similarities = linear_kernel(utterance_vec, tfidf_matrix).flatten()
    # Find the most similar utterance in the corpus
    matched_index = int(cosine_similarities.argmax())
    # When the utterance shares no term with the corpus every score is 0 and
    # argmax() returns index 0, which would silently report the first intent
    # as the match. Report "no match" instead.
    if cosine_similarities[matched_index] <= min_similarity:
        return fallback
    return intent_mapping[matched_index]


In [19]:
# Smoke test: an utterance about booking a ticket
utterance = "I want to book a ticket to Delhi."
matched_intent = match_intent(utterance)
print(matched_intent)

book-a-ticket


In [21]:
# Smoke test: an utterance about learning a subject
utterance = "I want to learn algebra"
matched_intent = match_intent(utterance)
print(matched_intent)

Online-Tutoring
