In [99]:
import tensorflow as tf
from joblib import load
import numpy as np

model_path = "chatbot.model"
vect_path = "tfidf_vectorizer.joblib"
label_enc_path = "label_encoder.joblib"

final_model = tf.keras.models.load_model(model_path)
vectorizer = load(vect_path)
label_encoder = load(label_enc_path)

In [100]:
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def clean_text(sentence):
    lower = sentence.lower()
    ignoreLetters = ['?', '!', '.', ',', '/']
    toks = word_tokenize(lower)
    stop_words = set(stopwords.words('english'))
    toks = [word for word in toks if word not in ignoreLetters]
    sentence = [word for word in sentence if word not in stop_words]
    return toks

lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """Lemmatize tokens"""
    return [lemmer.lemmatize(token) for token in tokens]

remove_punc_dict = dict((ord(punct), None) for punct in string.punctuation)

def LemNormalize(text):
    """Remove punctuations and return lemmatized tokens"""
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punc_dict)))

# test
clean_text("Remove punctuations and return lemmatized tokens ?")

['remove', 'punctuations', 'and', 'return', 'lemmatized', 'tokens']

In [101]:
# example
text = "How can i enroll in the data science course ?"

# clean text
prep_text = clean_text(text)
# vectorize the text
vectorized_txt = vectorizer.transform(prep_text)
# make prediction
pred = final_model.predict([vectorized_txt])

ERR_THRESH = 0.25

results = {np.argmax(r): r[np.argmax(r)] for i, r in enumerate(pred) if any(r > ERR_THRESH)}
results = dict(sorted(results.items(), key=lambda k: k[0]))

print(f"\nPredicted classes: {results}")
for class_key, conf in results.items():
    print(f"{class_key}: {label_encoder.classes_[class_key]}, {round(conf*100)}%")


Predicted classes: {3: 0.97002137, 5: 0.92485756, 6: 0.44791928}
3: Data Science, 97%
5: Enrollment, 92%
6: Miscellaneous, 45%


The following are utility functions needed for the bot to run:

In [102]:
def response(user_response):
    """returns response from AI"""
    robot_response = ""
    
    TfidfVec = TfidfVectorizer(tokenizer=clean_text, stop_words='english')
    tfidf = TfidfVec.fit_transform(sentence_tokens)
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    
    if (req_tfidf == 0):
        robot_response + "I am sorry. I am unable to understand you!"
        return robot_response
    else:
        robot_response = robot_response + sentence_tokens[idx]
        return robot_response

In [107]:
import json
from difflib import SequenceMatcher


ERR_THRESH = 0.25
on_fail_response = "I'm sorry, I wasn't able to understand. Kindly elaborate..."

with open("../Final_Intents.json") as f:
    intents = json.load(f)


def get_response_list(class_: str = None) -> list[str]:
    """Returns a list of responses
    """
    print(class_)
    responses = []
    for intent in intents:
        # **
        if class_ and intent["tag"] == class_:
            responses += intent["responses"]
        else:
            responses += intent["responses"]
    return responses

def classify_text(text: str) -> dict:
    """ returns model text classification
    """
    # clean text
    prep_text = clean_text(text)
    # vectorize the text
    vectorized_txt = vectorizer.transform(prep_text)
    # get predicted classes
    pred = final_model.predict([vectorized_txt])
    
    results = {np.argmax(r): r[np.argmax(r)] 
               for i, r in enumerate(pred) if any(r > ERR_THRESH)}
    # sort classes
    results = dict(sorted(results.items(), key=lambda k: k[1], reverse=True))
    
    return results


def similarity(input_str: str, pred_classes: dict) -> str:
    """Gets the most similar string
    """
    max_ratio = 0
    most_similar_str = None
    class_key = list(pred_classes.keys())[0]
    class_ = label_encoder.classes_[class_key]
    print(f"\npredicted tags: {[(label_encoder.classes_[key], conf*100) for key, conf in pred_classes.items()]}")
    
    for str_item in get_response_list(class_):
        similarity_ratio = SequenceMatcher(None, input_str, str_item).ratio()
        if similarity_ratio > max_ratio:
            max_ratio = similarity_ratio
            most_similar_str = str_item

    return most_similar_str


# entity classification
def get_response(user_input: str) -> str:
    """Gets user query and returns bot response 
    """
    pred_classes = classify_text(user_input)
    
    if not len(pred_classes.keys()):
        return on_fail_response
    
    return similarity(user_input, pred_classes)

In [109]:
# test1
get_response("How do I enroll in moringa school ?")


predicted tags: [('Miscellaneous', 99.50372576713562), ('Enrollment', 92.48575568199158)]
Miscellaneous


'For general inquiries, you can contact info@moringaschool.com.'