In [1]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
import string
import numpy as np
import pickle
import textdistance
from gensim.models import Word2Vec

In [2]:
# Load the Keras model used by predict_class() together with the artifacts it
# needs at inference time: intent definitions, vocabulary and label encoder.
from keras.models import load_model

model=load_model("./Model/Model_Chatbot_Personal_W2V_LSTM.h5")
import json
import random

# Intent definitions; get_response() reads intents["intents"] and each entry's
# "tag" and "responses" fields.
with open("./Dataset/intents.json",encoding="utf-8") as file:
    intents=json.load(file)
# Vocabulary that correct_typo_damerau_levenshtein() treats as the valid words.
# NOTE(security): pickle.load can execute arbitrary code - only load trusted files.
with open("./Dataset/words_w2v.pkl","rb") as file:
    words=pickle.load(file)
# Label encoder used by predict_class() to map a class index back to its tag.
# NOTE(review): this file sits in the notebook directory while the other
# artifacts live under ./Dataset or ./Model - confirm the path is intentional.
with open("./label_encoder_w2v.pkl","rb") as file:
    label_encoder=pickle.load(file)
    
# print(open("./Dataset/intents.json").read())
# intents2=json.loads(open("./Dataset/intents.json",encoding="utf-8").read())

In [3]:
# Load the Word2Vec model; predict_class() passes it to get_average_doc_vector(),
# which reads its `wv` word vectors and `vector_size`.
word2vec_model = Word2Vec.load("./Model/word2vec_model.Models")

In [4]:
# print(intents)
# print(intents2)

In [5]:
# Shared preprocessing resources used by the cleaning helpers below.
# Lemmatizer used by lemmatize().
lemmatizer=WordNetLemmatizer()
# NOTE: `punc` is a single string, so `x in punc` is a substring test.
punc=string.punctuation
# English stop-word collection consulted by remove_stopwords().
stop_words=stopwords.words("english")

# Helper functions for cleaning a list of tokens
def remove_punctuation(w_list):
    """Drop every token that is made up solely of punctuation characters.

    The earlier check was ``word not in punc`` with ``punc`` being the *string*
    ``string.punctuation``. That is a substring test, so multi-character
    punctuation tokens such as ``"..."`` or ``"--"`` were kept because they do
    not occur as a contiguous substring of ``string.punctuation``. Testing
    character by character removes them as well.

    Args:
        w_list: iterable of string tokens.

    Returns:
        A new list with the punctuation-only tokens removed. Tokens that merely
        contain punctuation (``"don't"``, ``"c++"``) are kept. Empty strings are
        dropped, as they were before.
    """
    punctuation_chars = set(string.punctuation)
    # all() over an empty token is True, so "" is removed just like in the
    # original substring test ("" in punc is True).
    return [
        word for word in w_list
        if not all(char in punctuation_chars for char in word)
    ]

def remove_stopwords(w_list):
    """Return the tokens of ``w_list`` that are not present in ``stop_words``.

    The membership test is an exact, case-sensitive match against the
    module-level ``stop_words`` collection.
    """
    kept_tokens = []
    for token in w_list:
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return kept_tokens

def remove_number(w_list):
    """Return the tokens of ``w_list`` for which ``str.isnumeric()`` is False.

    Only tokens consisting entirely of numeric characters are dropped, so
    values such as ``"3.5"`` or ``"v2"`` are kept.
    """
    return list(filter(lambda token: not token.isnumeric(), w_list))

def get_tag(tag):
    """Map a lower-cased POS tag to the single-letter code used by lemmatize().

    Tags starting with ``j``, ``v``, ``n`` or ``r`` map to ``'a'``, ``'v'``,
    ``'n'`` and ``'r'`` respectively; any other tag yields ``None``.
    The comparison is case-sensitive, so callers must lower-case the tag first.
    """
    prefix_to_code = (('j', 'a'), ('v', 'v'), ('n', 'n'), ('r', 'r'))
    for prefix, code in prefix_to_code:
        if tag.startswith(prefix):
            return code
    return None

def lemmatize(w_list):
    """Lower-case every token and lemmatize those with a recognised POS tag.

    Tokens are tagged with ``pos_tag``; the tag is lower-cased and translated by
    ``get_tag``. When ``get_tag`` returns ``None`` the lower-cased token is kept
    as is, otherwise it is passed to ``lemmatizer.lemmatize`` with that code.

    Returns:
        A list with one entry per input token, in the original order.
    """
    def _normalise(token, raw_tag):
        lowered = token.lower()
        pos_code = get_tag(raw_tag.lower())
        if pos_code is None:
            return lowered
        return lemmatizer.lemmatize(lowered, pos_code)

    return [_normalise(token, raw_tag) for token, raw_tag in pos_tag(w_list)]

In [6]:
def clean_up_sentence(msg):
    """Tokenize ``msg`` and strip stop words, punctuation and numeric tokens.

    The filters run in this order: ``remove_stopwords``, ``remove_punctuation``,
    ``remove_number``. Lemmatization is intentionally not applied here (the
    call was disabled in the original code).

    Returns:
        The list of remaining tokens.
    """
    tokens = word_tokenize(msg)
    for cleaning_step in (remove_stopwords, remove_punctuation, remove_number):
        tokens = cleaning_step(tokens)
    return tokens

In [7]:
def correct_typo_damerau_levenshtein(word, threshold=2):
    """Replace ``word`` with the closest vocabulary word, if one is close enough.

    Args:
        word: token to check against the module-level ``words`` vocabulary.
        threshold: maximum Damerau-Levenshtein distance for a vocabulary word to
            be accepted as a correction (default 2, the previous fixed value).

    Returns:
        ``word`` itself when it is already in the vocabulary or when no
        vocabulary word lies within ``threshold``; otherwise the vocabulary word
        with the smallest distance. Ties are broken alphabetically so the result
        is the same on every run (the previous version depended on set
        iteration order, which varies between interpreter runs).
    """
    vocabulary = set(words)

    # A distance of 0 means the strings are equal, so a plain membership test
    # replaces a full scan of the vocabulary for correctly spelled words.
    if word in vocabulary:
        return word

    best_match = None  # (distance, candidate) of the best correction so far
    for candidate in vocabulary:
        # The edit distance is never smaller than the length difference, so
        # these candidates cannot be within the threshold.
        if abs(len(candidate) - len(word)) > threshold:
            continue

        distance = textdistance.damerau_levenshtein(word, candidate)
        if distance > threshold:
            continue

        # Tuple comparison orders by distance first, then alphabetically.
        ranked = (distance, candidate)
        if best_match is None or ranked < best_match:
            best_match = ranked

    # No vocabulary word is similar enough: keep the original token.
    if best_match is None:
        return word
    return best_match[1]

In [8]:
def get_average_doc_vector(sentence, model):
    """Embed ``sentence`` as the mean of its tokens' word vectors.

    The sentence is cleaned with ``clean_up_sentence`` and every token is run
    through ``correct_typo_damerau_levenshtein``. Tokens missing from
    ``model.wv`` are ignored.

    Args:
        sentence: raw input text.
        model: object exposing ``vector_size`` and a ``wv`` mapping of word
            vectors (the notebook passes the loaded Word2Vec model).

    Returns:
        A NumPy array of length ``model.vector_size``; all zeros when none of
        the tokens has a vector.
    """
    tokens = [
        correct_typo_damerau_levenshtein(token)
        for token in clean_up_sentence(sentence)
    ]
    known_vectors = [model.wv[token] for token in tokens if token in model.wv]

    if not known_vectors:
        return np.zeros(model.vector_size)

    # Accumulate into a zero-initialised array, one vector at a time.
    total = np.zeros(model.vector_size)
    for vector in known_vectors:
        total += vector
    return total / len(known_vectors)

In [9]:
def predict_class(msg,model):
    """Predict the intent of ``msg`` with ``model``.

    The message is embedded with ``get_average_doc_vector`` using the
    module-level ``word2vec_model`` and reshaped to ``(1, 1, -1)`` before being
    passed to ``model.predict``.

    Returns:
        ``0`` when no output score is greater than the 0.20 threshold.
        Otherwise a one-element list holding a dict with
        ``"class"`` (the result of ``label_encoder.inverse_transform`` for the
        best index) and ``"probability"`` (the best score as a string).
    """
    err_threshold=0.20

    sentence_vector=get_average_doc_vector(msg,word2vec_model)
    model_input=sentence_vector.reshape(1, 1, -1)  # Reshape for LSTM input
    scores=model.predict(model_input, verbose=0)[0]

    # Keep only the (index, score) pairs above the threshold.
    candidates=[(idx,score) for idx,score in enumerate(scores) if score>err_threshold]
    if not candidates:
        return 0

    # max() returns the first of equally scored candidates, i.e. the lowest index.
    best_idx,best_score=max(candidates,key=lambda pair:pair[1])
    label=label_encoder.inverse_transform([best_idx])

    return [{"class":label,"probability":str(best_score)}]
    

In [10]:
def get_response(class_pred,intents_json):
    """Pick a random response for the predicted intent.

    Args:
        class_pred: prediction list as returned by ``predict_class``; the tag is
            read from ``class_pred[0]["class"][0]``.
        intents_json: dict with an ``"intents"`` list whose entries carry a
            ``"tag"`` and a list of ``"responses"``.

    Returns:
        A randomly chosen response of the first intent whose tag matches
        (surrounding whitespace ignored). When no intent matches, or the
        matching intent has no responses, a fallback message is returned
        instead of raising ``UnboundLocalError`` / ``IndexError`` as before.
    """
    # Same wording as the "unknown input" reply used by chatbot_response().
    fallback="I dont know what u mean!, please ask me something else."

    tag=class_pred[0]["class"][0].strip()

    for intent in intents_json["intents"]:
        if intent["tag"].strip()==tag:
            responses=intent["responses"]
            # random.choice raises IndexError on an empty sequence.
            return random.choice(responses) if responses else fallback

    return fallback

In [11]:
def chatbot_response(msg):
    """Produce the bot's reply for a single user message.

    Returns:
        ``-1`` when ``predict_class`` returns ``-1``, a fixed "don't know"
        message when it returns ``0``, otherwise the text chosen by
        ``get_response``.
    """
    prediction=predict_class(msg,model)

    # NOTE(review): predict_class as defined in this notebook returns either 0
    # or a list, so this branch looks unreachable - confirm before relying on
    # -1 as a "stop" signal.
    if prediction == -1:
        return prediction

    if prediction == 0:
        return "I dont know what u mean!, please ask me something else."

    return get_response(prediction,intents)

In [49]:
# Quick manual check; "dep" is presumably a deliberate misspelling of "deep"
# to exercise the typo-correction step - confirm.
print(f'Chatbot response: {chatbot_response("what is dep learning?")}')

Sentence_words: ['dep', 'learning']
v: [ 1.69701260e-03  4.32851992e-03  4.82629775e-03  8.65830760e-03
 -5.88284689e-03 -1.63317937e-03 -4.55258414e-05  1.05022164e-02
  5.07986639e-04 -5.55555080e-03  7.81806326e-03 -6.98564062e-03
 -6.95393933e-03  2.59662914e-03  2.89647013e-03 -6.87545678e-03
  2.40772942e-03 -4.41844622e-03 -1.06935855e-04 -6.10748725e-03
  2.43992126e-03  7.56935519e-03  4.84886521e-03 -1.65246608e-03
  7.20510213e-03  1.59797457e-03  1.66662247e-03  6.54836732e-03
 -4.51200281e-03 -3.07882577e-03 -6.91399723e-03 -3.59371612e-03
  1.00935488e-02 -8.11677845e-03 -5.52248978e-03 -4.01630811e-03
  5.97100880e-03 -1.26887462e-03  2.73084623e-03 -2.90437252e-03
 -6.15023915e-03  7.61006959e-04  1.19713601e-04 -6.86098915e-03
  4.31279629e-03  2.65113323e-03 -4.21926827e-03 -6.47725537e-05
  2.45614609e-03  2.48841336e-03 -2.52861582e-03 -1.43212650e-03
 -4.90315957e-03  2.37181009e-03  5.68440021e-03  8.21745256e-04
  5.69075905e-03 -5.72551694e-03 -7.76388100e-03  7

In [30]:
# Interactive console chat.
# The loop used to stop only when chatbot_response() returned -1, which the
# current predict_class() never produces, so there was no clean way to leave it.
# Typing "quit" or "exit", or sending EOF / an interrupt, now ends the session.
while True:
    try:
        msg=input("You: ")
    except (EOFError,KeyboardInterrupt):
        break
    if msg.strip().lower() in ("quit","exit"):
        break
    response=chatbot_response(msg)
    # Kept for compatibility with the -1 "stop" value chatbot_response may return.
    if response==-1:
        break
    print("Bot:",response.strip())

Bot: Hi there, how can I help?
Bot: I can answer your questions related to machine learning or any general anser. Come on try me
Bot: Deep learning (also known as deep structured learning) is part of a broader family of machine learning methods based on artificial neural networks with representation learning.
Bot: Machine learning is a branch of computer science which deals with system programming in order to automatically learn and improve with experience. For example: Robots are programmed so that they can perform the task based on data they gather from sensors. It automatically learns programs from data.
Bot: Hello, thanks for asking
Bot: Reinforcement Learning(RL) is a type of machine learning technique that enables an agent to learn in an interactive environment by trial and error using feedback from its own actions and experiences.
Bot: Good to see you again
Bot: My pleasure


#### Flask

In [12]:
# from flask_ngrok import run_with_ngrok
from flask import Flask, jsonify, render_template, request

# Flask application object; the @app.route decorators below register handlers on it.
app=Flask(__name__)


@app.route("/")
def hello():
    """Serve the chat page by rendering the ``chatbot.html`` template."""
    chat_page = render_template('chatbot.html')
    return chat_page

def decrypt(msg):
    """Turn every ``+`` in ``msg`` into a space.

    Example: ``"what+is+deep+learning"`` becomes ``"what is deep learning"``.
    """
    # Splitting on "+" and re-joining with spaces swaps each "+" for one space.
    return " ".join(msg.split("+"))

@app.route("/query",methods=["POST"])
def chatbot_query():
    """Answer a chat message posted in the ``sentence`` form field.

    The field value is passed through ``decrypt`` (``+`` to space) and then to
    ``chatbot_response``; whatever that returns is sent back as the response.
    """
    return chatbot_response(decrypt(request.form["sentence"]))

# Start Flask's built-in server with default settings. The cell keeps running
# until the server is stopped (the captured log shows it on http://127.0.0.1:5000).
app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [19/Jun/2024 21:45:29] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [19/Jun/2024 21:45:30] "GET /static/style.css HTTP/1.1" 200 -
127.0.0.1 - - [19/Jun/2024 21:45:30] "GET /static/Image/BotImg.png HTTP/1.1" 200 -
127.0.0.1 - - [19/Jun/2024 21:45:30] "GET /static/Image/BotImg.png HTTP/1.1" 304 -
127.0.0.1 - - [19/Jun/2024 21:45:32] "GET /static/Image/UserImg.jpg HTTP/1.1" 200 -
127.0.0.1 - - [19/Jun/2024 21:45:51] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [19/Jun/2024 21:45:51] "GET /static/Image/BotImg.png HTTP/1.1" 304 -
127.0.0.1 - - [19/Jun/2024 21:45:58] "POST /query HTTP/1.1" 200 -
127.0.0.1 - - [19/Jun/2024 21:46:02] "GET /static/style.css HTTP/1.1" 304 -


In [None]:
!pip list

Package                   Version
------------------------- ----------
anyio                     4.1.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
asttokens                 2.4.1
async-lru                 2.0.4
attrs                     23.1.0
Babel                     2.13.1
beautifulsoup4            4.12.2
bleach                    6.1.0
blinker                   1.7.0
certifi                   2021.5.30
cffi                      1.16.0
chardet                   4.0.0
charset-normalizer        3.3.2
click                     8.1.7
colorama                  0.4.6
comm                      0.2.0
contourpy                 1.2.0
cycler                    0.12.1
debugpy                   1.8.0
decorator                 5.1.1
defusedxml                0.7.1
distlib                   0.3.7
executing                 2.0.1
fastjsonschema            2.19.0
filelock                  3.13.1
Flask                     3.0.2
flask-ngrok         


[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: C:\Users\micha\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip
