In [None]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle

import random
import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

In [None]:
# Vocabulary accumulator. It is filled with the tokens of every pattern in the
# dataset and later lemmatized, de-duplicated and sorted.
words=[]

# Intent tags encountered in the dataset; a tag is appended only if it is not
# already present, so this list holds each tag once.
categories = []

# Training examples, stored as (token_list, tag) tuples - one tuple per pattern.
documents = []

# Tokens that are dropped from the vocabulary before lemmatization.
# The check is an exact match against whole tokens.
ignore_words = ['?', '!', '@', '#', '`', '$', '%', '^', '&', '*', '(', ')', '_', '+']

In [None]:
# Read the intents dataset used for training.
# - The raw string keeps the Windows backslashes literal; the unprefixed form
#   relied on invalid escape sequences (e.g. "\F"), which Python warns about.
# - The context manager closes the file handle as soon as the text is read.
with open(r'D:\Final Year Project\Code\Frontend_Backend\model\Chabotdataset.json') as dataset_handle:
    data_file = dataset_handle.read()
intents = json.loads(data_file)
print(intents)

In [None]:
# Walk every intent and tokenize each of its example patterns, collecting:
#   words      - every token seen (duplicates included at this stage)
#   documents  - (tokens, tag) pairs, one per pattern
#   categories - each tag once, in first-seen order
for intent in intents['intents']:
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words += tokens

        label = intent['tag']
        documents.append((tokens, label))

        if label not in categories:
            categories.append(label)

# Show the three collections that were just built.
for collected in (words, documents, categories):
    print(collected)

In [None]:
# Normalize the vocabulary: discard tokens listed in ignore_words, then
# lowercase and lemmatize whatever is left.
kept_tokens = (token for token in words if token not in ignore_words)
words = [lemmatizer.lemmatize(token.lower()) for token in kept_tokens]
print(words)

In [None]:
# Collapse the lemmatized tokens to their unique values and keep them in
# sorted order, so every word has a stable position in the vocabulary.
words = sorted(set(words))
print(words)

In [None]:
# Same treatment for the intent tags: unique values, sorted.
categories = sorted(set(categories))
print(categories)

In [None]:
# Persist the vocabulary and the intent tags so they can be reloaded later.
# Context managers make sure each file is flushed and closed after writing,
# instead of leaving the handles open until garbage collection.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('categories.pkl', 'wb') as categories_file:
    pickle.dump(categories, categories_file)

In [None]:
# Build the training set: one [bag_of_words, one_hot_tag] pair per document.
training = []
# Template for the one-hot label: a zero for every known tag.
output_empty = [0] * len(categories)
for pattern_tokens, tag in documents:
    # Lowercase and lemmatize the pattern's tokens so they are comparable with
    # the entries of the vocabulary.
    lemmas = [lemmatizer.lemmatize(token.lower()) for token in pattern_tokens]

    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere.
    bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]

    # Fresh copy of the template with a 1 at the position of this pattern's tag.
    output_row = output_empty.copy()
    output_row[categories.index(tag)] = 1

    training.append([bag, output_row])

# Shuffle the examples, then wrap them in an object array for column slicing.
random.shuffle(training)
training = np.array(training, dtype=object)

# Split into features (bags of words) and labels (one-hot tags).
train_x = list(training[:, 0])
train_y = list(training[:, 1])
print("Training data created")

In [None]:
# Create model - 3 Dense layers. First layer 128 neurons, second layer 64 neurons,
# and an output layer with one neuron per intent, using softmax to produce a
# probability for each intent. A Dropout(0.5) layer follows each hidden layer.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Fit the model; `hist` keeps the History object returned by fit().
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

# Save the trained model. The second positional parameter of Model.save is
# `overwrite`, not a training history, so `hist` must not be passed there
# (it previously worked only because the object is truthy).
# NOTE(review): this writes to the current working directory, while the serving
# cell loads the model from an absolute path - confirm both refer to the same file.
model.save('chatbot_model.h5')

print("model created")

In [None]:
import nltk
import numpy as np
from keras.models import load_model

nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

# Load the trained model.
# Raw strings keep the Windows backslashes literal; the unprefixed literals
# relied on invalid escape sequences (e.g. "\F", "\M", "\w"), which Python
# warns about.
model = load_model(r'D:\Final Year Project\Code\Frontend_Backend\Model\chatbot_model.h5')

# Load preprocessed data (words and categories).
# Context managers close each pickle file once it has been read.
# NOTE: pickle.load can execute arbitrary code - only load files you produced.
import pickle
with open(r'D:\Final Year Project\Code\Frontend_Backend\model\words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open(r'D:\Final Year Project\Code\Frontend_Backend\model\categories.pkl', 'rb') as categories_file:
    categories = pickle.load(categories_file)

lemmatizer = WordNetLemmatizer()

def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its tokens lowercased and lemmatized.

    Returns:
        A list with one normalized string per token produced by
        ``nltk.word_tokenize``.
    """
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

def bow(sentence, words, show_details=True):
    """Encode ``sentence`` as a bag-of-words vector over ``words``.

    Args:
        sentence: Text to encode; it is normalized with ``clean_up_sentence``.
        words: Vocabulary; position ``i`` of the result corresponds to ``words[i]``.
        show_details: When true, print a line for every vocabulary match.

    Returns:
        A NumPy array with 1 where the vocabulary word occurs in the sentence
        and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    presence = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            presence[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)
    return np.array(presence)

def predict_class(sentence, model):
    """Rank the intents that ``model`` predicts for ``sentence``.

    Args:
        sentence: User text to classify.
        model: Object whose ``predict`` accepts a batch of bag-of-words vectors.

    Returns:
        A list of ``{"intent": <tag>, "probability": <str>}`` dicts for every
        class scoring above the threshold, ordered from highest to lowest score.
        The list is empty when no class clears the threshold.
    """
    features = bow(sentence, words, show_details=False)
    # Predict on a batch of one and take that single row of class scores.
    scores = model.predict(np.array([features]))[0]

    # Classes at or below this score are discarded.
    ERROR_THRESHOLD = 0.25
    candidates = [
        (class_index, score)
        for class_index, score in enumerate(scores)
        if score > ERROR_THRESHOLD
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    return [
        {"intent": categories[class_index], "probability": str(score)}
        for class_index, score in candidates
    ]

def get_response(intents_list, intents_json, fallback="Sorry, I didn't understand that."):
    """Pick a random response for the top-ranked predicted intent.

    Args:
        intents_list: Predictions ordered best-first; each item is a dict with
            an "intent" key (the shape produced by ``predict_class``).
        intents_json: Parsed intents dataset: a dict with an "intents" list
            whose entries carry a "tag" and a list of "responses".
        fallback: Text returned when no reply can be selected.

    Returns:
        One of the matching intent's responses, chosen with ``random.choice``.
        ``fallback`` is returned when ``intents_list`` is empty, when no intent
        in the dataset has the predicted tag, or when the matching intent has
        no responses.
    """
    # Imported locally so the function also works when only the serving cells
    # are run: they re-import their other dependencies but not `random`.
    import random

    # No prediction cleared the classifier's threshold.
    if not intents_list:
        return fallback

    tag = intents_list[0]['intent']
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            responses = intent['responses']
            return random.choice(responses) if responses else fallback

    # Predicted tag is not present in the dataset.
    return fallback

def chatbot_response(user_input):
    """Return the chatbot's reply to ``user_input``.

    Classifies the text with the module-level ``model`` and picks a response
    from the module-level ``intents_json`` dataset.
    """
    predicted_intents = predict_class(user_input, model)
    return get_response(predicted_intents, intents_json)


In [None]:
import json

# Load your intents JSON file.
# The raw string keeps the Windows backslashes literal; the unprefixed literal
# relied on invalid escape sequences (e.g. "\F", "\m"), which Python warns about.
with open(r'D:\Final Year Project\Code\Frontend_Backend\model\Chabotdataset.json', 'r') as file:
    intents_json = json.load(file)


In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS

# Create the Flask application and wrap it with flask_cors.
# NOTE(review): CORS is enabled with no options - presumably this allows every
# origin; confirm that is intended outside local development.
app = Flask(__name__)
CORS(app)

@app.route('/flask', methods=['GET'])
def index():
    """Answer GET /flask with a fixed plain-text string."""
    banner = "Flask server"
    return banner

@app.route('/chatbot', methods=['POST'])
def index1():
    """Answer POST /chatbot with the chatbot's reply to the posted text.

    Expects a JSON object body; the user's text is read from its "userInput"
    key (an absent key is treated as an empty string).

    Returns:
        ``{"response": <reply>}`` as JSON on success, or ``{"error": <message>}``
        with HTTP 400 when the body is not a JSON object or "userInput" is not
        a string.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so every bad-body case is handled by the check below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # Make sure to access the correct key in the JSON data
    user_input = data.get('userInput', '')
    if not isinstance(user_input, str):
        # .lower() and the tokenizer both require text.
        return jsonify({"error": "'userInput' must be a string"}), 400

    if user_input.lower() == 'bye':
        response = "Chatbot: Goodbye!"
    else:
        response = chatbot_response(user_input)

    print("User:", user_input)
    print("Chatbot:", response)

    # Send the response back to Node.js
    return jsonify({"response": response})

# When this code runs as the main module, start the app's server on port 5000
# with debug mode switched off.
if __name__ == "__main__":
    app.run(port=5000, debug=False)