In [1]:
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, Dense, Bidirectional, LSTM, Input
from transformers import DistilBertTokenizer, TFDistilBertModel
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import tensorflow as tf


In [2]:

# Read the Q&A corpus and sort its lines into questions and answers by prefix
with open("que&ans.txt", "r", encoding="utf-8") as file:
    data = file.read().split('\n')

# A line tagged '|Q|' or '|A|' contributes the text after its 3-character tag;
# every other line is ignored.
questions = [line[3:] for line in data if line.startswith('|Q|')]
answers = [line[3:] for line in data if line.startswith('|A|')]


In [3]:

# Encode all questions in one batch with the pretrained DistilBERT tokenizer.
# padding=True / truncation=True are passed so the batch can be returned as
# TensorFlow tensors (return_tensors='tf').
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
questions_tokens = tokenizer(
    questions,
    padding=True,
    truncation=True,
    return_tensors='tf',
)

# Pull out the two fields of the encoding that are read below
input_ids, attention_mask = (
    questions_tokens[field] for field in ('input_ids', 'attention_mask')
)

In [4]:
# Create next-token training pairs: every prefix of a question (length >= 2)
# becomes one sample whose last token is the label and whose earlier tokens
# are the model input.

# Convert once to NumPy: slicing eager tensors element by element in a Python
# loop is slow. The attention mask gives the number of real tokens per row
# (the tokenizer call above pads the batch, so shorter rows carry filler).
token_matrix = np.asarray(input_ids)
real_lengths = np.asarray(attention_mask).sum(axis=1)

input_sequences = []
for token_row, real_length in zip(token_matrix, real_lengths):
    # Stop at the last real token so that no prefix ends in padding filler;
    # otherwise the model is mostly trained to predict padding.
    for prefix_end in range(2, int(real_length) + 1):
        input_sequences.append(token_row[:prefix_end])

if not input_sequences:
    raise ValueError("No training sequences could be built from the questions")

# Left-pad every prefix to the same length (pad_sequences pads with 0)
max_sequence_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# Last column is the target token id, everything before it is the input
X, labels = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the labels directly. Indexing np.eye(num_classes) would first
# allocate a num_classes x num_classes float64 matrix, which is several GB for
# a vocabulary in the tens of thousands.
num_classes = int(np.max(input_sequences)) + 1
y = np.zeros((len(labels), num_classes), dtype=np.float32)
y[np.arange(len(labels)), labels] = 1.0

In [5]:

# Assemble the network: DistilBERT encoder (loaded with trainable=False)
# -> bidirectional LSTM -> softmax over token ids.
distilbert_model = TFDistilBertModel.from_pretrained(
    'distilbert-base-uncased',
    trainable=False,
)

# One int32 token id per position; X has max_sequence_length - 1 columns
inputs = Input(dtype=tf.int32, shape=(max_sequence_length - 1,))

# First element of the encoder output (presumably the per-token hidden states
# -- TODO confirm against the transformers version in use)
distilbert_output = distilbert_model(inputs)[0]
bi_lstm = Bidirectional(LSTM(100))(distilbert_output)

# One output unit for every token id up to the largest id seen in the data
output = Dense(
    np.max(input_sequences) + 1,
    activation='softmax',
)(bi_lstm)

model = Model(outputs=output, inputs=inputs)

# Targets are one-hot rows, hence categorical cross-entropy
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [6]:
# Fit on the prefix/next-token pairs for 20 epochs, printing progress
model.fit(X, y, verbose=1, epochs=20)

# Persist the trained network (HDF5 file) and the tokenizer (pickle) so the
# inference cell can reload both
model.save('chatbot_model_distilbert.h5')
with open('tokenizer_distilbert.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, protocol=pickle.HIGHEST_PROTOCOL)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


In [9]:
import numpy as np
from tensorflow.keras.models import load_model, Model
from transformers import DistilBertTokenizer, TFDistilBertModel
import pickle
import tensorflow as tf

# Load the tokenizer and model with custom objects
custom_objects = {'TFDistilBertModel': TFDistilBertModel}
model = load_model('chatbot_model_distilbert.h5', custom_objects=custom_objects, compile=False)

# NOTE(review): pickle.load can execute arbitrary code contained in the file.
# Only load a pickle that this notebook produced itself.
with open('tokenizer_distilbert.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Sequence length expected by the model: the input width of the layer that
# follows the input layer.
if model.layers:
    max_sequence_length = model.layers[1].input_shape[1]

# Load question-answer pairs from the text file into a dictionary.
# The file is read as UTF-8, matching how the training cell reads it, so that
# non-ASCII characters do not depend on the platform's default encoding.
# Lines are paired by their '|Q|' / '|A|' prefix rather than by position, so a
# blank line or a question without an answer cannot shift every later pair or
# raise IndexError.
qa_dict = {}
pending_question = None
with open('que&ans.txt', 'r', encoding='utf-8') as file:
    for raw_line in file:
        line = raw_line.strip()
        if line.startswith('|Q|'):
            pending_question = line[3:]  # Remove '|Q|'
        elif line.startswith('|A|') and pending_question is not None:
            qa_dict[pending_question] = line[3:]  # Remove '|A|'
            pending_question = None

# Function to generate a response
def generate_response(question):
    """Return the chatbot's reply to ``question`` as a ``"Chatbot: ..."`` string.

    A question that is a key of ``qa_dict`` is answered straight from the
    dictionary. Any other question is tokenized and extended one token at a
    time with ``model`` (greedy argmax) until a stop token is predicted or
    ``max_sequence_length - 1`` tokens have been generated.

    Args:
        question: The user's question text.

    Returns:
        The reply prefixed with ``"Chatbot: "``.

    Raises:
        RuntimeError: If the model produces no decodable tokens.
    """
    # Check if the question is in the dictionary
    if question in qa_dict:
        return "Chatbot: " + qa_dict[question]

    # Encode without padding so the padding side can be controlled below
    token_ids = list(tokenizer.encode(question, max_length=max_sequence_length, truncation=True))

    # The tokenizer closes the sequence with [SEP]; drop it so the model is
    # asked to continue the text rather than to follow an end marker.
    sep_id = tokenizer.sep_token_id
    if token_ids and token_ids[-1] == sep_id:
        token_ids = token_ids[:-1]

    # Left-pad to the model's input width: training sequences were built with
    # pad_sequences(..., padding='pre'), so real tokens sit at the right edge.
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0
    padding = [pad_id] * (max_sequence_length - len(token_ids))
    input_ids = np.array([padding + token_ids], dtype=np.int32)

    # Tokens that end generation. eos_token_id is kept for tokenizers that
    # define one; it is None for DistilBERT, whose end marker is [SEP].
    stop_ids = {
        token_id
        for token_id in (sep_id, pad_id, getattr(tokenizer, 'eos_token_id', None))
        if token_id is not None
    }

    response_ids = []
    for _ in range(max_sequence_length - 1):
        scores = model.predict(input_ids, verbose=0)
        predicted_id = int(np.argmax(scores[0]))

        if predicted_id in stop_ids:
            break

        response_ids.append(predicted_id)

        # Slide the window: append the new token and keep the most recent
        # max_sequence_length positions.
        next_token = np.array([[predicted_id]], dtype=input_ids.dtype)
        input_ids = np.concatenate([input_ids, next_token], axis=1)[:, -max_sequence_length:]

    response_tokens = tokenizer.decode(response_ids, skip_special_tokens=True)

    # If the model fails to generate a response, raise an exception
    if not response_tokens:
        raise RuntimeError("Model failed to generate a response")

    return "Chatbot: " + response_tokens

# Interactive chat: keep answering until the user types 'exit' or a reply fails
print("Chatbot: Hi! I'm your chatbot. You can type 'exit' to end the conversation.")
while True:
    try:
        user_input = input("You: ")
        if user_input.lower() != 'exit':
            # Normal turn: answer and ask for the next question
            print(generate_response(user_input))
            continue
        print("Chatbot: Goodbye!")
    except Exception:
        # Any failure while reading input or answering ends the conversation
        print("Chatbot: I'm sorry, Please take the Question from the txt file. Goodbye!")
    # Reached only after 'exit' or an error
    break

Chatbot: Hi! I'm your chatbot. You can type 'exit' to end the conversation.
You: Who can checkout a Spark laptop?
Chatbot: Currently enrolled (in the active semester) UNT students, whom are in good standing with UNT and the Library are eligible to check out a laptop from the Spark
You: What is the difference between policy and procedure?
Chatbot: A quick way to distinguish a policy from a procedure is that a policy states “Why” the institution takes certain positions on an issue, and a procedure outlines “How” the institution will implement this policy position. For a more detailed break-down of the difference, please check out our Policies v. Procedures Comparison Table page.
You: what is computer science ?
Chatbot: I'm sorry, Please take the Question from the txt file. Goodbye!
