In [14]:
import numpy as np
import pandas as pd


In [15]:
# Load the cleaned question/response pairs that serve as the chatbot's knowledge base.
df = pd.read_excel("QA_cleaned_version54.xlsx")

In [17]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,Question,Response
0,access engine,There is a generic user login and home page a...
1,access engine,There is a generic user login and home page a...
2,get engine,There is a generic user login and home page a...
3,access aitek engine,There is a generic user login and home page a...
4,open engine,There is a generic user login and home page a...


In [18]:
from sentence_transformers import SentenceTransformer

In [19]:
# Load the pre-trained sentence-embedding model (a DistilBERT checkpoint, going by
# its name). This `model` object is used below to encode both the stored questions
# and each incoming user query.
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

In [20]:
# Generate sentence embeddings for the dataset.
# `corpus` holds every stored question; `corpus_embeddings` is produced from that
# same list, so position i in the embeddings corresponds to corpus[i]. The lookup
# function below relies on this alignment when it maps an argmax index back to text.
corpus = df['Question'].tolist()
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

In [21]:
from sklearn.metrics.pairwise import cosine_similarity

def find_most_similar_question(user_input, corpus, corpus_embeddings, similarity_threshold=0.5):
    """Return the stored question whose embedding is closest to ``user_input``.

    Args:
        user_input: Text of the question to match.
        corpus: Sequence of known questions, aligned index-for-index with
            ``corpus_embeddings``.
        corpus_embeddings: Embeddings of ``corpus``, one row per question.
            May be a torch tensor (on any device) or a NumPy-compatible array.
        similarity_threshold: Minimum cosine similarity required to accept
            the best match. Defaults to 0.5, the value that used to be
            hard-coded in this function.

    Returns:
        The best-matching entry of ``corpus``, or the string
        "I can't answer your question" when the corpus is empty or the best
        score falls below ``similarity_threshold``.
    """
    fallback = "I can't answer your question"

    # np.argmax raises ValueError on an empty array, so bail out before scoring.
    if len(corpus) == 0:
        return fallback

    # Encode the query with the notebook-level `model`, asking for a NumPy
    # array because scikit-learn operates on NumPy data, then make it 2D
    # (1 x dim) as cosine_similarity expects.
    query_embedding = model.encode(user_input, convert_to_numpy=True)
    query_embedding = np.asarray(query_embedding).reshape(1, -1)

    # A torch tensor that lives on a GPU cannot be converted implicitly by
    # scikit-learn; detach it and copy it to host memory first.
    if hasattr(corpus_embeddings, "detach"):
        corpus_embeddings = corpus_embeddings.detach().cpu().numpy()
    corpus_matrix = np.asarray(corpus_embeddings)

    # One row of scores: similarity of the query to every stored question.
    similarities = cosine_similarity(query_embedding, corpus_matrix)[0]

    best_idx = int(np.argmax(similarities))

    # Reject weak matches instead of answering with an unrelated question.
    if similarities[best_idx] < similarity_threshold:
        return fallback

    return corpus[best_idx]

   

In [22]:
def find_answer(most_similar_question, qa_df=None):
    """Look up the stored response for a matched question.

    Args:
        most_similar_question: Question text to look up. Any value that does
            not appear in the 'Question' column (including the fallback
            message produced when no match is good enough) yields the
            "Sorry, ..." reply.
        qa_df: Optional DataFrame with 'Question' and 'Response' columns to
            search. Defaults to the notebook-level ``df``.

    Returns:
        The 'Response' value of the first row whose 'Question' equals
        ``most_similar_question``, or
        "Sorry, I could not find an answer to your question." when no row
        matches.
    """
    if qa_df is None:
        qa_df = df

    # Single pass over the column: the same mask answers both "is it there?"
    # and "what is the response?", and an empty result can never reach .iloc[0].
    matches = qa_df.loc[qa_df['Question'] == most_similar_question, 'Response']
    if matches.empty:
        return "Sorry, I could not find an answer to your question."

    # Duplicate questions are allowed; the first occurrence wins.
    return matches.iloc[0]

In [23]:
def chatbot():
    """Run a single question/answer exchange on the console.

    Prompts with "USER: ", matches the typed text against the notebook-level
    ``corpus`` / ``corpus_embeddings``, looks up the corresponding response
    and prints it prefixed with "BOT: ". Nothing is returned.
    """
    # Get user input
    user_input = input("USER: ")

    # Find the most similar question in the dataset
    most_similar_question = find_most_similar_question(user_input, corpus, corpus_embeddings)

    # Find the answer corresponding to the most similar question in the dataset
    answer = find_answer(most_similar_question)

    # Print the answer. An f-string is used instead of "+" so that a
    # non-string Response cell (e.g. a number or NaN) is still printed
    # rather than raising TypeError.
    print(f"BOT: {answer}")

In [24]:
chatbot()


USER:  how to access the engine


BOT:  There is a generic user login and home page available. However, it can be specific for each customer and modifiable depending on business needs.


### Passing the test dataset through the model to get results


In [25]:
# Load the test questions together with their expected responses.
df1 = pd.read_excel("test_Dataset_last_version1.xlsx")

In [26]:
# Run every test question through the chatbot and collect, per row:
# the question, the stored question it was matched to, the predicted answer
# and the expected (true) answer.
questions = []
most_similar_questions = []
predicted_answers = []
true_answers = []

# Walk questions and expected responses together, row by row. Taking the
# response from the same row (instead of searching df1 again by question text)
# keeps duplicated test questions paired with their own response and avoids a
# full scan of df1 on every iteration.
for question, true_answer in zip(df1['Question'], df1['Response']):
    # Find the most similar question in the dataset
    most_similar_question = find_most_similar_question(question, corpus, corpus_embeddings)

    # Find the answer corresponding to the most similar question in the dataset
    predicted_answer = find_answer(most_similar_question)

    # Record this row's inputs and outputs
    questions.append(question)
    most_similar_questions.append(most_similar_question)
    predicted_answers.append(predicted_answer)
    true_answers.append(true_answer)

# Create a dataframe to store the results
results_df = pd.DataFrame({
    'question': questions,
    'most_similar_question': most_similar_questions,
    'predicted_answer': predicted_answers,
    'true_answer': true_answers
})

# Display the dataframe
results_df



Unnamed: 0,question,most_similar_question,predicted_answer,true_answer
0,How can I access the engine?,get engine access,There is a generic user login and home page a...,There is a generic user login and home page av...
1,How do I get to the engine?,get engine,There is a generic user login and home page a...,There is a generic user login and home page av...
2,How do I open the engine?,open engine,There is a generic user login and home page a...,There is a generic user login and home page av...
3,How do I reach the engine?,get engine access,There is a generic user login and home page a...,There is a generic user login and home page av...
4,How do I get access to the engine?,get engine access,There is a generic user login and home page a...,There is a generic user login and home page av...
...,...,...,...,...
190,Can I manage the missions of the strategy?,want know manage mission strategy,"An administrator can create, edit, and deploy...","An administrator can create, edit, and deploy..."
191,Can I manage the missions of the strategy mys...,want know manage mission strategy,"An administrator can create, edit, and deploy...","An administrator can create, edit, and deploy..."
192,Can I manage the missions of the strategy by ...,manage mission strategy without help,"An administrator can create, edit, and deploy...","An administrator can create, edit, and deploy..."
193,Can I manage the missions of the strategy on ...,manage mission strategy autonomously without help,"An administrator can create, edit, and deploy...","An administrator can create, edit, and deploy..."


In [27]:
# Save the per-question evaluation table; index=False omits the DataFrame index column.
results_df.to_excel('test_dataset_results.xlsx', index=False)

# Write the question/response frame loaded at the top of the notebook to a new file, again without the index.
df.to_excel('QA_cleaned.xlsx', index=False)
