In [27]:
import numpy as np
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import Word2Vec
from transformers import BertTokenizer, BertModel
import torch

In [28]:
df = pd.read_csv('/content/faq_new.csv')

In [29]:
def wrap_text(text, width=100):
    lines = []
    for line in text.split('\n'):
        while len(line) > width:
            split_at = line[:width].rfind(' ')
            if split_at == -1:
                split_at = width
            lines.append(line[:split_at])
            line = line[split_at:].lstrip()
        lines.append(line)
    return '\n'.join(lines)

In [30]:
questions = df['Questions'].tolist()
answers = df['Answers'].tolist()

###1st Approach - cosine similarity

In [31]:
nlp = spacy.load("en_core_web_sm")

In [32]:
sentences = questions

# input_sentence = "How to get credit card along with co-applicant?"
input_sentence = "I want to know about the security and privacy of my personal details in bank records?"

input_doc = nlp(input_sentence)
sentence_docs = [nlp(sent) for sent in sentences]

similarity_scores = [cosine_similarity(input_doc.vector.reshape(1, -1), sent.vector.reshape(1, -1))[0][0] for sent in sentence_docs]
top_indices = np.argsort(similarity_scores)[-3:][::-1]

print("\033[1m Top 3 matching answers: \033[0m \n")
for i in range(len(top_indices)):
    ques = wrap_text(questions[top_indices[i]])
    ans = wrap_text(answers[top_indices[i]])
    print(f'{i+1}. \033[1m \033[3m Similarity score:\033 {round(float(similarity_scores[top_indices[i]]), 3)}\033[0m \033[0m\n')
    print(f"\033[3m Question:\033[0m {ques} ")
    print(f"\033[3m Answer:\033[0m {ans}\n")

[1m Top 3 matching answers: [0m 

1. [1m [3m Similarity score: 0.642[0m [0m

[3m Question:[0m Can I set up automatic transfers between my accounts and those of family members? 
[3m Answer:[0m Unfortunately, automatic transfers between accounts held by different individuals are not
supported. Each account owner should manage their transfers.

2. [1m [3m Similarity score: 0.554[0m [0m

[3m Question:[0m Are there penalties for early withdrawal from a certificate of deposit (CD)? 
[3m Answer:[0m Yes, early withdrawal from a CD may incur penalties. Review the terms of your CD or contact
customer service for information on withdrawal penalties.

3. [1m [3m Similarity score: 0.543[0m [0m

[3m Question:[0m What is the process for ordering a new set of checks for my account? 
[3m Answer:[0m You can order a new set of checks by visiting a branch, contacting customer service, or using the
check ordering feature in your online banking account.



In [33]:
for token in input_doc:
    print(f"{token.text}, {token.lemma_}")

I, I
want, want
to, to
know, know
about, about
the, the
security, security
and, and
privacy, privacy
of, of
my, my
personal, personal
details, detail
in, in
bank, bank
records, record
?, ?


###2nd approach - Tf-Idf

In [None]:
lowered_ques = [question.lower() for question in questions]
proc_questions = [nlp(i) for i in lowered_ques]
# print(proc_questions)

In [35]:
for token in proc_questions[0]:
    print(f"{token.text}, {token.lemma_}")

how, how
do, do
i, I
update, update
my, my
email, email
address, address
linked, link
to, to
the, the
bank, bank
account, account
?, ?


In [None]:
proc_questions_new = [str(doc) for doc in proc_questions]
# print(proc_questions_new)

In [None]:
vectorizer = TfidfVectorizer()
q_vectors = vectorizer.fit_transform(proc_questions_new)
# print(q_vectors)
# print(len(vectorizer.get_feature_names_out()))

In [38]:
# Get top 3 matches
def get_top_matches(query, processed_questions, answer_vectors):
    ques = query.lower()
    ques_tokenized = nlp(ques)
    query_new = str(ques_tokenized)
    query_vector = vectorizer.transform([query_new])

    similarities = cosine_similarity(query_vector, answer_vectors).flatten()
    top_indices = similarities.argsort()[-3:][::-1]

    top_scores = [similarities[i] for i in top_indices]
    top_answers = [answers[i] for i in top_indices]
    top_questions = [questions[i] for i in top_indices]
    return top_scores, top_questions, top_answers

In [39]:
# user_input = "How to get credit card along with co-applicant?"
user_input = "I want to know about the security and privacy of my personal details in bank records?"
similarity, top_questions, top_matching_answers = get_top_matches(user_input, proc_questions_new, q_vectors)

print("\033[1m Top 3 matching answers: \033[0m \n")
for i in range(3):
    ques = wrap_text(top_questions[i])
    ans = wrap_text(top_matching_answers[i])
    print(f'{i+1}. \033[1m \033[3m Similarity score:\033 {round(similarity[i], 3)}\033[0m \033[0m\n')
    print(f"\033[3m Question:\033[0m {ques} ")
    print(f"\033[3m Answer:\033[0m {ans}\n")

[1m Top 3 matching answers: [0m 

1. [1m [3m Similarity score: 0.363[0m [0m

[3m Question:[0m How can I obtain a copy of the bank's privacy policy? 
[3m Answer:[0m The bank's privacy policy is available on our website. You can also request a copy by visiting a
branch or contacting our customer service.

2. [1m [3m Similarity score: 0.302[0m [0m

[3m Question:[0m What security measures are in place for online transactions? 
[3m Answer:[0m We employ advanced security measures, including encryption, multi-factor authentication, and secure
connections, to protect your online transactions and personal information.

3. [1m [3m Similarity score: 0.278[0m [0m

[3m Question:[0m What types of personal loans does the bank offer? 
[3m Answer:[0m We offer various personal loans, including personal installment loans and lines of credit. Contact
our loan department or visit a branch for details on available options.



###3rd Approach - Word2Vec

In [40]:
sentences = questions

input_sentence = "I want to know about the security and privacy of my personal details in bank records?"

tokenized_sentences = [[token.text for token in nlp(sentence)] for sentence in sentences]
tokenized_input = [token.text for token in nlp(input_sentence)]

model = Word2Vec(sentences=tokenized_sentences, vector_size=150, window=10, min_count=1, workers=4)

def sentence_vector(sentence):
    vectors = [model.wv[word] for word in sentence if word in model.wv]
    if vectors:
        return np.mean(vectors, axis=0)
    return np.zeros(model.vector_size)

input_vector = sentence_vector(tokenized_input)
sentence_vectors = [sentence_vector(sentence) for sentence in tokenized_sentences]

similarity_scores = [cosine_similarity(input_vector.reshape(1, -1), vec.reshape(1, -1))[0][0] for vec in sentence_vectors]

top_indices = np.argsort(similarity_scores)[-3:][::-1]

print("\033[1m Top 3 matching answers: \033[0m \n")
for i in range(len(top_indices)):
  ques = wrap_text(questions[top_indices[i]])
  ans = wrap_text(answers[top_indices[i]])
  print(f'{i+1}. \033[1m \033[3m Similarity score:\033 {round(float(similarity_scores[top_indices[i]]), 3)}\033[0m \033[0m\n')
  print(f"\033[3m Question:\033[0m {ques} ")
  print(f"\033[3m Answer:\033[0m {ans}\n")


[1m Top 3 matching answers: [0m 

1. [1m [3m Similarity score: 0.747[0m [0m

[3m Question:[0m How do I update my email address linked to the bank account? 
[3m Answer:[0m You can update your email address by logging into your online banking account and navigating to the
profile settings. Alternatively, visit a branch or contact customer service.

2. [1m [3m Similarity score: 0.727[0m [0m

[3m Question:[0m How can I update my personal identification information with the bank? 
[3m Answer:[0m You can update personal identification information by visiting a branch, contacting the manager at
the concerned branch

3. [1m [3m Similarity score: 0.724[0m [0m

[3m Question:[0m How can I obtain a copy of the bank's privacy policy? 
[3m Answer:[0m The bank's privacy policy is available on our website. You can also request a copy by visiting a
branch or contacting our customer service.



###4th Approach - BERT

In [41]:
sentences = questions

# input_sentence = "How to get credit card along with co-applicant?"
input_sentence = "I want to know about the security and privacy of my personal details in bank records?"

# pre-trained tokenizer& model
tokenizer = BertTokenizer.from_pretrained('bert-large-cased')
model = BertModel.from_pretrained('bert-large-cased')

input_tokens = tokenizer.encode(input_sentence, return_tensors='pt')
sentence_tokens = [tokenizer.encode(sentence, return_tensors='pt') for sentence in sentences]

#get embeddings
with torch.no_grad():
    input_embeddings = model(input_tokens)['pooler_output']
    sentence_embeddings = [model(tokens)['pooler_output'] for tokens in sentence_tokens]

input_embedding_np = input_embeddings.numpy()
sentence_embeddings_np = [embedding.numpy() for embedding in sentence_embeddings]

similarity_scores = [cosine_similarity(input_embedding_np, embedding)[0][0] for embedding in sentence_embeddings_np]

top_indices = np.argsort(similarity_scores)[-3:][::-1]

for i in range(len(top_indices)):
  ques = wrap_text(questions[top_indices[i]])
  ans = wrap_text(answers[top_indices[i]])
  print(f'{i+1}. \033[1m \033[3m Similarity score:\033 {round(float(similarity_scores[top_indices[i]]), 3)}\033[0m \033[0m\n')
  print(f"\033[3m Question:\033[0m {ques} ")
  print(f"\033[3m Answer:\033[0m {ans}\n")


1. [1m [3m Similarity score: 0.999[0m [0m

[3m Question:[0m Is there a limit on the number of transactions I can make in a month? 
[3m Answer:[0m Transaction limits vary based on the type of account. Review your account terms, fee schedule, or
contact customer service for information on transaction limits.

2. [1m [3m Similarity score: 0.998[0m [0m

[3m Question:[0m Can I add a joint account holder to my existing account? 
[3m Answer:[0m Yes, you can add a joint account holder by visiting a branch together and completing the necessary
paperwork. Contact customer service for details on adding joint account holders.

3. [1m [3m Similarity score: 0.998[0m [0m

[3m Question:[0m How can I change the PIN for my debit card? 
[3m Answer:[0m To change the PIN for your debit card, visit an ATM, follow the on-screen instructions for PIN
change, or contact customer service for assistance.

