

```
# Google Dialogflow - Knowledge Base
```



In [None]:
# receives
# .txt file containing a list of question-answer pairs identified by 'P: ' and 'R: '
# .txt file with a list of questions, one question per line

# or

# .txt file containing raw text
# .txt file with a list of questions, one question per line

# produces
# .txt file containing the posed questions and the respective Dialogflow answers, prefixed with 'P:' and 'R:'

In [None]:
pip install google-cloud-dialogflow

In [None]:
pip install google-api-core==1.28.0

In [None]:
from google.cloud import dialogflow_v2beta1 as dialogflow
from google.api_core.exceptions import InvalidArgument
import time

In [None]:
# mount Google Drive at /content/drive (Google Colab only)
# the mount call may ask for authorization the first time it runs
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# google cloud project credentials
# the environment variable below must hold the path to the service-account JSON key file
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = # TODO: fill in the path (a string) to the JSON file with the credentials
# echo the variable to confirm that it was set
!echo $GOOGLE_APPLICATION_CREDENTIALS

In [None]:
# file management
def open_file(filename):
    """Read a text file and return its lines.

    Args:
        filename: path of the file to read.

    Returns:
        list of str: the lines of the file in order, each one keeping its
        trailing newline (as returned by ``readlines``). An empty file
        yields an empty list.
    """
    # the context manager closes the handle even if reading raises
    with open(filename, 'r') as read_file:
        return read_file.readlines()

def write_file(filename, content):
    """Write a sequence of strings to a text file, replacing any previous content.

    Args:
        filename: path of the file to create or overwrite.
        content: iterable of strings, written back to back; no separators or
            newlines are added between them.
    """
    # the context manager flushes and closes the handle even if writing raises
    with open(filename, 'w') as file_write:
        file_write.writelines(content)

In [None]:
# creates an empty knowledge base
def create_knowledge_base(project_id, display_name):
    """Create a new knowledge base in a project and report it on stdout.

    Args:
        project_id: ID of the Google Cloud project that will own the knowledge base.
        display_name: display name given to the new knowledge base.

    Returns:
        The ``name`` field of the knowledge base returned by the API
        (the value printed as "Knowledge Base ID").
    """
    kb_client = dialogflow.KnowledgeBasesClient()
    created = kb_client.create_knowledge_base(
        parent=kb_client.common_project_path(project_id),
        knowledge_base=dialogflow.KnowledgeBase(display_name=display_name),
    )

    # one printed line per entry; the last entry ends with an extra blank line
    report = (
        'Created Knowledge Base:',
        ' - Display Name: {}'.format(created.display_name),
        ' - Knowledge Base ID: {}\n'.format(created.name),
    )
    for report_line in report:
        print(report_line)

    return created.name

# creates a document to insert in knowledge base
def create_document(project_id, knowledge_base_id, display_name, mime_type, knowledge_type, content_uri):
    """Upload a local file as a new document of an existing knowledge base.

    Blocks until the document-creation operation finishes, waiting at most
    500 seconds.

    Args:
        project_id: ID of the Google Cloud project. Kept for interface
            compatibility; it is not used, because ``knowledge_base_id`` is
            passed to the API as the parent of the document.
        knowledge_base_id: value passed as ``parent`` of the new document
            (``testing`` passes the name returned by ``create_knowledge_base``).
        display_name: display name given to the document.
        mime_type: MIME type of the file, e.g. 'text/csv', 'text/html',
            'text/plain' or 'application/pdf'.
        knowledge_type: name of a member of ``dialogflow.Document.KnowledgeType``,
            e.g. 'FAQ' or 'EXTRACTIVE_QA'.
        content_uri: despite its name, the path of a local file; its bytes are
            sent as the raw content of the document.
    """
    client = dialogflow.DocumentsClient()

    # read the whole file as bytes; the context manager closes it even on error
    with open(content_uri, 'rb') as in_file:
        file_bytes = in_file.read()

    document = dialogflow.Document(display_name=display_name, mime_type=mime_type, raw_content=file_bytes)
    document.knowledge_types.append(getattr(dialogflow.Document.KnowledgeType, knowledge_type))

    operation = client.create_document(parent=knowledge_base_id, document=document)
    print('... waiting for results ...\n')
    # wait for the long-running operation; the created document itself is not needed here
    operation.result(timeout=500)

    print('Document ' + display_name + ' created!')

In [None]:
# given a question, retrieves answer from knowledge base
def retrieve_answer(project_id, session_id, language_code, knowledge_base_id, question):
    """Send one question to Dialogflow and return the knowledge answers of the result.

    Args:
        project_id: ID of the Google Cloud project of the agent.
        session_id: identifier of the conversation session.
        language_code: language code of the question.
        knowledge_base_id: knowledge base name; it is the only entry of the
            ``knowledge_base_names`` query parameter.
        question: text of the question.

    Returns:
        The ``knowledge_answers`` field of the query result of the
        detect-intent response.
    """
    sessions = dialogflow.SessionsClient()

    detect_request = dialogflow.DetectIntentRequest(
        session=sessions.session_path(project_id, session_id),
        query_input=dialogflow.QueryInput(
            text=dialogflow.TextInput(text=question, language_code=language_code)
        ),
        query_params=dialogflow.QueryParameters(knowledge_base_names=[knowledge_base_id]),
    )

    return sessions.detect_intent(request=detect_request).query_result.knowledge_answers

In [None]:
# writes a file with the posed questions and Dialogflow's answers
def retrieve_all_answers(project_id, session_id, language_code, knowledge_base_id, input_questions_file_path, save_file_path):
    """Ask every question of a file to Dialogflow and save questions and answers to a file.

    For each question the output file receives a line 'P:' + question,
    followed, when the knowledge base returned at least one answer, by
    'R:' + the first answer, followed by a blank line.

    Args:
        project_id: ID of the Google Cloud project of the agent.
        session_id: identifier of the conversation session.
        language_code: language code of the questions.
        knowledge_base_id: name of the knowledge base to query.
        input_questions_file_path: path of a text file with one question per
            line; blank lines are skipped.
        save_file_path: path of the text file to write.
    """
    # maximum length of the text sent as a question; longer questions are truncated
    max_question_length = 256

    final_file_content = []
    asked = 0

    for line in open_file(input_questions_file_path):
        # drop the line terminator so it is neither sent nor counted in the length limit
        question = line.rstrip('\n')
        if not question.strip():
            # a blank line is not a question
            continue
        asked += 1

        # retrieves answer to input question
        knowledge_answers = retrieve_answer(project_id, session_id, language_code, knowledge_base_id,
                                            question[:max_question_length])
        print('Response %d retrieved' % asked)

        # the full (untruncated) question is stored; the newline is added explicitly so
        # that a last line without terminator does not run into its answer
        final_file_content.append('P:' + question + '\n')

        # only the first returned answer is kept
        for answers in knowledge_answers.answers:
            final_file_content.append('R:' + answers.answer)
            break

        final_file_content.append('\n')
        final_file_content.append('\n')

    write_file(save_file_path, final_file_content)
    print('File with posed questions and respective answers created!')

In [None]:
# creates knowledge base, adds document and stores answers in file
def testing(project_id, display_name, documents_info, input_questions_file_path, save_file_path):
    # create knowledge base
    knowledge_base_id = create_knowledge_base(project_id, display_name)

    # add document to knowledge base
    for i in range(len(documents_info)):
        create_document(project_id, knowledge_base_id, documents_info[i][0], documents_info[i][1], documents_info[i][2], documents_info[i][3])

    # retrieve and store answers
    retrieve_all_answers(project_id, '1234567890', 'en', knowledge_base_id, input_questions_file_path, save_file_path)

In [None]:
# NOTES

# prior to running the code, google cloud credentials and a project must be created on the google cloud platform
# https://console.cloud.google.com/?hl=pt

# in addition, a google dialogflow agent must be created on the dialogflow platform and beta features enabled
# https://dialogflow.cloud.google.com/

# domain_file_path - path to the file containing the domain
# FAQs
# can be .csv, where file_type = 'text/csv'
# can be .html, where file_type = 'text/html'

# Text
# can be .txt, where file_type = 'text/plain'
# can be .html, where file_type = 'text/html'
# can be .pdf, where file_type = 'application/pdf'

# input_questions_file_path - path to the file containing all questions, one question per line
# Q1
# Q2
# Q3
# ...
# must be a .txt file

# save_file_path - path to the file where the posed questions and respective retrieved answers are to be saved
# must be a .txt file

In [None]:
# FAQs knowledge base
document_name = # 'doc_name'
file_type = # 'file_type'
knowledge_type = 'FAQ'

domain_file_path = # 'domain_file_path'
input_questions_file_path = # 'input_questions_file_path'
save_file_path = # 'input_questions_file_path'

# create one document
document_1 = [document_name, file_type, knowledge_type, domain_file_path]
documents_info = [document_1]

testing(project_id, display_name, documents_info, input_questions_file_path, save_file_path)

In [None]:
# Text knowledge base
document_name = # 'doc_name'
file_type = # 'file_type'
knowledge_type = 'EXTRACTIVE_QA'

domain_file_path = # 'domain_file_path'
input_questions_file_path = # 'input_questions_file_path'
save_file_path = # 'save_file_path'

# create one document
document_1 = [document_name, file_type, knowledge_type, domain_file_path]
documents_info = [document_1]

testing(project_id, display_name, documents_info, input_questions_file_path, save_file_path)