# Test the Dialogflow API in Jupyter
This is a notebook to test out specific Dialogflow functions in a Jupyter notebook.
You will need the correct Google Cloud credentials set up for this notebook to work. 

In [1]:
import json
import os
from typing import List

#from google.cloud import dialogflow
from google.cloud import dialogflow_v2beta1 as dialogflow
#from google.protobuf.json_format import MessageToJson

In [7]:
#initialize values
# Credentials file used by the Google Cloud client libraries; the path is
# relative to the notebook's working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'google_cloud_credentials.json'

# Project ID of the main agent. Left blank here; fill it in before running.
main_project = ''
main_project_path = 'projects/{}'.format(main_project)
main_agent_path = 'projects/{}/agent'.format(main_project)

# Project ID of the testing agent. Also blank by default.
testing_project = ''
testing_parent = 'projects/{}'.format(testing_project)

## Agent

In [None]:
# export the main agent
# `main_project_path` ('projects/<project id>') comes from the configuration cell.
parent = main_project_path

agent_client = dialogflow.AgentsClient()
export_agent_request = dialogflow.ExportAgentRequest(parent=parent)
# The call's return value is shown as the cell output.
agent_client.export_agent(export_agent_request)

In [None]:
# train the main agent
# `main_project_path` ('projects/<project id>') comes from the configuration cell.
parent = main_project_path

agent_client = dialogflow.AgentsClient()
train_agent_request = dialogflow.TrainAgentRequest(parent=parent)
response = agent_client.train_agent(train_agent_request)
response

In [None]:
# create the testing agent
# Builds an Agent for the testing project and submits it with set_agent.
agent_client = dialogflow.AgentsClient()
parent = testing_parent
testing_agent_name = 'nlp_testing_agent'
default_language_code="en"
time_zone="America/Los_Angeles"
# Pricing tier passed to the Agent; the enterprise alternative is kept below for reference.
tier = 'TIER_STANDARD'
#tier="TIER_ENTERPRISE"


agent = dialogflow.Agent(parent=parent, display_name=testing_agent_name,
                        default_language_code=default_language_code,
                         time_zone=time_zone, tier=tier)

response = agent_client.set_agent(request={"agent": agent})
response

In [None]:
# delete the test agent
# Destructive: removes the agent under the testing project path.
parent = testing_parent

agent_client = dialogflow.AgentsClient()
response = agent_client.delete_agent(parent=parent)
response

In [None]:
# get the testing agent
# Fetches the agent under the testing project path and displays it.
parent = testing_parent

agent_client = dialogflow.AgentsClient()
get_agent_request = dialogflow.GetAgentRequest(parent=parent)
response = agent_client.get_agent(get_agent_request)
response


## Intents

In [3]:
# intent prediction using main agent without knowledge graph

def detect_intent_texts(project_id, session_id, texts, language_code):
    """Run detect-intent on each text and print the outcome.

    For every entry in `texts` this prints the query text, the detected
    intent's display name with its confidence, and the fulfillment text.
    Nothing is returned.

    Using the same `session_id` between requests allows continuation
    of the conversation."""
    from google.cloud import dialogflow

    session_client = dialogflow.SessionsClient()
    session = session_client.session_path(project_id, session_id)
    print("Session path: {}\n".format(session))

    for text in texts:
        query_input = dialogflow.QueryInput(
            text=dialogflow.TextInput(text=text, language_code=language_code)
        )
        response = session_client.detect_intent(
            request={"session": session, "query_input": query_input}
        )
        # All printed fields come from the query result of the response.
        result = response.query_result

        print("=" * 20)
        print("Query text: {}".format(result.query_text))
        print(
            "Detected intent: {} (confidence: {})\n".format(
                result.intent.display_name,
                result.intent_detection_confidence,
            )
        )
        print("Fulfillment text: {}\n".format(result.fulfillment_text))
        
        
# Sample conversation sent to the main agent. All texts share one session ID.
project_id = main_project
session_id = '123456789'
language_code = 'en-US'
texts = ['Hi!', 'I need a burger']

# detect_intent_texts creates its own SessionsClient, so none is built here.
detect_intent_texts(project_id, session_id, texts, language_code)

Session path: projects/burgerbot-bskc/agent/sessions/123456789

Query text: Hi!
Detected intent: smalltalk.greetings.hello (confidence: 1.0)

Fulfillment text: 

Query text: I need a burger
Detected intent:  (confidence: 0.6668421030044556)

Fulfillment text: I'm glad to help. What can I do for you?



In [4]:
def create_intent_list(intents, ignore_ml_disabled=True):
    '''
    Overview: A function to create a dictionary of intents to create in test agent
    Depends On: None
    Constraints: Function has been tested mostly ignoring intents disabled for machine learning.
    Input:
        1. intents (ListIntentsPager): A list of intents from Dialogflow API. Any iterable of
           objects exposing `name`, `display_name` and `ml_disabled` works.
        2. ignore_ml_disabled (bool): When truthy (default), intents whose `ml_disabled` flag is
           set are left out of the result. When falsy, every intent is included.
    Outputs:
        1. intent_dict (dict): A dictionary containing intent path as the key and the intent display name and intent id as values
        Ex: projects/burgerbot-bskc/agent/intents/01a0d3bf-582a-4f4a-81b2-67763d65f6b4': ('ask_transfer_charge', '01a0d3bf-582a-4f4a-81b2-67763d65f6b4')

    '''
    intent_dict = {}
    for intent in intents:
        # Skip ML-disabled intents only when the caller asked to ignore them.
        if ignore_ml_disabled and intent.ml_disabled:
            continue
        intent_path = intent.name
        # The intent id is the last segment of the resource path.
        intent_id = str(intent_path).split("/")[-1]
        intent_dict[intent_path] = (intent.display_name, intent_id)
    return intent_dict

In [8]:
# get list of intents from main agent
intent_client = dialogflow.IntentsClient()

# List the main agent's intents and index them by intent path.
intent_list = intent_client.list_intents(parent=main_agent_path)
intent_dict = create_intent_list(intent_list)
 

In [9]:
# (display name, intent id) tuples, without the intent-path keys.
intent2 = list(intent_dict.values())


In [10]:
# Keep only the display name (first element) of every entry.
intent3 = [entry[0] for entry in intent2]

In [11]:
import pandas as pd
# One-column table of intent display names, sorted alphabetically.
intent_df = pd.DataFrame(intent3, columns=['intent_name'])
intent_df = intent_df.sort_values(by='intent_name')

In [18]:
# Write the sorted intent names to an Excel workbook in the working directory.
intent_df.to_excel('intent_list.xlsx', index=False)

In [None]:
# list the intents in main agent
project_id = main_project
parent_path = 'projects/{}/agent'.format(project_id)

intents_client = dialogflow.IntentsClient()
intents = intents_client.list_intents(parent=parent_path)
# Uncomment the next line to display the listing.
#intents

In [None]:
# get intents in the main agent
# Fetches one hard-coded intent with the full intent view requested.
intent_name = 'projects/burgerbot-bskc/agent/intents/b7f67a9b-0c7e-45ba-9bf9-ebed739be037'

intent_client = dialogflow.IntentsClient()
intent_view = dialogflow.IntentView.INTENT_VIEW_FULL
get_intent_request = dialogflow.GetIntentRequest(name=intent_name, intent_view=intent_view)
intent = intent_client.get_intent(get_intent_request)

In [None]:
# get training phrases and entities from intent
# Prints the first part of the third training phrase of `intent`, followed by
# that part's text, entity type and alias.
print(intent.training_phrases[2].parts[0])
print(intent.training_phrases[2].parts[0].text)
print(intent.training_phrases[2].parts[0].entity_type)
print(intent.training_phrases[2].parts[0].alias)

In [None]:
# create new basic intent
# Creates an intent in the testing agent with only a display name set.
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)
new_intent_name = "Test Create New Intent"

intent_client = dialogflow.IntentsClient()
intent = dialogflow.Intent(display_name = new_intent_name)

create_intent_request = dialogflow.CreateIntentRequest(parent=parent_path, intent=intent)
response = intent_client.create_intent(create_intent_request)

In [None]:
# list the intents in testing agent
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

intents_client = dialogflow.IntentsClient()
intents = intents_client.list_intents(parent=parent_path)
# Display the listing as the cell output.
intents

In [None]:
# delete intent
# Destructive: deletes the intent identified by this hard-coded resource name.
intent_name = 'projects/burgerbot-bskc-test-agent/agent/intents/f5756524-8ae4-4d9b-bc19-bcc6d61150fb'

intent_client = dialogflow.IntentsClient()
intent_client.delete_intent(name=intent_name)

In [None]:
# create intent with training phrase
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

intent_client = dialogflow.IntentsClient()
# A training phrase is a list of parts; here it is a single plain-text part.
part = dialogflow.Intent.TrainingPhrase.Part(text = "Test Training Phrase to Invoke")
training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part])

intent = dialogflow.Intent(display_name = "Test Create New Intent", training_phrases = [training_phrase])
create_intent_request = dialogflow.CreateIntentRequest(parent=parent_path, intent=intent)
response = intent_client.create_intent(create_intent_request)
response

In [None]:
# create intent with multiple training phrases
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)
intent_name = 'multiple training phrases'
create_training_phrases = ["First Training Phrase", "Second Training Phrase"]


intent_client = dialogflow.IntentsClient()
# Wrap every phrase string in a single-part TrainingPhrase.
trained_phrases = []
for phrase in create_training_phrases:
    part = dialogflow.Intent.TrainingPhrase.Part(text = phrase)
    training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part])
    trained_phrases.append(training_phrase)
    
intent = dialogflow.Intent(display_name = intent_name, training_phrases = trained_phrases)
create_intent_request = dialogflow.CreateIntentRequest(parent=parent_path, intent=intent)
response = intent_client.create_intent(create_intent_request)
response

In [None]:
# create intent with training phrase and entity
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

# NOTE(review): `training_phrase` starts as the phrase text and is rebound
# below to the TrainingPhrase object built from it.
training_phrase = "What time is Citibank open?"
entity_type = '@organization'
alias = 'organization'

intent_client = dialogflow.IntentsClient()
# The whole sentence is sent as one part carrying the entity type and alias.
part = dialogflow.Intent.TrainingPhrase.Part(text = training_phrase, entity_type=entity_type, alias=alias)
training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part])

intent = dialogflow.Intent(display_name = "Test Create New Intent", training_phrases = [training_phrase])
create_intent_request = dialogflow.CreateIntentRequest(parent=parent_path, intent=intent)
response = intent_client.create_intent(create_intent_request)
response

In [None]:
# create intent with training phrase and multiple entities

project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

intent_client = dialogflow.IntentsClient()
# The sentence is split into five parts; only "time" and "Citibank" carry
# an entity type and alias, the remaining parts are plain text.
part1 = dialogflow.Intent.TrainingPhrase.Part(text = "What")
part2 = dialogflow.Intent.TrainingPhrase.Part(text = "time", entity_type='@time', alias='time')
part3 = dialogflow.Intent.TrainingPhrase.Part(text = "is")
part4 = dialogflow.Intent.TrainingPhrase.Part(text = "Citibank", entity_type='@organization', alias='organization')
part5 = dialogflow.Intent.TrainingPhrase.Part(text = "open?")
training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part1, part2, part3, part4, part5])

intent = dialogflow.Intent(display_name = "intent with training phrase and multiple entities", training_phrases = [training_phrase])
create_intent_request = dialogflow.CreateIntentRequest(parent=parent_path, intent=intent)
response = intent_client.create_intent(create_intent_request)
response

In [None]:
# create intent with multiple training phrases and entities
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)
intent_name = 'multiple training phrases and entities'
# Three parallel lists: entry i of each describes training phrase i.
# An empty string means "no entity" for that phrase.
create_training_phrases = ["What time is Citibank open?", "Is the bank Citibank closed?", "Is the bank still open"]
create_entities = ['@organization','@organization', '']
create_entity_alias = ['organization','organization', '']

intent_client = dialogflow.IntentsClient()
trained_phrases = []
# Walk the parallel lists together instead of indexing each one by position.
for phrase_text, phrase_entity, phrase_alias in zip(create_training_phrases, create_entities, create_entity_alias):
    part = dialogflow.Intent.TrainingPhrase.Part(text = phrase_text, entity_type=phrase_entity, alias=phrase_alias)
    training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part])
    trained_phrases.append(training_phrase)

intent = dialogflow.Intent(display_name = intent_name, training_phrases = trained_phrases)
create_intent_request = dialogflow.CreateIntentRequest(parent=parent_path, intent=intent)
response = intent_client.create_intent(create_intent_request)
response

In [None]:
# batch create intents with training phrases
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

intent_client = dialogflow.IntentsClient()
# First intent: one single-part training phrase.
part = dialogflow.Intent.TrainingPhrase.Part(text = "Training Phrase to Invoke")
training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part])
intent1 = dialogflow.Intent(display_name = "Jason's New Intent", training_phrases = [training_phrase])

# Second intent, built the same way.
# NOTE(review): `intent2` is also the name of the list built in the intent-export cells.
part = dialogflow.Intent.TrainingPhrase.Part(text = "Training Phrase to Invoke2")
training_phrase = dialogflow.Intent.TrainingPhrase(parts=[part])
intent2 = dialogflow.Intent(display_name = "Jason's New Intent2", training_phrases = [training_phrase])

# Both intents are sent inline in a single batch-update request.
intents: List[dialogflow.Intent] = [intent1,intent2]
intent_batch = dialogflow.IntentBatch(intents=intents)
batch_update_intent_request = dialogflow.BatchUpdateIntentsRequest(parent=parent_path, intent_batch_inline=intent_batch)
response = intent_client.batch_update_intents(batch_update_intent_request)

## Entities

In [None]:
# List entity types
# Lists the entity types of the testing agent and displays the result.
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

entity_client = dialogflow.EntityTypesClient()
response = entity_client.list_entity_types(parent=parent_path)
response

In [None]:
# create entity type
# https://stackoverflow.com/questions/55233717/how-to-upload-entity-and-its-synonyms-to-dialogflow-using-v2-api
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)
# Kind and display name of the entity type to create in the testing agent.
kind = 'KIND_MAP'
display_name = 'organization'

entity_client = dialogflow.EntityTypesClient()
entity = dialogflow.EntityType(display_name=display_name, kind=kind)
create_entity_type_request = dialogflow.CreateEntityTypeRequest(parent=parent_path, entity_type=entity)
response = entity_client.create_entity_type(create_entity_type_request)
print('Entity type created: \n{}'.format(response))


In [None]:
# single get entity type
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

# Hard-coded ID of the entity type to fetch; appended to the agent path below.
entity_type_id = '8127cad6-bc78-463d-b18d-08fb4cf20b09'
entity_path = parent_path+'/entityTypes/'+entity_type_id

entity_client = dialogflow.EntityTypesClient()
# The fetched entity type is stored in a variable named `get_entity_type`,
# which the batch-update cell reads.
get_entity_type = entity_client.get_entity_type(name=entity_path)
get_entity_type

In [None]:
# batch update entity type
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)

entity_type_id = '8127cad6-bc78-463d-b18d-08fb4cf20b09'
entity_path = parent_path+'/entityTypes/'+entity_type_id

entity_type_client = dialogflow.EntityTypesClient()
# Fetch the entity type in this cell so it does not depend on a variable
# left behind by another cell, then change one of its values.
entity_type_to_update = entity_type_client.get_entity_type(name=entity_path)
entity_type_to_update.display_name='hello'

# Send the modified entity type inline in a batch-update request.
entity_types: List[dialogflow.EntityType] = [entity_type_to_update]
entity_types_batch = dialogflow.EntityTypeBatch(entity_types=entity_types)
batch_update_entity_type_request = dialogflow.BatchUpdateEntityTypesRequest(parent=parent_path, entity_type_batch_inline=entity_types_batch)

response = entity_type_client.batch_update_entity_types(batch_update_entity_type_request)

In [None]:
# Create an entity (a value plus synonyms) inside an existing entity type
project_id = testing_project
parent_path = 'projects/{}/agent'.format(project_id)
entity_type_path = 'projects/burgerbot-bskc-test-agent/agent/entityTypes/3e4a9d84-fae2-450c-bb91-557903cdf6fa'
entity_value = 'WAMU'
synonyms_list = ['Washington Mutual']

# Note: synonyms must be exactly [entity_value] if the
# entity_type's kind is KIND_LIST
entity_client = dialogflow.EntityTypesClient()
# Fall back to the value itself when no synonyms were supplied.
synonyms = synonyms_list or [entity_value]

entity = dialogflow.EntityType.Entity()
entity.value = entity_value
entity.synonyms.extend(synonyms)
print(entity)

# The fields are passed by keyword: the client method takes a request object
# as its only positional argument.
response = entity_client.batch_create_entities(parent=entity_type_path, entities=[entity])
print('Entity created: {}'.format(response))

## Documents

In [None]:
# list documents
project_id = main_project
knowledge_base_id = 'MTY4NTMzMzczMjAyOTQ3MTEyOTY'

doc_client = dialogflow.DocumentsClient()
# Build the knowledge base path through the client class so this cell does
# not depend on `kb_client`, which is only created in a later cell.
knowledge_base_path = dialogflow.KnowledgeBasesClient.knowledge_base_path(project_id, knowledge_base_id)
list_doc_response = doc_client.list_documents(parent=knowledge_base_path)
#list_doc_response

In [None]:
# get document
project_id = main_project

doc_client = dialogflow.DocumentsClient()
# Uses the second document of the listing produced by the "list documents" cell.
doc_name = list_doc_response.documents[1].name
get_doc_response = doc_client.get_document(name=doc_name)

## Knowledge Base

In [None]:
#list_knowledge_bases
# Lists the knowledge bases under the main project.
project_id = main_project
parent_path = 'projects/{}'.format(project_id)

kb_client = dialogflow.KnowledgeBasesClient()
kb_list = kb_client.list_knowledge_bases(parent=parent_path)

In [None]:
# get knowledge base
project_id = main_project
# Hard-coded ID of the knowledge base to fetch.
knowledge_base_id = 'MTY4NTMzMzczMjAyOTQ3MTEyOTY'

kb_client = dialogflow.KnowledgeBasesClient()
knowledge_base_path = kb_client.knowledge_base_path(project_id, knowledge_base_id)
response = kb_client.get_knowledge_base(name=knowledge_base_path)

# The "Knowledge ID" line prints the knowledge base's `name` field.
print("Got Knowledge Base:")
print(" - Display Name: {}".format(response.display_name))
print(" - Knowledge ID: {}".format(response.name))

In [None]:
# create new empty knowledge base
# Creates a knowledge base in the main project with only a display name set.
project_id = main_project
parent_path = 'projects/{}'.format(project_id)
new_kb_name = 'test2'

kb_client = dialogflow.KnowledgeBasesClient()
knowledge_base = dialogflow.KnowledgeBase(display_name=new_kb_name)
response = kb_client.create_knowledge_base(parent=parent_path, knowledge_base=knowledge_base)

In [None]:
# add document to new knowledge base
project_id = main_project
knowledge_base_id = 'NTYxMjM1MjY1MDM3Nzk1MzI4MA'
# Reuse the name, MIME type and raw content of the document fetched in the
# "get document" cell (`get_doc_response`).
display_name = get_doc_response.display_name
mime_type = get_doc_response.mime_type
raw_content = get_doc_response.raw_content
knowledge_type = 'FAQ'


doc_client = dialogflow.DocumentsClient()
knowledge_base_path = dialogflow.KnowledgeBasesClient.knowledge_base_path(project_id, knowledge_base_id)
document = dialogflow.Document(display_name=display_name, mime_type=mime_type, raw_content=raw_content)
# Look up the KnowledgeType enum member by its name ('FAQ').
document.knowledge_types.append(getattr(dialogflow.Document.KnowledgeType, knowledge_type))
response = doc_client.create_document(parent=knowledge_base_path, document=document)
print("Waiting for results...")
# Wait up to 120 seconds for the create call to finish.
document = response.result(timeout=120)
print("Created Document:")
print(" - Display Name: {}".format(document.display_name))
print(" - Knowledge ID: {}".format(document.name))


## Other

### Compare Language Files

In [286]:
import zipfile
from os import listdir
from os.path import isfile, join

# Unpack the exported agent archive into a dated working folder.
path_to_zip_file = 'data/export/Dolores_training_exp.zip'
path_to_extract = 'data/export/2022_02_02/'

with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(path_to_extract)

In [287]:
# Folder that holds the exported intent JSON files. `join` avoids the doubled
# slash that string concatenation produced, since `path_to_extract` already
# ends with '/'.
mypath = join(path_to_extract, 'intents')
# os.listdir returns entries in arbitrary order, so sort for a stable listing.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
onlyfiles[0:20]

['!cancel_intent.json',
 '!cancel_intent_usersays_en.json',
 '!no_intent.json',
 '!no_intent_usersays_en.json',
 '!submit_intent.json',
 '!submit_intent_usersays_en.json',
 '!yes_intent.json',
 '!yes_intent_usersays_en.json',
 'cme.1.0-callme_entry.json',
 'cme.1.0-callme_entry_usersays_en-au.json',
 'cme.1.0-callme_entry_usersays_en-us.json',
 'cme.1.0-callme_entry_usersays_en.json',
 'cme.3.1-callme_cancel.json',
 'cme.3.1-callme_cancel_usersays_en-au.json',
 'cme.3.1-callme_cancel_usersays_en-us.json',
 'cme.3.1-callme_cancel_usersays_en.json',
 'cme.3.2-callme_country_not_found.json',
 'cme.3.2-callme_country_not_found_usersays_en-au.json',
 'cme.3.2-callme_country_not_found_usersays_en-us.json',
 'cme.3.2-callme_country_not_found_usersays_en.json']

In [288]:
# Split the usersays files by locale suffix. The plain `en` list is every file
# containing '_usersays_en' that is not an `en-au` or `en-us` file.
user_says_en = [x for x in onlyfiles if '_usersays_en' in x and '_usersays_en-au' not in x and '_usersays_en-us' not in x]
user_says_en_au = [x for x in onlyfiles if '_usersays_en-au' in x]
user_says_en_us = [x for x in onlyfiles if '_usersays_en-us' in x]

In [293]:
# Number of usersays files per locale (the labels say "rows", but these are file counts).
print("EN rows:", len(user_says_en))
print("AU rows:", len(user_says_en_au))
print("US rows:", len(user_says_en_us))

EN rows: 346
AU rows: 342
US rows: 342


In [294]:
import re

def extract_intent_phrases(intent_json_list, intents_dir='data/export/2022_02_02/intents'):
    """Read exported usersays JSON files and collect their phrases per intent.

    Args:
        intent_json_list: File names (not paths) of the usersays JSON files to read.
        intents_dir: Folder containing those files. Defaults to the folder the
            notebook extracts the agent export into.

    Returns:
        dict mapping the intent name (the file name with everything from
        '_usersays_' onwards removed) to a list of [id, phrase text] pairs,
        one per entry in the file.

    Notes:
        Each entry's phrase text is the concatenation of the 'text' of all of
        its 'data' parts, so phrases stored as several parts are kept whole.
        If two files clean to the same intent name, the later one wins.
    """
    suffix_pattern = re.compile(r"(_usersays_).*$", re.MULTILINE)
    dict_data = {}
    for file in intent_json_list:
        json_file_path = os.path.join(intents_dir, file)
        # Context manager so the file handle is closed after every read.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        intent_info_list = [
            [entry['id'], ''.join(part['text'] for part in entry['data'])]
            for entry in data
        ]
        cleaned_name = suffix_pattern.sub("", file)
        dict_data[cleaned_name] = intent_info_list
    return dict_data

In [295]:
# Scratch cell: builds [id, first-part text] pairs from `data` and stores them
# under one intent name in `dict_t`.
# NOTE(review): neither `data` nor `dict_t` is assigned at notebook level before
# this cell in the notebook as shown; it presumably relied on leftover kernel
# state. Confirm or remove.
intent_info_list = []
for i in range(len(data)):
    id_info = data[i]['id']
    text_info = data[i]['data'][0]['text']
    intent_info = [id_info, text_info]
    intent_info_list.append(intent_info)
    #dict_t['faq.1.00-account.signing.out'] = intent_info
    
dict_t['faq.1.00-account.signing.out'] = intent_info_list

In [296]:
# One {intent name: [[id, phrase], ...]} dictionary per locale.
en_data = extract_intent_phrases(user_says_en)
en_au_data = extract_intent_phrases(user_says_en_au)
en_us_data = extract_intent_phrases(user_says_en_us)

In [297]:
def getList(dict):
    """Return the keys of the given dictionary as a list, in iteration order."""
    return [key for key in dict.keys()]

In [298]:
# Intent names present in each locale's data.
intents_en = (getList(en_data))
intents_en_au = (getList(en_au_data))
intents_en_us = (getList(en_us_data))

In [299]:
# Union of the intent names across the three locales, without duplicates.
full_list_intents = list(set(intents_en + intents_en_au + intents_en_us))

In [300]:
# Sort in place so the intent order is stable across runs.
full_list_intents.sort()

In [301]:
# Check, intent by intent, that the EN, AU and US phrase lists line up:
# all three must exist and have the same length, and the phrase text at every
# position must match. Positions that differ are printed; an intent with a
# missing locale or unequal lengths prints False followed by its index.

i = 0
for intent in full_list_intents:
    en = en_data.get(intent)
    au = en_au_data.get(intent)
    us = en_us_data.get(intent)

    all_present = au and en and us
    if all_present and len(en) == len(au) and len(au) == len(us):
        # Lengths are equal here, so walking the three lists together visits
        # every position exactly once.
        for en_row, au_row, us_row in zip(en, au, us):
            a, b, c = en_row[1], au_row[1], us_row[1]
            if not (a == b and b == c):
                print('intent:',i, intent)
                print('en!=au:',a!=b)
                print('au!=us:',b!=c)
    else:
        print(False)
        print(i)
    i += 1
    

False
0
False
1
False
2
False
3


In [302]:
# Pull one intent's [id, phrase] lists for each locale to inspect them by hand.
en = en_data.get('roe.1.0-talk_to_human_entry')
au = en_au_data.get('roe.1.0-talk_to_human_entry')
us = en_us_data.get('roe.1.0-talk_to_human_entry')

In [303]:
# Number of EN phrases for the inspected intent.
len(en)

729

In [304]:
# Number of AU phrases for the inspected intent.
len(au)

729

In [305]:
# Number of US phrases for the inspected intent.
len(us)

729

In [306]:
# Print every position where the EN and AU phrase IDs differ, with the AU ID.
for i in range(len(en)):
    if en[i][0] != au[i][0]:
        print(i)
        print(au[i][0])

0
d98757e9-3de8-40a0-ab6f-63f7dc174bac
1
f629b3c3-7626-4095-a329-c16f5fdcd6c2


In [307]:
# Print every position where the EN and US phrase IDs differ, with the US ID.
for i in range(len(en)):
    if en[i][0] != us[i][0]:
        print(i)
        print(us[i][0])

0
45858a04-f7eb-4afe-acdf-706970126dad
1
206bfa58-d468-4cd4-8302-1070508eb82d


In [308]:
# Print every position where the AU and US phrase IDs differ, with the AU ID.
# The loop bound is the EN list length.
for i in range(len(en)):
    if au[i][0] != us[i][0]:
        print(i)
        print(au[i][0])

0
d98757e9-3de8-40a0-ab6f-63f7dc174bac
1
f629b3c3-7626-4095-a329-c16f5fdcd6c2


In [282]:
# ID of the first EN phrase of the inspected intent.
en[0][0]

'2de74b71-1147-4c56-8f2a-52932dfa00e0'

In [283]:
# ID of the first AU phrase of the inspected intent.
au[0][0]

'd98757e9-3de8-40a0-ab6f-63f7dc174bac'

In [284]:
# ID of the first US phrase of the inspected intent.
us[0][0]

'45858a04-f7eb-4afe-acdf-706970126dad'

In [271]:
# Text of the first EN phrase.
en[0][1]

'Resolve unpaid items'

In [267]:
# Text of the second AU phrase.
au[1][1]

'Call us'

### Extract Phrases from Intents

In [None]:
# load libraries
import re
import os
import zipfile
from os import listdir
from os.path import isfile, join

In [None]:
# extract files out of zip

# Unpack the exported agent archive into a dated working folder.
path_to_zip_file = 'data/export/Dolores_training_exp.zip'
path_to_extract = 'data/export/2022_02_02/'

with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(path_to_extract)

In [311]:
# get a list of all intent json files

# `join` avoids the doubled slash that string concatenation produced, since
# `path_to_extract` already ends with '/'.
mypath = join(path_to_extract, 'intents')
# os.listdir returns entries in arbitrary order, so sort for a stable listing.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
onlyfiles[0:5]

['!cancel_intent.json',
 '!cancel_intent_usersays_en.json',
 '!no_intent.json',
 '!no_intent_usersays_en.json',
 '!submit_intent.json']

In [312]:
# filter the list of intent json files by en, au, us

# The plain `en` list is every file containing '_usersays_en' that is not an
# `en-au` or `en-us` file.
user_says_en = [x for x in onlyfiles if '_usersays_en' in x and '_usersays_en-au' not in x and '_usersays_en-us' not in x]
user_says_en_au = [x for x in onlyfiles if '_usersays_en-au' in x]
user_says_en_us = [x for x in onlyfiles if '_usersays_en-us' in x]
# These are file counts per locale, although the labels say "rows".
print("EN rows:", len(user_says_en))
print("AU rows:", len(user_says_en_au))
print("US rows:", len(user_says_en_us))

EN rows: 346
AU rows: 342
US rows: 342


In [323]:
# extract intent name, intent ids and intent phrases

def extract_intent_phrases(intent_json_list, intents_dir='data/export/2022_02_02/intents'):
    """Read exported usersays JSON files and collect their phrases per intent.

    Args:
        intent_json_list: File names (not paths) of the usersays JSON files to read.
        intents_dir: Folder containing those files. Defaults to the folder the
            notebook extracts the agent export into.

    Returns:
        dict mapping the intent name (the file name with everything from
        '_usersays_' onwards removed) to a list of [id, phrase text] pairs,
        one per entry in the file.

    Notes:
        Each entry's phrase text is the concatenation of the 'text' of all of
        its 'data' parts, so phrases stored as several parts are kept whole.
        If two files clean to the same intent name, the later one wins.
    """
    suffix_pattern = re.compile(r"(_usersays_).*$", re.MULTILINE)
    dict_data = {}
    for file in intent_json_list:
        json_file_path = os.path.join(intents_dir, file)
        # Context manager so the file handle is closed after every read.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        intent_info_list = [
            [entry['id'], ''.join(part['text'] for part in entry['data'])]
            for entry in data
        ]
        cleaned_name = suffix_pattern.sub("", file)
        dict_data[cleaned_name] = intent_info_list
    return dict_data

# Only the EN files are extracted in this section. The AU/US calls stay
# disabled; cells further down that read `en_au_data` / `en_us_data` use the
# values computed in the "Compare Language Files" section.
en_data = extract_intent_phrases(user_says_en)
#en_au_data = extract_intent_phrases(user_says_en_au)
#en_us_data = extract_intent_phrases(user_says_en_us)

In [347]:
def getList(dict):
    """Return the keys of the given dictionary as a list, in iteration order."""
    return [key for key in dict.keys()]

# Names of all intents that have EN phrases.
intents_names_en = (getList(en_data))

In [360]:
# Flatten en_data into parallel per-phrase lists (intent name, phrase, id) and
# zip them into rows for a DataFrame.
intent_ids = []
intent_names = []
intent_phrases = []


for intent in intents_names_en:
    data = en_data[intent]
    # Per-intent lists, rebuilt on every iteration. After the loop they hold
    # the values of the last intent only.
    intent_name_list = []
    intent_phrase_list = []
    intent_id_list = []

    for i in range(len(data)):
        intent_id = data[i][0]
        intent_phrase = data[i][1]

        intent_id_list.append(intent_id)
        intent_phrase_list.append(intent_phrase)
        # Repeat the intent name once per phrase so the lists stay aligned.
        intent_name_list.append(intent)
    
    intent_names.append(intent_name_list)
    intent_phrases.append(intent_phrase_list)
    intent_ids.append(intent_id_list)
        
    
# Flatten the list-of-lists into one list per column.
flat_intent_names = [item for sublist in intent_names for item in sublist]
flat_intent_phrases = [item for sublist in intent_phrases for item in sublist]
flat_intend_ids = [item for sublist in intent_ids for item in sublist]

# One (intent name, phrase, id) tuple per phrase.
pd_data = list(zip(flat_intent_names,flat_intent_phrases, flat_intend_ids))

In [365]:
# One row per phrase: intent name, phrase text and phrase id.
output_df = pd.DataFrame(pd_data, columns=['intent_name', 'phrase', 'id'])
output_df.head()

Unnamed: 0,intent_name,phrase,id
0,!cancel_intent,Why are you replying with nothing to do with m...,ec598522-2f4c-4b0f-a076-25d1c97dae62
1,!cancel_intent,abandon,f59490aa-1f0e-46e7-a6db-e6a41042c6ca
2,!cancel_intent,break,751c7ac7-668b-4dad-9838-910057386b52
3,!cancel_intent,cancel,03fe1499-e697-4fdd-864f-7680f688158c
4,!cancel_intent,cancle,7550224d-9c8b-4435-abe6-1e49e114a899


In [367]:
# Save the phrase table as an Excel workbook.
# NOTE(review): presumably the target folder must already exist. Confirm.
output_df.to_excel('data/output/intent_phrases/intent_phrases.xlsx', index=False)

In [350]:
# Scratch check: shows whatever `intent_name_list` currently holds.
# Presumably the value left by the last iteration of the flattening loop.
intent_name_list

['vrp.5.8-vrp_item_without_case_seller_upi']

In [351]:
# Scratch check: shows whatever `intent_phrase_list` currently holds.
intent_phrase_list

['Resolve unpaid items']

In [352]:
# Scratch check: shows whatever `intent_id_list` currently holds.
intent_id_list

['8172bf16-af7e-47ba-8915-1670b56d41ca']

In [339]:
# Look at the [id, phrase] pairs extracted for a single intent.
data = en_data['cme.1.0-callme_entry']
data

[['70109a19-adfa-438c-ab41-8fc69c9b2e7d', 'call me'],
 ['776c810c-440e-4ebb-8844-a10c6e44db39', 'have us call you'],
 ['403140ef-e12f-4718-a074-a3e1e9f38394', 'Have us call you']]

In [343]:
# Scratch version of the flattening step for a single intent: split `data`
# into a list of ids and a list of phrases. `intent_name_list` is reset but
# never filled here.
intent_name_list = []
intent_phrase_list = []
intent_id_list = []

for i in range(len(data)):
    intent_id = data[i][0]
    intent_phrase = data[i][1]
    
    intent_id_list.append(intent_id)
    intent_phrase_list.append(intent_phrase)
    
    
    

In [344]:
# IDs collected by the scratch loop for the single intent.
intent_id_list

['70109a19-adfa-438c-ab41-8fc69c9b2e7d',
 '776c810c-440e-4ebb-8844-a10c6e44db39',
 '403140ef-e12f-4718-a074-a3e1e9f38394']

In [336]:
# Display the EN intent names. The previous expression showed the leftover
# loop variable `a` instead of the intent-name list.
intents_names_en

['!cancel_intent',
 '!no_intent',
 '!submit_intent',
 '!yes_intent',
 'cme.1.0-callme_entry',
 'cme.3.1-callme_cancel',
 'cme.3.2-callme_country_not_found',
 'cme.3.3.1-callme_change_phone_number_continue',
 'cme.5.0-callme_default_fallback',
 'cme.5.1-callme_no_longer_want_call',
 'cnt.1-cnt_entry_guest_cancel_buyer',
 'cnt.1-cnt_entry_guest_cancel_seller',
 'cnt.1-cnt_entry',
 'cnt.3.0-cnt_display_no',
 'cnt.3.0-cnt_display_yes',
 'cnt.6.2-cnt_rtn',
 'cnt.6.4-cnt_rcs',
 'cnt.7.1-cnt_submission_submit',
 'cnt.7.2-cnt_submission_cancel',
 'cts.1-cts_entry',
 'cts.3.0-cts_display_no',
 'cts.3.0-cts_display_yes',
 'cts.5.1-cts_item_without_case_cnt',
 'cts.5.2-cts_item_without_case_rtn',
 'cts.5.3-cts_item_without_case_inr',
 'cts.5.4-cts_item_without_case_sdm',
 'cts.7.1-cts_submission_submit',
 'cts.7.2-cts_submission_cancel',
 'default_cancel',
 'default_dont_see_item_already_tried_that',
 'default_dont_see_item_next_step_buyer',
 'default_dont_see_item_next_step_seller',
 'default_do

In [331]:
# Wide view of en_data: one column per intent, one row per phrase position.
# Each cell holds an [id, phrase] pair; shorter intents are padded with missing values.
pd.DataFrame.from_dict(en_data, orient='index').transpose()

Unnamed: 0,!cancel_intent,!no_intent,!submit_intent,!yes_intent,cme.1.0-callme_entry,cme.3.1-callme_cancel,cme.3.2-callme_country_not_found,cme.3.3.1-callme_change_phone_number_continue,cme.5.0-callme_default_fallback,cme.5.1-callme_no_longer_want_call,...,vrp.3.0-vrp_display_no,vrp.3.0-vrp_display_yes,vrp.5.1-vrp_item_without_case_tpk,vrp.5.2-vrp_item_without_case_rtn,vrp.5.3-vrp_item_without_case_inr,vrp.5.4-vrp_item_without_case_cnt,vrp.5.5-vrp_item_without_case_seller_rtn,vrp.5.6-vrp_item_without_case_seller_inr,vrp.5.7-vrp_item_without_case_selected_cts,vrp.5.8-vrp_item_without_case_seller_upi
0,"[ec598522-2f4c-4b0f-a076-25d1c97dae62, Why are...","[096dc093-dab1-4711-9128-e2258a20ca95, No]","[378c19de-6b78-4a57-aee1-11a72bc68259, begin]","[4bbfaf5d-57b2-4d41-9dea-d0970c675519, 1]","[70109a19-adfa-438c-ab41-8fc69c9b2e7d, call me]","[ec598522-2f4c-4b0f-a076-25d1c97dae62, Why are...","[3beded04-8a16-4000-9e09-f204fd0c244c, I don't...","[6631cb77-9e62-40b1-9748-0ad02252c7cc, call me]","[b891f020-de7f-4c49-ad29-fa799ba97c14, cool]","[b5e89371-a7a7-4cc5-8afb-522e1d7ce130, can I s...",...,"[096dc093-dab1-4711-9128-e2258a20ca95, No]","[4bbfaf5d-57b2-4d41-9dea-d0970c675519, 1]","[d323e5c3-3a8a-4749-8577-035538918d14, track p...","[3c91239f-852f-49c3-82f7-6d4e2984dce6, return ...","[4c355953-a02d-4402-8875-7d356981a928, Report ...","[6493b220-b5db-4488-8193-60cb43a29ee0, cancel ...","[b81a5a3c-0ccb-4e2e-8334-e1e0bad16aed, handle ...","[a2ed8764-5b19-4e90-9709-0a393890b647, handle ...","[6baf4610-e23c-4fbe-ba41-efc5e99ec4fe, message]","[8172bf16-af7e-47ba-8915-1670b56d41ca, Resolve..."
1,"[f59490aa-1f0e-46e7-a6db-e6a41042c6ca, abandon]","[0fdd5a7d-ba6b-401b-b369-d14e0a1a9665, 2]","[2e7a97d2-13f9-42a7-ac3d-0d0f162a9dd3, do]","[e8835b3b-23b8-40c2-9c10-5bb44594a4c2, 10 4]","[776c810c-440e-4ebb-8844-a10c6e44db39, have us...","[f59490aa-1f0e-46e7-a6db-e6a41042c6ca, abandon]",,"[aa7c38ee-b1fd-4b6b-bc6c-0e71e867d797, continue]","[2f53fa01-7707-481c-9f4b-cacf21b46c59, Finally]","[72b00318-9201-46ef-9c04-96920c8edd3a, can you...",...,"[0fdd5a7d-ba6b-401b-b369-d14e0a1a9665, 2]","[e8835b3b-23b8-40c2-9c10-5bb44594a4c2, 10 4]",,,,"[75d3edfa-8d08-4e74-8972-a6396c47bf03, cancel ...",,,"[b3e7bb5b-c60a-4061-a171-b6a368472b22, message...",
2,"[751c7ac7-668b-4dad-9838-910057386b52, break]","[fbbad88b-29c7-46a8-a08c-5dd83a126d18, definit...","[b706bf45-e936-4fe8-8047-65e633df314c, do it]","[2dd5fb05-379d-4d9f-a8fb-24d2cd6ad07d, 10-4]","[403140ef-e12f-4718-a074-a3e1e9f38394, Have us...","[751c7ac7-668b-4dad-9838-910057386b52, break]",,"[697defee-8370-4df0-a326-b480e51e4d75, continu...","[f2268702-db45-408b-8887-e061768dbe58, glad to...","[61bf596d-e8dd-4f89-bd16-1bfb38c0b47b, cancel ...",...,"[fbbad88b-29c7-46a8-a08c-5dd83a126d18, definit...","[2dd5fb05-379d-4d9f-a8fb-24d2cd6ad07d, 10-4]",,,,,,,"[83fd29b4-bf71-479d-8f9a-8e2fd8cbbc4c, send a ...",
3,"[03fe1499-e697-4fdd-864f-7680f688158c, cancel]","[d5ea0788-5765-422e-96cf-98bc5ce66362, Dont]","[5047bcd4-8e4f-43f0-97fd-389552d71285, done]","[7d73cbb8-f70a-4db5-bae6-84544d034326, affirma...",,"[03fe1499-e697-4fdd-864f-7680f688158c, cancel]",,"[6302e9b5-2278-4c96-adda-e9cf7f61a804, continu...","[c26e44b0-83f6-4be6-803a-c1401119abd9, I will ...","[d9a9873a-54c7-4866-bea2-5d5aff5aa63e, cancel ...",...,"[d5ea0788-5765-422e-96cf-98bc5ce66362, Dont]","[7d73cbb8-f70a-4db5-bae6-84544d034326, affirma...",,,,,,,"[c7774099-7f7a-41f0-8e6e-6b23f997e0f9, send me...",
4,"[7550224d-9c8b-4435-abe6-1e49e114a899, cancle]","[40837b67-e21a-4423-b160-feebc755f2be, Don't]","[5a08685b-b399-400c-9be2-f9a08fa41723, enter]","[f305dd54-7eeb-48e2-a402-280b034efdbf, All right]",,"[7550224d-9c8b-4435-abe6-1e49e114a899, cancle]",,"[9051bac6-d8da-4a4e-8dcc-0a0e950497b8, continu...","[0522e6a9-ab8f-4469-abae-726e146a93aa, I'll ju...","[d1097da2-c5fc-4406-bbc4-d9f9dc4ecb60, clicked...",...,"[40837b67-e21a-4423-b160-feebc755f2be, Don't]","[f305dd54-7eeb-48e2-a402-280b034efdbf, All right]",,,,,,,"[549c08ea-d674-4dfc-a275-8142d26fec62, send th...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
724,,,,,,,,,,,...,,,,,,,,,,
725,,,,,,,,,,,...,,,,,,,,,,
726,,,,,,,,,,,...,,,,,,,,,,
727,,,,,,,,,,,...,,,,,,,,,,


In [325]:
# Confirm that en_data is a plain dict.
type(en_data)

dict

In [318]:
def getList(dict):
    """Return the keys of the given dictionary as a list, in iteration order."""
    return [key for key in dict.keys()]

# Intent names per locale.
# NOTE(review): `en_au_data` and `en_us_data` are only assigned in the
# "Compare Language Files" section; the calls that would rebuild them in this
# section are commented out.
intents_en = (getList(en_data))
intents_en_au = (getList(en_au_data))
intents_en_us = (getList(en_us_data))

In [319]:
# Union of the intent names across the three locales, de-duplicated and sorted in place.
full_list_intents = list(set(intents_en + intents_en_au + intents_en_us))
full_list_intents.sort()