In [35]:
import numpy as np
import pandas as pd
import os
import json
import markdown
import pickle

In [36]:
def new_data(data_name, directory):
    """Load a newly added dataset file into a DataFrame.

    The reader is chosen from the file extension (case-insensitive):
    ``.csv`` -> ``pd.read_csv``, ``.xls`` / ``.xlsx`` -> ``pd.read_excel``,
    ``.json`` -> a single-row frame built from the parsed JSON object.

    Parameters
    ----------
    data_name : str
        File name of the dataset, including its extension.
    directory : str
        Directory that contains ``data_name``.

    Returns
    -------
    pandas.DataFrame
        The loaded dataset.

    Raises
    ------
    ValueError
        If the extension is not one of the supported formats.
    """
    dataset_loc = os.path.join(directory, data_name)
    lowered = data_name.lower()

    if lowered.endswith('.csv'):
        return pd.read_csv(dataset_loc)

    # '.xlx' is kept only for backward compatibility with the original typo;
    # the real legacy Excel extension is '.xls'.
    if lowered.endswith(('.xls', '.xlx', '.xlsx')):
        return pd.read_excel(dataset_loc)

    if lowered.endswith('.json'):
        # Context manager guarantees the handle is closed even if parsing fails.
        with open(dataset_loc, 'r') as json_file:
            payload = json.load(json_file)
        # index=[1] turns one flat JSON object of scalars into a one-row frame.
        return pd.DataFrame(payload, index=[1])

    raise ValueError(
        "Unsupported dataset format for {!r}: expected .csv, .xls, .xlsx or .json".format(data_name)
    )

In [37]:
# This step should be triggered by a listener whenever a new data file arrives.

data_name = "table.csv"
directory = '/home/aditya/Documents/citibot/newdata'
DATASET = new_data(data_name, directory)

# Force the specimen identifier to text before the headers are replaced.
DATASET['Specimen No'] = DATASET['Specimen No'].astype('str')

# Replace the original headers with positional names: column_0, column_1, ...
DATASET.columns = [
    ("column " + str(position)).replace(" ", "_")
    for position in range(len(DATASET.columns))
]

# dict of columns in new data with corresponding dtypes
FEATURES = dict(zip(DATASET.columns, DATASET.dtypes))
DATASET.head()

Unnamed: 0,column_0,column_1,column_2,column_3,column_4,column_5,column_6
0,SPECIMEN1,1.0,300.0,36.0,1.25,10,0.0027
1,SPECIMEN2,2.0,450.0,48.0,1.25,10,0.0029
2,SPECIMEN3,5.0,600.0,64.0,1.25,10,0.0022
3,SPECIMEN4,1.0,300.0,48.0,1.66,20,0.0008
4,SPECIMEN5,2.0,450.0,64.0,1.66,20,0.0012


In [38]:
# List all the possible entities
def get_entities(threshold_value):
    """Collect candidate entity values from the module-level DATASET.

    Only columns whose dtype (looked up in the module-level FEATURES dict)
    is object ('O') or 'int64' are considered. For each such column at most
    ``threshold_value`` unique values are kept, in order of first appearance.
    A column whose number of unique values equals the number of rows is also
    recorded as a primary key.

    Parameters
    ----------
    threshold_value : int
        Maximum number of unique values stored per entity column.

    Returns
    -------
    tuple(dict, list)
        ``ENTITIES`` maps column name -> list of unique values;
        ``PRIMARY_KEY`` lists the columns that are unique per row.
    """
    PRIMARY_KEY = []
    ENTITIES = {}
    n_rows = DATASET.shape[0]

    for col, dtype in FEATURES.items():
        if not (dtype == 'O' or dtype == 'int64'):
            continue

        # Compute the unique values once per column instead of once per branch.
        uniques = DATASET[col].unique()

        if uniques.shape[0] == n_rows:
            PRIMARY_KEY.append(col)                      # unique per row -> PRIMARY KEY

        # Slicing is a no-op when there are fewer uniques than the threshold,
        # so a single expression covers every case.
        ENTITIES[col] = uniques[:threshold_value].tolist()

    return ENTITIES, PRIMARY_KEY

# Upper bound on how many unique values get_entities keeps per entity column.
threshold_value = 20
# ENTITIES: column -> list of values; PRIMARY_KEY: columns that are unique per row.
ENTITIES, PRIMARY_KEY = get_entities(threshold_value)

In [39]:
# For query generation, any number of previously determined entities (values) can be used.
# For each intent, what should differ between the different sets of queries is the meaning the latter conveys.
# For eg if we have an intent "Cost", it should have questions like :
# what is the cost {intent} for entity_1{value} ?
# Since we have to use the intent / feature name as part of the query itself, an important thing to consider is
# that the feature names should be simple and directly convey their purpose.
# E.g. a feature name can be "cost", but if the feature name is "cost per head" then the issue is that
# the intent name won't be the same as the feature name; once this happens we would have to extract the real
# intent from the feature names, which is not possible to do right now.

def get_questions(intent, value=None, entity=None):
    """Build the example questions used as training phrases for an intent.

    Parameters
    ----------
    intent : str
        Intent name (here: the feature/column name) inserted into each phrase.
    value, entity : optional
        When both are given, the first question is completed with an
        annotated entity example in the form ``[value](entity)``. When either
        is omitted the first question ends after "for ", as the other two
        templates do.

    Returns
    -------
    list of str
        Three question templates for ``intent``.
    """
    # The original referenced undefined globals `value` / `entity` here, which
    # raises NameError on a fresh kernel; they are now explicit, optional inputs.
    if value is not None and entity is not None:
        annotated = "[{}]({})".format(value, entity)
    else:
        annotated = ""

    return [
        "what is {} for {}".format(intent, annotated),
        "Tell me something about {} with ".format(intent),
        "Give me information about {} for the ".format(intent),
    ]

# Map every feature name (used directly as the intent name) to its list of example questions.

INTENTS = {feature: get_questions(feature) for feature in FEATURES}
INTENTS

{'column_0': ['what is column_0 for ',
  'Tell me something about column_0 with ',
  'Give me information about column_0 for the '],
 'column_1': ['what is column_1 for ',
  'Tell me something about column_1 with ',
  'Give me information about column_1 for the '],
 'column_2': ['what is column_2 for ',
  'Tell me something about column_2 with ',
  'Give me information about column_2 for the '],
 'column_3': ['what is column_3 for ',
  'Tell me something about column_3 with ',
  'Give me information about column_3 for the '],
 'column_4': ['what is column_4 for ',
  'Tell me something about column_4 with ',
  'Give me information about column_4 for the '],
 'column_5': ['what is column_5 for ',
  'Tell me something about column_5 with ',
  'Give me information about column_5 for the '],
 'column_6': ['what is column_6 for ',
  'Tell me something about column_6 with ',
  'Give me information about column_6 for the ']}

In [40]:
# Maintain a persisted dictionary {table name -> list of intents} for query-based retrieval.

table = os.path.splitext(data_name)[0]

# Single definition of the registry location instead of three repeated literals.
INFO_TABLE_PATH = '/home/aditya/Documents/citibot/data/dict.pkl'

if os.path.isfile(INFO_TABLE_PATH):
    # NOTE: pickle.load can execute arbitrary code; only load a file this
    # notebook wrote itself.
    with open(INFO_TABLE_PATH, 'rb') as pkl_in:
        information_table = pickle.load(pkl_in)
else:
    information_table = {}

information_table[table] = list(INTENTS.keys())

# Context manager ensures the data is flushed and the handle closed.
with open(INFO_TABLE_PATH, 'wb') as pkl_out:
    pickle.dump(information_table, pkl_out)

information_table

{'table': ['column_0',
  'column_1',
  'column_2',
  'column_3',
  'column_4',
  'column_5',
  'column_6']}

In [41]:
# the current nlu.md file has the following information : (1)intents, (2)lookup, (3)synonyms
# created three functions to update each of the these sections respectively
# optimisation for future instead of writing, just append new data


def synonyms_to_md(data_md, entity_dict):
    """Append one ``synonym`` section per entity value to ``data_md``.

    Entities whose dtype in the module-level FEATURES is 'int64', or that are
    listed in the module-level PRIMARY_KEY, are skipped. For every remaining
    value a section is built listing its lower-case, upper-case and
    title-case spellings.

    ``data_md`` is extended in place and also returned.
    """
    sections = []
    for entity, values in entity_dict.items():
        if FEATURES[entity] == 'int64' or entity in PRIMARY_KEY:
            continue

        # Only text entities that are not primary keys reach this point.
        for val in values:
            variants = (val.lower(), val.upper(), val.title())
            pieces = [" synonym:{}\n".format(val)]
            pieces.extend("- {}\n".format(variant) for variant in variants)
            pieces.append("\n")
            sections.append("".join(pieces))

    data_md += sections
    return data_md

# lookup should not include primary_key entity, integer entities

def lookups_to_md(data_md, entity_dict):
    """Append one ``lookup`` section per eligible entity to ``data_md``.

    Entities whose dtype in the module-level FEATURES is 'int64', or that are
    listed in the module-level PRIMARY_KEY, are skipped. Each section is a
    header line followed by one bullet per value and a blank line.

    ``data_md`` is extended in place and also returned.
    """
    sections = []
    for entity, values in entity_dict.items():
        if FEATURES[entity] == 'int64' or entity in PRIMARY_KEY:
            continue

        header = " lookup:{}\n".format(entity)
        bullets = "".join("- {}\n".format(val) for val in values)
        sections.append(header + bullets + "\n")

    data_md += sections
    return data_md

def intents_to_md(data_md, intent_dict):
    """Append one ``intent`` section per intent to ``data_md``.

    A newline is first added to the last existing chunk so the new sections
    start on their own line. Each section is a header line, one bullet per
    question and a trailing blank line.

    ``data_md`` is modified in place and also returned.
    """
    data_md[-1] = data_md[-1] + '\n'

    sections = [
        " intent:{}\n".format(intent)
        + "".join("- {}\n".format(question) for question in questions)
        + "\n"
        for intent, questions in intent_dict.items()
    ]

    data_md += sections
    return data_md

# Read the existing nlu.md, split it into '##'-delimited sections, append the
# generated intent and lookup sections, and write the result to demofile2.md.
# Context managers close both handles (the read handle was previously leaked).
with open('/home/aditya/Documents/citibot/newdata/nlu.md', 'r') as nlu:
    s = nlu.read().split('##')

nlu_intent = intents_to_md(s, INTENTS)
nlu_lookup = lookups_to_md(nlu_intent, ENTITIES)
# nlu_synonyms = synonyms_to_md(nlu_lookup, ENTITIES)

new_nlu = '##'.join(nlu_lookup)

with open("demofile2.md", "w") as f:
    f.write(new_nlu)

In [48]:
# the current stories.md file has storylines with respect to a classified intent
# For the dynamic addition of data, contextual storylines cannot be added due to their increased complexity
# name : ## {intent} path

def new_stories(story, intent_dict):
    """Append a single-step story per intent and return the action names.

    For each intent an action named ``action_<intent>`` is derived and a
    story section "<intent> path 1" linking the intent to that action is
    added to ``story``.

    Returns the extended ``story`` list (modified in place) and the list of
    action names, in intent order.
    """
    actions = ["action_{}".format(intent) for intent in intent_dict]

    story += [
        " {} path 1\n* {}\n  - {}\n\n".format(intent, intent, action)
        for intent, action in zip(intent_dict, actions)
    ]
    return story, actions
    
# Read the existing stories.md, append one generated story per intent and
# write the result to story.md. Context managers close both handles (the
# read handle was previously leaked).
with open('/home/aditya/Documents/citibot/newdata/stories.md', 'r') as story:
    s = story.read().split('##')

story_text, Actions = new_stories(s, INTENTS)
new_story = '##'.join(story_text)

with open("story.md", "w") as f:
    f.write(new_story)

In [58]:
# Update the actions file by adding one new action class per intent.
# Each new intent is mapped to an action that is triggered once that intent is predicted.
# The actions.py file is opened in append mode and this template, once formatted, is written to it.
#
# The template has four positional `{}` placeholders, filled by add_action in this order:
#   1. class name, 2. value returned by name(), 3. utterance template name, 4. reply text.
# NOTE(review): the generated code references Action, Text, CollectingDispatcher, Tracker,
# get_table, record_finder and sys — presumably these are already defined/imported in the
# target actions.py; confirm before running the bot.
text = """

class {}(Action):

    def name(self) -> Text:
        return "{}"

    def run(self, dispatcher: CollectingDispatcher,
            tracker: Tracker,
            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:

        try:
            entities = tracker.latest_message['entities']
            table = get_table(tracker.latest_message['intent'])
            if(len(entities) == 0):
                dispatcher.utter_message(template = '{}')
                return []

            records = record_finder(entities)

            if(records.empty):
                raise ValueError("No record for this query !!!")
    
            print(records)
            dispatcher.utter_message(text="{}")
            return []

        except:
            dispatcher.utter_message(text = str(sys.exc_info()[1]))
            return []
"""


def add_action(Action, file, limit=2):
    """Write one generated action class per action name to ``file``.

    Parameters
    ----------
    Action : list of str
        Action names of the form ``action_<intent>``.
    file : writable text file object
        Destination; each formatted template is written with ``file.write``.
    limit : int or None, optional
        Only the first ``limit`` actions are processed. Defaults to 2, which
        was previously hard-coded; pass ``None`` to process every action.

    Returns
    -------
    list of str
        The utterance names (``utter_<intent>``) for the processed actions.
    """
    utter = []
    for action in Action[:limit]:
        # Drop the leading "action" (6 characters) and prefix with "utter".
        utterance = "utter" + action[6:]
        class_name = action.replace("_", "")
        utter.append(utterance)
        # Fills the module-level `text` template: class name, action name,
        # utterance template name, reply text.
        file.write(text.format(class_name, action, utterance, "its working for "))

    return utter

# Append the generated action classes to actions.py.
# The original closed an unrelated name (`file`, defined only in a later cell),
# so `current_actions` was never closed and a fresh kernel raised NameError.
with open('/home/aditya/Documents/ipynb/actions.py', 'a', encoding='utf-8') as current_actions:
    utterances = add_action(Actions, current_actions)
print(utterances)

['utter_column_0', 'utter_column_1']


In [75]:
# updating the domain.yml file
# it contains a list of intents, actions, responses, entities, slots

def _insert_under_key(data, key, items):
    """Return ``data`` with ``items`` added as '  - item' lines right after the ``key`` line."""
    block = "".join("  - {}\n".format(item) for item in items)
    if not block:
        return data

    pointer = data.find(key)
    if pointer == -1:
        # Section missing: create it at the end instead of splicing at a bogus offset.
        prefix = data if (not data or data.endswith("\n")) else data + "\n"
        return prefix + key + "\n" + block

    # Insert after the end of the key's line rather than at a fixed character
    # offset, so trailing spaces or '\r\n' line endings do not break the splice.
    line_end = data.find("\n", pointer)
    if line_end == -1:
        # Key is the last line and has no trailing newline.
        return data + "\n" + block
    return data[:line_end + 1] + block + data[line_end + 1:]


def update_domain(intents, actions, entities, responses, data, action_limit=2):
    """Add new intents, actions and entities to the text of a domain.yml file.

    Each group is inserted as YAML list items directly below its section
    header ("intents:", "actions:", "entities:"). A section that does not
    exist yet is appended at the end of the document.

    Parameters
    ----------
    intents : list of str
        Intent names to add.
    actions : list of str
        Action names; only the first ``action_limit`` are added.
    entities : list of str
        Entity names to add.
    responses : list of str
        Currently unused; the slots and responses sections are not updated yet.
    data : str
        Current content of the domain file.
    action_limit : int or None, optional
        Number of actions to register. Defaults to 2 (previously hard-coded);
        pass ``None`` to register all of them.

    Returns
    -------
    str
        The updated domain file content.
    """
    data = _insert_under_key(data, "intents:", intents)
    data = _insert_under_key(data, "actions:", actions[:action_limit])
    data = _insert_under_key(data, "entities:", entities)

    # TODO: update the slots and responses sections as well.
    return data

# Read the current domain.yml, add the new intents/actions/entities and write
# the result to ./domain.yml. Context managers close both handles (the read
# handle was previously leaked).
with open('/home/aditya/Documents/ipynb/domain.yml', 'r') as domain_file:
    domain = domain_file.read()

new_domain = update_domain(list(INTENTS.keys()), Actions, list(ENTITIES.keys()), utterances, domain)

with open("domain.yml", "w") as f:
    f.write(new_domain)

In [27]:
# Scratch version of the action template with unquoted `{}` placeholders.
# NOTE(review): this rebinds the module-level name `text` that add_action reads,
# so running this cell before add_action changes what add_action writes.
text = """

class ActionPayment(Action):

    def name(self) -> Text:
        return {}

    def run(self, dispatcher: CollectingDispatcher,
            tracker: Tracker,
            domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:

        try:
            entities = tracker.latest_message['entities']
            table = get_table(tracker.latest_message['intent'])
            if(len(entities) == 0):
                dispatcher.utter_message(template = {})
                return []

            records = record_finder(entities)

            if(records.empty):
                raise ValueError("No record for this query !!!")
    
            print(records)
            dispatcher.utter_message(text={} +records[{}].item())# {}  {}".format(PS, LE))
            return [{}]

        except:
            dispatcher.utter_message(text = str(sys.exc_info()[1]))
            return []
"""


# Append the raw (unformatted) template to actions.py.
# The context manager closes the handle even if the write fails.
with open('/home/aditya/Documents/ipynb/actions.py', 'a', encoding='utf-8') as file:
    file.write(text)

# Scratch experiments for the markdown-section and string-formatting logic
# used by the functions above. The nlu.md handle is now closed via `with`
# (it was previously left open).
with open('/home/aditya/Documents/citibot/newdata/nlu.md', 'r') as mdf:
    # markdown.markdown(mdf.read())
    s = mdf.read().split('##')

h, p = "heyaaaa", "how"
string = ""

# Two hand-built intent sections in the same format intents_to_md produces.
string1 = " intent:{}\n".format(h)
string1 += "- {}\n".format(p)
string1 += "\n"

string2 = " intent:{}\n".format(p)
string2 += "- {}\n".format(h)
string2 += "\n"
ss = []
ss.append(string1)
ss.append(string2)
print(ss)
v = '##'.join([string, string1, string2])

# Append the sections to the chunks read from nlu.md and re-join them.
s[-1] = s[-1] + '\n'
s += ss
print(s)
v = '##'.join(s)

# str.format with a single placeholder.
text = "adbc {}"
x = 1
t = text.format("8")
print(t)

# Deriving an utterance name from an action name by dropping the first 6 characters.
text = "action_intent"
"utter"+text[6:]