# KNOWLEDGE GRAPH INSIGHTS

### 1. SETUP
To prepare your environment, you need to install some packages and enter credentials for the Watson services.



### 1.1 Install the necessary packages
You need the latest versions of these packages:
Watson Developer Cloud: a client library for Watson services.
NLTK: leading platform for building Python programs to work with human language data.


In [None]:
!pip install mammoth

#### Install the Watson Developer Cloud package

In [None]:
!pip install --upgrade watson-developer-cloud

#### Install NLTK

In [None]:
!pip install --upgrade nltk

#### Install IBM Object Storage Client

In [None]:
!pip install ibm-cos-sdk

#### Install FuzzyWuzzy

In [None]:
!pip install fuzzywuzzy

#### Install Websocket client

In [None]:
!pip install websocket-client

### 1.2 Import packages and libraries
Import the packages and libraries that you'll use:

In [None]:
import json
import pandas as pd
import mammoth
import os, re
import networkx as nx
import io
from io import StringIO
import matplotlib.pyplot as plt
# NOTE(review): Iterable is not referenced anywhere in the visible notebook
# code, and the `collections.Iterable` alias only exists in `collections.abc`
# from Python 3.10 on - confirm whether this import is still needed.
from collections import Iterable 
from io import BytesIO
import zipfile

import websocket

import ibm_boto3
from botocore.client import Config

import nltk
from nltk import word_tokenize,sent_tokenize,ne_chunk
from nltk.corpus import stopwords

from bs4 import BeautifulSoup
from bs4.element import Comment

from fuzzywuzzy import fuzz
from fuzzywuzzy import process


from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
  import Features, EntitiesOptions, SemanticRolesOptions, RelationsOptions, KeywordsOptions

In [None]:
#nltk.download()

## 2. Configuration
Add configurable items of the notebook below



### 2.1 Add your service credentials from Bluemix for the Watson services
You must create a Watson Natural Language Understanding service on Bluemix. Create a service for Natural Language Understanding (NLU). Insert the username and password values for your NLU in the following cell. Do not change the values of the version fields.

Run the cell.

In [None]:
# Client for the Watson Natural Language Understanding service. Fill in the
# username and password of your own NLU instance before running this cell;
# analyze_using_NLU() sends every request through this object.
natural_language_understanding = NaturalLanguageUnderstandingV1(
  username= '',
  password='',
  version='2017-02-27')

### 2.2 Add your service credentials for Object Storage


You must create an Object Storage service on Bluemix. To access data in a file in Object Storage, you need the Object Storage authentication credentials. Insert these credentials as `credentials_1` in the following cell, replacing the cell's current contents.

In [None]:
#Insert the Object Storage authentication credentials as credentials_1 here

In [None]:
#Add credentials of config_classification.txt here
#Insert the authentication credentials as credentials_2 

In [None]:
#Add credentials of config__relations.txt here
#Insert the authentication credentials as credentials_3 

In [None]:
#Insert the Archive.zip as StreamingBody object 

## 3. Persistence and Storage


#### 3.1 Configure Object Storage Client

In [None]:
# Cloud Object Storage client built from the credentials_1 dict, which must
# have been defined in an earlier cell. get_file() and put_file() below use it.
cos = ibm_boto3.client('s3',
                    ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],
                    ibm_service_instance_id=credentials_1['IAM_SERVICE_ID'],
                    ibm_auth_endpoint=credentials_1['IBM_AUTH_ENDPOINT'],
                    config=Config(signature_version='oauth'),
                    endpoint_url=credentials_1['ENDPOINT'])

def get_file(filename):
    '''Fetch an object from Cloud Object Storage and return its body stream.

    The bucket name comes from credentials_1['BUCKET']; `filename` is used as
    the object key.
    '''
    stored_object = cos.get_object(Bucket=credentials_1['BUCKET'], Key=filename)
    return stored_object['Body']

    
def get_docx_file():
    '''Extract the uploaded archive and return the paths of the extracted files.

    The archive bytes are read from the global ``streaming_body_1`` object and
    every file entry is extracted relative to the current working directory.

    Returns:
        list: Paths of the extracted files, in archive order. Directory
        entries are skipped because the caller opens every returned path as a
        file.
    '''
    archive_bytes = BytesIO(streaming_body_1.read())
    # The context manager closes the archive even when an extraction fails.
    with zipfile.ZipFile(archive_bytes, 'r') as zip_ref:
        return [
            zip_ref.extract(entry)
            for entry in zip_ref.infolist()
            if not entry.is_dir()
        ]



def load_string(fileobject):
    '''Read and return everything that remains in the given file-like object.'''
    return fileobject.read()

def load_df(fileobject,sheetname):
    '''Parse one sheet of an Excel workbook into a Pandas dataframe.

    `fileobject` is handed to pandas.ExcelFile and `sheetname` selects the
    sheet to parse.
    '''
    return pd.ExcelFile(fileobject).parse(sheetname)

def put_file(filename, filecontents):
    '''Store `filecontents` in Cloud Object Storage under the key `filename`.

    The bucket name comes from credentials_1['BUCKET']; the service response
    is returned unchanged.
    '''
    return cos.put_object(
        Bucket=credentials_1['BUCKET'],
        Key=filename,
        Body=filecontents,
    )

### 4. Data Preparation 

#### 4.1 Global variables and functions.

In [None]:
# Maps each inserted tag string ('<TYPE:text>') back to the plain entity text.
# It is filled by the NLU-based disambiguate_entities() and read by the
# regex-based extract_relations() defined in section 5.3.
tagTextMap ={}


    
def HtmlToJson(html_file):
    '''
    Convert the tables of an HTML document into a list of JSON-style records.

    The first row of every table is promoted to that table's column names and
    each remaining row becomes one dict. All tables are then stacked into a
    single list of records.

    Args:
        html_file: HTML markup as a string, or any other input accepted by
            ``pandas.read_html`` (path, URL or file-like object).

    Returns:
        list: One dict per data row, keyed by the header cells (as strings).
    '''
    # Newer pandas releases deprecate passing literal markup directly, so wrap
    # markup in a text buffer; other strings (paths/URLs) are passed through.
    if isinstance(html_file, str) and html_file.lstrip().startswith('<'):
        html_file = StringIO(html_file)

    frames = []
    for table in pd.read_html(html_file):
        # Promote each table's own first row to its header before stacking;
        # doing this after concatenation breaks once a second table repeats
        # index label 0.
        header = table.iloc[0].astype(str)
        body = table.iloc[1:].copy()
        body.columns = header
        body.columns.name = None
        frames.append(body)

    dataframe = pd.concat(frames, ignore_index=True)
    return json.loads(dataframe.to_json(orient='records'))

def getRawTextandHtmlTableAsJson(docx_file):
    '''
    Return the tables (as JSON records) and the raw text of a docx file.

    The document is converted to HTML with mammoth so its tables can be parsed
    by HtmlToJson; the raw text is extracted from the same file object.
    '''
    conversion = mammoth.convert_to_html(docx_file)
    tables_as_json = HtmlToJson(conversion.value)
    plain_text = mammoth.extract_raw_text(docx_file).value
    return tables_as_json, plain_text
    
def generate_NLUJson_HtmlTablesJson():
    '''
    Process every extracted document and return three parallel lists:
    the augmented NLU results, the table records and the raw texts.
    '''
    html_tables = []
    raw_texts = []
    # First pass: pull tables and text out of every extracted document.
    for path in get_docx_file():
        with open(path, 'rb') as handle:
            tables, text = getRawTextandHtmlTableAsJson(handle)
        html_tables.append(tables)
        raw_texts.append(text)

    # Second pass: classify each document's text once all files are read.
    augmented_results_from_nlu = [
        classify_text(str(text), config_classification_json)
        for text in raw_texts
    ]
    return augmented_results_from_nlu, html_tables, raw_texts
    

### 5. Watson Text Classification
Write the classification-related utility functions in a modular form.



#### 5.1 Watson NLU Classification 

In [None]:
def analyze_using_NLU(text_content):
    '''
    Call the Watson Natural Language Understanding service and return the
    analysis result as a plain dict.

    Entities, relations and keywords are requested for `text_content`.
    '''
    response = natural_language_understanding.analyze(
        text=text_content,
        features=Features(
            entities=EntitiesOptions(),
            relations=RelationsOptions(),
            keywords=KeywordsOptions(),
        ),
    )
    # Newer SDK releases wrap the payload in a response object exposing
    # get_result(); the rest of this notebook indexes the result like a dict,
    # so unwrap it when that method is present.
    if hasattr(response, 'get_result'):
        response = response.get_result()
    return response

#### 5.2 Augmented Classification
Custom classification utility functions for augmenting the results of the Watson NLU API call


In [None]:
def split_sentences(text):
    """ Break text into fragments at square brackets, newlines, '.', '!' and '?'.

    The delimiters themselves are dropped; empty fragments are kept.
    """
    return re.split(u'[\\[\\]\n.!?]', text)

def split_into_tokens(text):
    """ Tokenize text into words with NLTK's word tokenizer.
    """
    return nltk.word_tokenize(text)
    
def POS_tagging(text):
    """ Return the part-of-speech tags NLTK assigns to `text`.

    Callers in this notebook pass the token list from split_into_tokens().
    """
    return nltk.tag.pos_tag(text)

def keyword_tagging(tag,tagtext,text):
    """ Find `tagtext` in `text`, ignoring case.

    Returns the matching slice of `text` (with its original casing), or the
    string 'UNKNOWN' when there is no match. `tag` is not used.
    """
    start = text.lower().find(tagtext.lower())
    if start == -1:
        return 'UNKNOWN'
    return text[start:start + len(tagtext)]
    
def regex_tagging(tag,regex,text):
    """ Return every case-insensitive match of `regex` in `text` as a list.

    Items are strings, or tuples of groups when the pattern has several
    groups. `tag` is not used.
    """
    return list(re.findall(regex, text, re.IGNORECASE))

def chunk_tagging(tag,chunk,text):
    """ Chunk POS-tagged tokens with the grammar `chunk` and collect the
    chunks labelled `tag`.

    Each returned string is the chunk's words, every word preceded by a
    single space (so results start with a space).
    """
    parsed = nltk.RegexpParser(chunk).parse(text)
    return [
        ''.join(' ' + leaf[0] for leaf in subtree)
        for subtree in parsed
        if isinstance(subtree, nltk.tree.Tree) and subtree.label() == tag
    ]
    
def augument_NLUResponse(responsejson,updateType,text,tag):
    """ Add a keyword or an entity to the NLU response unless an item with the
    same text is already present.

    `updateType == 'keyword'` appends to responsejson['keywords']; any other
    value appends an entity of type `tag` to responsejson['entities'].
    The response is modified in place.
    """
    if updateType == 'keyword':
        bucket = responsejson['keywords']
        entry = {"text": text, "relevance": 0.5}
    else:
        bucket = responsejson['entities']
        entry = {"type": tag, "text": text, "relevance": 0.5, "count": 1}

    for existing in bucket:
        if existing.get('text', None) == text:
            return
    bucket.append(entry)
    

def classify_text(text, config):
    """ Run Watson NLU on the text and augment its entities using the rules in
    `config` (a JSON string).

    Every step under configuration.classification.stages is applied by type:
    'keywords' and 'd_regex' steps run per sentence fragment, 'chunking' steps
    run on the POS tags of the whole text. Every hit is added to the response
    as an entity. Any other step type prints 'UNKNOWN STEP'.
    """
    responsejson = analyze_using_NLU(text)
    sentenceList = split_sentences(text)
    postags = POS_tagging(split_into_tokens(text))
    configjson = json.loads(config)

    for stage in configjson['configuration']['classification']['stages']:
        for step in stage['steps']:
            step_type = step['type']
            if step_type == 'keywords':
                for keyword in step['keywords']:
                    for sentence in sentenceList:
                        found = keyword_tagging(keyword['tag'], keyword['text'], sentence)
                        if found != 'UNKNOWN':
                            augument_NLUResponse(responsejson, 'entities', found, keyword['tag'])
            elif step_type == 'd_regex':
                for regex in step['d_regex']:
                    for sentence in sentenceList:
                        for hit in regex_tagging(regex['tag'], regex['pattern'], sentence):
                            augument_NLUResponse(responsejson, 'entities', hit, regex['tag'])
            elif step_type == 'chunking':
                for chunk in step['chunk']:
                    for phrase in chunk_tagging(chunk['tag'], chunk['pattern'], postags):
                        augument_NLUResponse(responsejson, 'entities', phrase, chunk['tag'])
            else:
                print('UNKNOWN STEP')

    return responsejson

def replace_unicode_strings(response):
    """ Recursively convert unicode strings inside dicts and lists to native strings.

    On Python 2 every ``unicode`` value (dict keys included) is encoded to a
    UTF-8 ``str``. On Python 3 ``str`` is already the native text type, so
    strings are returned unchanged and only the containers are rebuilt.

    Returns:
        A new dict/list with the same structure, or `response` itself for any
        other type.
    """
    try:
        text_type = unicode  # noqa: F821 - this name only exists on Python 2
    except NameError:
        text_type = None

    if isinstance(response, dict):
        # .items() works on both Python 2 and 3, unlike .iteritems().
        return {
            replace_unicode_strings(key): replace_unicode_strings(value)
            for key, value in response.items()
        }
    if isinstance(response, list):
        return [replace_unicode_strings(element) for element in response]
    if text_type is not None and isinstance(response, text_type):
        return response.encode('utf-8')
    return response

#### 5.3 Coreference Resolution and Augmented Relations.

In [None]:
def chunk_sentence(text):
    """ Parse POS-tagged tokens into a chunk tree.

    A fixed NP / PP / V / VP / CLAUSE grammar is applied with two passes
    (loop=2) and the resulting tree is returned.
    """
    grammar = """
      NP: {<DT|JJ|PRP|NN.*>+} # Chunk sequences of DT,JJ,NN
          #}<VB*|DT|JJ|RB|PRP><NN.*>+{  # Chink sequences of VB,DT,JJ,NN       
      PP: {<IN><NP>}               # Chunk prepositions followed by NP
      V: {<V.*>}                   # Verb      
      VP: {<VB*><NP|PP|CLAUSE>+}  # Chunk verbs and their arguments
      CLAUSE: {<NP><VP>}           # Chunk NP, VP
      """  
    return nltk.RegexpParser(grammar, loop=2).parse(text)
    
def find_attrs(subtree,phrase):
    """ Collect the words of a noun phrase whose tag marks an attribute.

    For phrase 'NP', returns the matching words joined together, each one
    preceded by a space. Any other phrase yields an empty string.
    """
    if phrase != 'NP':
        return ''
    attribute_tags = ['DT','PRP$','POS','JJ','CD','ADJP','QP','NP','NNP']
    return ''.join(' ' + node[0] for node in subtree if node[1] in attribute_tags)
    
def find_subject(t):
    """ Return the attribute words of the first NP subtree of `t`.

    Returns None when the tree contains no NP subtree.
    """
    noun_phrases = iter(t.subtrees(lambda node: node.label() == 'NP'))
    first_np = next(noun_phrases, None)
    if first_np is None:
        return None
    return find_attrs(first_np, 'NP')
    
def resolve_coreference(text, config):
    """ Rewrite the text so that pronoun chunks are replaced by the most
    recently seen subject name.

    `config` is a JSON string. Its configuration.coreference.rules entries of
    type 'chunking' give the chunk grammars for the 'NAME' and 'PRP' tags.
    Only sentences that produce a qualifying chunk are copied to the output,
    each one followed by '. '.
    """
    sentenceList = split_sentences(text)
    referenceSubject = ''
    resolved = ''
    configjson = json.loads(config)

    for sentence in sentenceList:
        postags = POS_tagging(split_into_tokens(sentence))
        subjects = find_subject(chunk_sentence(postags))
        for rule in configjson['configuration']['coreference']['rules']:
            if rule['type'] != 'chunking':
                continue
            for tag_spec in rule['chunk']:
                for words in chunk_tagging(tag_spec['tag'], tag_spec['pattern'], postags):
                    if tag_spec['tag'] == 'PRP':
                        # Substitute only when the sentence has no subject attributes.
                        if subjects == '':
                            resolved = resolved + sentence.replace(words, referenceSubject) + '. '
                    elif tag_spec['tag'] == 'NAME':
                        # A name that is the sentence subject becomes the new referent.
                        if words == subjects:
                            referenceSubject = words
                            resolved = resolved + sentence + '. '

    return resolved

def disambiguate_entities(text):
    """ Tag entity mentions in the text as '<TYPE:text>' using Watson NLU results.

    Every entity returned by NLU is tagged wherever its text occurs
    (case-insensitively) and is not directly followed by '>'. When the
    response contains semantic roles, a subject whose entity carries
    disambiguation data is replaced by that entity's tag. Each tag is recorded
    in the global tagTextMap.

    NOTE: a later cell of this notebook defines another function with this
    same name, which replaces this one once that cell has run.

    Returns:
        str: The text with the tags inserted.
    """
    responsejson = analyze_using_NLU(text)
    taggedtext = text

    def tag_mentions(current, mention, entity):
        tagText = '<' + entity['type'] + ':' + entity['text'] + '>'
        # Escape the mention so characters such as '.', '(' or '+' in an entity
        # name are matched literally, and actually apply IGNORECASE.
        pattern = re.compile(re.escape(mention) + '(?!>)', re.IGNORECASE)
        tagTextMap[tagText] = entity['text']
        # A callable replacement keeps backslashes in the tag literal.
        return pattern.sub(lambda _match: tagText, current)

    for entity in responsejson['entities']:
        taggedtext = tag_mentions(taggedtext, entity['text'], entity)

    # analyze_using_NLU does not request semantic roles, so the key may be
    # missing from the response; treat that as "no roles".
    for roles in responsejson.get('semantic_roles', []):
        if 'entities' not in roles['subject']:
            print('NO ENTITY')
            continue
        for entity in roles['subject']['entities']:
            if 'disambiguation' not in entity:
                print('NO DISAMBIGUATION')
            else:
                taggedtext = tag_mentions(taggedtext, roles['subject']['text'], entity)

    return taggedtext

def extract_relations(text, config,relations):
    """ Append (source, tag, target) triples found in the text to `relations`.

    `config` is a JSON string. Each 'd_regex' rule under
    configuration.relations.rules is matched against every sentence fragment.
    Groups 0 and 2 of each match are looked up in tagTextMap to get the plain
    entity texts. The same `relations` list is returned.
    """
    sentenceList = split_sentences(text)
    configjson = json.loads(config)

    for sentence in sentenceList:
        for rule in configjson['configuration']['relations']['rules']:
            if rule['type'] != 'd_regex':
                continue
            for regex in rule['d_regex']:
                for groups in regex_tagging(regex['tag'], regex['pattern'], sentence):
                    relations.append(
                        (tagTextMap[groups[0]], regex['tag'], tagTextMap[groups[2]])
                    )

    return relations

#### 5.4 Knowledge graph utility functions.

In [None]:
def create_nodes_dataframe(G):
    '''
    Build a dataframe with one row per graph node.

    Args:
        G: Graph whose ``nodes(data=True)`` yields (name, attributes) pairs.

    Returns:
        pandas.DataFrame: Columns 'entity_names' and 'entitity_attributes'.
        An empty graph gives an empty frame that still has both columns.
    '''
    # Passing the column names to the constructor (instead of assigning them
    # afterwards) keeps this working when the graph has no nodes.
    return pd.DataFrame(
        list(G.nodes(data=True)),
        columns=['entity_names', 'entitity_attributes'],
    )

def create_entityNodes(G,results_from_nlu):
    '''
    Add one node per NLU entity to the graph.

    The entity text is the node name and every other entity field becomes a
    node attribute. An entity that occurs again updates the existing node.

    Args:
        G: Graph to add the nodes to (modified in place).
        results_from_nlu: List of NLU results, each with an 'entities' list.
    '''
    for result in results_from_nlu:
        for entity in result['entities']:
            attributes = {key: value for key, value in entity.items() if key != 'text'}
            # Set the attributes through add_node rather than the G.node
            # mapping, which no longer exists in current networkx releases.
            G.add_node(entity['text'], **attributes)

def filter_and_format_relations(relationships):
    '''
    Keep the NLU relations that involve a GeopoliticalEntity and whose first
    two entities are both known nodes, and return them as
    (first entity text, relation type, second entity text) tuples.

    Known nodes are looked up in the global nodes_df dataframe.
    '''
    req_relations = []
    for relation in relationships:
        leading_entities = [argument['entities'][0] for argument in relation['arguments']]
        names = [entity['text'] for entity in leading_entities]
        types = [entity['type'] for entity in leading_entities]

        if types[0] != 'GeopoliticalEntity' and types[1] != 'GeopoliticalEntity':
            continue
        if not any(nodes_df['entity_names'] == names[0]):
            continue
        if not any(nodes_df['entity_names'] == names[1]):
            continue

        req_relations.append((names[0], relation['type'], names[1]))

    return req_relations
            
    
def draw_simple_graph(graph):
    '''
    Draw a list of (source, relation, target) triples as an undirected graph.

    Returns the networkx graph, the shell-layout node positions and the
    edge-label mapping {(source, target): relation}.
    '''
    G = nx.Graph()
    # Nodes first, in triple order, then the edges carrying the relation.
    for triple in graph:
        G.add_node(triple[0])
        G.add_node(triple[2])
    for triple in graph:
        G.add_edge(triple[0], triple[2], relation=triple[1])

    edge_labels = {(triple[0], triple[2]): triple[1] for triple in graph}

    pos = nx.shell_layout(G)
    nx.draw(G, pos, with_labels=True)
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
    plt.show()

    return G, pos, edge_labels


def knowledge_graph(results_from_nlu):
    '''
    Build and draw the knowledge graph.

    Relations come from two sources: the NLU relations in `results_from_nlu`
    (filtered by filter_and_format_relations) and the rule-based relations
    extracted from every text in the global raw_texts, using the rules in the
    global config_relation_json.

    Returns:
        tuple: (graph, node positions, edge labels) from draw_simple_graph.
    '''
    relationships = [
        relation
        for result in results_from_nlu
        for relation in result['relations']
    ]

    configjson = json.loads(config_relation_json)
    rule_relations = []
    for raw_text in raw_texts:
        # Accumulate across documents; plain assignment here would keep only
        # the relations of the last document that produced any.
        rule_relations.extend(extract_relations(raw_text, configjson))

    response = filter_and_format_relations(relationships) + rule_relations
    return draw_simple_graph(response)


def disambiguate_entities(text):
    '''
    Wrap every NLU keyword that occurs in the text as '<Keyword:keyword>'.

    Keywords are taken from the 'keywords' lists of the global
    results_from_nlu. This definition replaces the earlier function of the
    same name in this notebook.

    Returns:
        str: The text with the keyword tags inserted.
    '''
    keywords = [
        keyword['text']
        for result in results_from_nlu
        for keyword in result['keywords']
        if 'text' in keyword
    ]

    for word in keywords:
        # Literal replacement: keywords are plain text, so regex characters
        # such as '.', '+' or '(' in a keyword must not be interpreted.
        text = text.replace(word, '<Keyword:' + word + '>')
    return text

def extract_relations(text,configjson):
    '''
    Extract (keyword, tag, keyword) relations from the text.

    The text is first keyword-tagged by disambiguate_entities. For each rule
    under configuration.relations.rules, only the first match of the rule's
    pattern is used: the keyword inside group 0 and the keyword inside group 2
    become the two ends of a relation labelled with the rule's tag.
    '''
    relationship = []
    rules = configjson['configuration']['relations']['rules']
    tagged_text = disambiguate_entities(text)

    for rule in rules:
        matches = re.findall(rule['pattern'], tagged_text)
        if not matches:
            continue
        first_match = matches[0]
        head_parts = first_match[0].split('<Keyword:')
        tail_parts = first_match[2].split('<Keyword:')
        # Each keyword sits between '<Keyword:' and the next '>'.
        head = head_parts[1].split('>')[0]
        tail = tail_parts[1].split('>')[0]
        relationship.append((head, rule['tag'], tail))

    return relationship

### 6. Process

In [None]:
# Load both rule files from Object Storage as UTF-8 text. The object keys
# come from credentials_2['FILE'] and credentials_3['FILE'].
config_classification_json=load_string(get_file(credentials_2['FILE'])).decode("utf-8")

config_relation_json=load_string(get_file(credentials_3['FILE'])).decode("utf-8")

In [None]:
# Run the extraction and classification pipeline over every document in the archive.
results_from_nlu, results_from_htmlTable, raw_texts = generate_NLUJson_HtmlTablesJson()

In [None]:
# Empty directed multigraph that receives one node per NLU entity in the next cells.
G = nx.MultiDiGraph()

In [None]:
# Populate G with the entities found by NLU (the entity text is the node name).
create_entityNodes(G,results_from_nlu)

In [None]:

# Global table of known entity nodes; filter_and_format_relations() reads it.
nodes_df = create_nodes_dataframe(G)

In [None]:
# Build and draw the knowledge graph. Note that G is rebound here to the
# undirected nx.Graph created by draw_simple_graph(), replacing the
# MultiDiGraph of entity nodes built above.
G, pos, edge_labels = knowledge_graph(results_from_nlu)
edge_labels

### 7. Querying the Knowledge Graph


In [None]:
# Edges of the knowledge graph together with their attribute dicts;
# creategraphDataframe() reads the 'relation' attribute from each of them.
edgeDict = G.edges(data=True)
edgeDict

In [None]:
def creategraphDataframe():
    """ Turn the global edgeDict into a dataframe with the columns
    'node_1', 'node_2' and 'relation' (one row per edge).
    """
    rows = [(edge[0], edge[1], edge[2]['relation']) for edge in edgeDict]
    return pd.DataFrame(rows, columns=['node_1','node_2', 'relation'])

In [None]:
# Global edge table used by the d3 mapping and question-answering functions below.
graph_dataframe = creategraphDataframe()

In [None]:
def get_mapping_d3_network():
    """ Build the {'nodes': [...], 'links': [...]} mapping for the d3js
    network widget from the global graph_dataframe.

    Each distinct node id appears once and gets its own 1-based 'group'
    number; each dataframe row becomes one link.
    """
    nodes = []
    links = []
    for _, row in graph_dataframe.iterrows():
        source_id = row['node_1']
        target_id = row['node_2']

        for node_id in (source_id, target_id):
            candidate = {'id': node_id}
            if candidate not in nodes:
                nodes.append(candidate)

        links.append({
            'source': source_id,
            'target': target_id,
            'value': row['relation'],
        })

    # Groups are assigned only after de-duplication, in first-seen order.
    for group, node in enumerate(nodes, start=1):
        node['group'] = group

    return {"nodes": nodes, "links": links}

In [None]:
def get_mapping_filter_d3_network(src,tgt):
    """ Build the d3js network mapping from the global graph_dataframe and
    highlight the `src` and `tgt` nodes.

    Nodes equal to `src` or `tgt`, and links that join the two (in either
    direction), get 'filter' set to 1; everything else gets 0.
    """
    def highlight(node_id):
        if node_id == tgt:
            return 1
        if node_id == src:
            return 1
        return 0

    nodes = []
    links = []
    for _, row in graph_dataframe.iterrows():
        first_id = row['node_1']
        second_id = row['node_2']

        for node_id in (first_id, second_id):
            candidate = {'id': node_id, 'filter': highlight(node_id)}
            if candidate not in nodes:
                nodes.append(candidate)

        forward = (first_id == src) and (second_id == tgt)
        backward = (second_id == src) and (first_id == tgt)
        links.append({
            'source': first_id,
            'target': second_id,
            'value': row['relation'],
            'filter': 1 if (forward or backward) else 0,
        })

    # Groups are assigned only after de-duplication, in first-seen order.
    for group, node in enumerate(nodes, start=1):
        node['group'] = group

    return {"nodes": nodes, "links": links}

In [None]:
# Display the node/link mapping of the whole graph as the cell output.
get_mapping_d3_network()

In [None]:
def getAnswerNodeFromGraph(graph_dataframe, nnList, nnpList):
    """Find the graph node that answers a question.

    Algorithm: the nouns/verbs of the question pick the best-matching relation
    and the proper nouns pick the best-matching node; the answer is the node
    at the other end of an edge that has that relation and touches that node.

    Args:
        graph_dataframe: Edge table with columns 'node_1', 'node_2', 'relation'.
        nnList: Nouns/verbs from the question, fuzzy-matched against relations.
        nnpList: Proper nouns from the question, fuzzy-matched against nodes.

    Returns:
        dict: {'src': matched node, 'tgt': answer node}. Both values are ''
        when nothing matches (previously this raised an error).
    """
    best_relation_score = 0
    relation = ''
    best_node_score = 0
    matched_node = ''

    for _, row in graph_dataframe.iterrows():
        # Best score of this row's relation against any question word. The
        # threshold below uses this value, so the outcome no longer depends on
        # which word happens to be last in nnList or on a previous row's score.
        row_relation_score = 0
        for word in nnList:
            score = fuzz.token_sort_ratio(row['relation'], word)
            row_relation_score = max(row_relation_score, score)
            if score > best_relation_score:
                best_relation_score = score
                relation = row['relation']

        # Without proper nouns there is nothing to match nodes against, and a
        # row only qualifies when its relation matches well enough.
        if not nnpList or row_relation_score <= 70:
            continue

        average_score_node1 = sum(
            fuzz.partial_ratio(row['node_1'], name) for name in nnpList
        ) / len(nnpList)
        average_score_node2 = sum(
            fuzz.partial_ratio(row['node_2'], name) for name in nnpList
        ) / len(nnpList)

        if average_score_node1 > average_score_node2:
            if average_score_node1 > best_node_score:
                best_node_score = average_score_node1
                matched_node = row['node_1']
        elif average_score_node1 < average_score_node2:
            if average_score_node2 > best_node_score:
                best_node_score = average_score_node2
                matched_node = row['node_2']

    # True where the row carries the best-matching relation
    relation_match = graph_dataframe['relation'] == relation
    # True where the matched node is the first / second endpoint of the row
    node1_match = graph_dataframe['node_1'] == matched_node
    node2_match = graph_dataframe['node_2'] == matched_node

    candidates = graph_dataframe[relation_match & (node1_match | node2_match)]
    # Take the endpoint opposite to the matched node, whichever side it is on,
    # so the answer is never the matched node itself.
    answers = candidates['node_2'].where(
        candidates['node_1'] == matched_node, candidates['node_1']
    ).values

    response = {}
    response['src'] = matched_node
    print('source node is ' + response['src'])
    response['tgt'] = answers[0] if len(answers) else ''
    print('answer is '+ response['tgt'])
    return response

In [None]:
def getAnswerForQuery(question):
    '''Answer a question from the knowledge graph and return the answer node.

    The question is POS-tagged; words tagged NN, VBN or NNS select the
    relation and words tagged NNP select the node. The intermediate word
    lists are printed.
    '''
    tagged_words = nltk.pos_tag(nltk.word_tokenize(question))
    # A dict keeps one tag per distinct word (the last one seen).
    revDct = dict(tagged_words)
    print('rev dictionary...')
    print(revDct)

    nnList = [word for word, tag in revDct.items() if tag in ('NN', 'VBN', 'NNS')]
    nnpList = [word for word, tag in revDct.items() if tag == 'NNP']
    print('the nn list')
    print(nnList)
    print('the nnp list')
    print(nnpList)

    return getAnswerNodeFromGraph(graph_dataframe, nnList, nnpList)['tgt']

In [None]:
def getAnswerForUIQuery(question):
    '''Answer a question for the user interface.

    Returns the d3js network mapping of the graph with the matched node and
    the answer node highlighted.
    '''
    tagged_words = nltk.pos_tag(nltk.word_tokenize(question))
    # A dict keeps one tag per distinct word (the last one seen).
    revDct = dict(tagged_words)

    nnList = [word for word, tag in revDct.items() if tag in ('NN', 'VBN', 'NNS')]
    nnpList = [word for word, tag in revDct.items() if tag == 'NNP']

    answer = getAnswerNodeFromGraph(graph_dataframe, nnList, nnpList)
    print (answer)
    return get_mapping_filter_d3_network(answer['src'], answer['tgt'])

### 8. Expose integration point for Node-RED

In [None]:
def on_message(ws, message):
    """Handle one websocket message.

    The message is a JSON object with a 'cmd' field. 'query' and 'uiquery'
    answer msg['question']; 'graph' returns the whole graph mapping. The reply
    is sent back as JSON with 'forcmd' and 'response' fields. Any other
    command is ignored.
    """
    print(message)
    msg = json.loads(message)
    cmd = msg['cmd']

    if cmd == 'query':
        print("query hi")
        reply = getAnswerForQuery(msg['question'])
    elif cmd == 'uiquery':
        print("uiquery hi")
        reply = getAnswerForUIQuery(msg['question'])
        print ('ans',reply)
    elif cmd == "graph":
        reply = get_mapping_d3_network()
    else:
        return

    ws.send(json.dumps({"forcmd": cmd, "response": reply}))
    
def on_error(ws, error):
    """Websocket error callback: print the error."""
    print(error)

def on_close(ws, *close_args):
    """Websocket close callback: try to send a final 'DSX Listen End' notice.

    Args:
        ws: The websocket application object.
        *close_args: Extra positional arguments. Newer websocket-client
            releases pass a close status code and message; older ones pass
            nothing. They are accepted and ignored so both styles work.
    """
    try:
        ws.send("DSX Listen End")
    except Exception as exc:
        # NOTE(review): by the time this callback runs the connection is
        # presumably already closed, so a failed send is reported, not raised.
        print(exc)

def on_open(ws):
    """Websocket open callback: start a background heartbeat.

    A daemon thread sends the heartbeat message over `ws` and then sleeps
    100 seconds, up to 10000 times.
    """
    # Imported here because the notebook's import cell provides neither module.
    import threading
    import time

    def run(*args):
        for i in range(10000):
            hbeat = '{"cmd":"Olympics DSX HeartBeat"}'
            ws.send(hbeat)
            time.sleep(100)

    # Daemon thread: the heartbeat must not keep the interpreter alive.
    threading.Thread(target=run, daemon=True).start()


def start_websocket_listener(url="ws://reddytrackiot.eu-gb.mybluemix.net/ws/abc"):
    """Connect to the Node-RED websocket endpoint and serve requests.

    Blocks in ``run_forever()`` until the connection ends. Messages are
    handled by on_message, and on_open starts the heartbeat.

    Args:
        url: Websocket endpoint to connect to. Defaults to the endpoint that
            was previously hard-coded here.
    """
    websocket.enableTrace(True)
    ws = websocket.WebSocketApp(url,
                              on_message = on_message,
                              on_error = on_error,
                              on_close = on_close)
    ws.on_open = on_open
    ws.run_forever()

In [None]:
 # Start listening for Node-RED requests; this call runs run_forever().
 start_websocket_listener()