In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
import time
import threading
import networkx as nx
import nltk
from nltk import word_tokenize,sent_tokenize,ne_chunk
import json
from textblob import TextBlob


In [86]:
class KnowledgeGraph:
    """ Knowledge Graph Object, stores variables and information about the knowledge graph. """
    
    def __init__(self, triplets_df):
        """ 
        Takes in a pandas dataframe with columns confidence, sentence, subject, relation, object. 
        ---------------------------------------------------
        
        Stores the following information:
        > ALL = "ALL"
        > triplets_map
        > numbers_map
        > map_triplets_df
        > topic_triplets_df
        > G
        > pos
        > edges
        > topics_list
        > relation_list
        > subject_list
        > object_list
        """
        self.ALL = "ALL"
        self.triplets_df = triplets_df
        self.triplets_map, self.numbers_map = self.get_triplets_map()
        self.map_triplets_df = self.map_triplets()
        self.topic_triplets_df = self.find_initial_nouns()
        
        # generate main graph
        self.G, self.pos, self.edges, topics = self.create_kg()
        self.topics_list = self.unique_sorted_values_plus_ALL(pd.Series(topics))
        self.relation_list = self.unique_sorted_values_plus_ALL(pd.Series(list(self.edges.values())))
        self.subject_list = self.unique_sorted_values_plus_ALL(self.triplets_df.subject, inc_all=False)
        self.object_list = self.unique_sorted_values_plus_ALL(self.triplets_df.object, inc_all=False)
        
    def create_kg(self, seed=None):
        """ 
        Create knowledge graph G with the triplets' mapped numerical value. 
        --------------------------
        
        Every row of ``topic_triplets_df`` contributes a subject node, an object
        node (carrying the row's sentence as a node attribute), one relation edge
        subject -> object, and one unlabelled edge from each topic tag to the
        subject/object it was found in.
        
        Parameters:
        -- seed: optional seed forwarded to ``nx.spring_layout`` so the layout can
            be reproduced; the default ``None`` keeps the random layout.
        
        Returns (G, pos, edges, topic_nodes):
        -- G: the ``nx.MultiDiGraph``
        -- pos: node -> coordinates from the spring layout
        -- edges: {(subject_id, object_id): relation}; a later row with the same
            pair overwrites an earlier one
        -- topic_nodes: every topic tag encountered, duplicates included
        
        KIV: 
        -- Store previous plot as a variable. Compare current graph with previous plot when replotting. If same, reuse the previous plot. 
            Most useful when plotting entire graph. Plotting subsets are fast and do not require this.
        """
        subject_nodes = []
        object_nodes = []        # (object id, sentence) pairs
        topic_nodes = []
        topic_edges = []
        relationship_edges = []
        labels = []
        # iterrows() walks the rows in order whatever their index labels are.
        for _, triplet in self.topic_triplets_df.iterrows():
            sub_id = triplet['subject_map']
            obj_id = triplet['object_map']
            subject_nodes.append(sub_id)
            object_nodes.append((obj_id, triplet['sentence']))
            relationship_edges.append((sub_id, obj_id))
            labels.append(triplet['relation'])
            for topic in triplet['subject_tags']:
                topic_nodes.append(topic)
                topic_edges.append((topic, sub_id))
            for topic in triplet['object_tags']:
                topic_nodes.append(topic)
                topic_edges.append((topic, obj_id))
        G = nx.MultiDiGraph()
        # add nodes (subjects, then objects, then topics)
        G.add_nodes_from(subject_nodes)
        for node_id, sentence in object_nodes:
            G.add_node(node_id, sentence = sentence)
        G.add_nodes_from(topic_nodes)
        # add edges (topic edges carry no attributes, relation edges carry 'relation')
        G.add_edges_from(topic_edges)
        for (sub_id, obj_id), label in zip(relationship_edges, labels):
            G.add_edge(sub_id, obj_id, relation = label)
        edges = dict(zip(relationship_edges, labels))
        # Layout: spring layout. Graphviz layouts via nx.nx_agraph.graphviz_layout
        # (prog='neato' / 'dot' / 'twopi' / 'fdp') are possible alternatives.
        pos = nx.spring_layout(G, seed=seed)
        return G, pos, edges, topic_nodes
    
    def display_full_graph(self):
        return self.draw_graph_triplets(self.get_triplet_position(), self.get_triplet_edges())
    
    def display_filtered_graph(self, rel=None, top=None):
        if (rel is None) and (top is None):
            self.display_full_graph()
        elif (top is None):
            self.draw_rel_filter(rel)
        elif (rel is None):
            self.draw_topic_filter(top)
        else:
            self.draw_double_filter(rel, top)
            
    def draw_rel_filter(self, rel):
        """ Plot only the nodes joined by an edge whose relation equals ``rel``. """
        matching_nodes = self.get_nodes(rel)
        subgraph = self.G.subgraph(matching_nodes)
        plt.figure(figsize=(15,5))
        # Positions are stored by integer id; re-key them by the node's string so
        # they match the relabelled graph below.
        named_pos = {}
        for node, coords in self.pos.items():
            if node in subgraph.nodes:
                named_pos[self.numbers_map[node]] = coords
        named_graph = nx.relabel_nodes(subgraph, self.numbers_map, copy = True)
        nx.draw(named_graph, named_pos, with_labels = True)
        plt.show()
        
    def draw_topic_filter(self, topic):
        """
        Plot the part of the graph that a depth-first search from ``topic`` reaches.
        --------------------------
        
        The subgraph is drawn with entity ids replaced by their strings, and the
        relation edges inside it are labelled in red.
        """
        successor_lists = nx.dfs_successors(self.G, topic).values()
        reachable = [node for children in successor_lists for node in children]
        reachable.append(topic)
        filteredG = self.G.subgraph(reachable)
        # The graph is relabelled to strings before drawing, so both the positions
        # and the edge labels have to be keyed by those strings as well.
        # Membership in numbers_map decides whether a node is an entity id; topic
        # nodes are not in the map and keep their own name. (Checking the key's
        # exact numpy type is not reliable: ids may be int64 or plain int.)
        relabel_pos = {}
        for node, coords in self.pos.items():
            if node in filteredG.nodes:
                relabel_pos[self.numbers_map.get(node, node)] = coords
        # self.edges is keyed by id pairs, which is what the subgraph's edges use.
        filtered_edges = {}
        for u, v, _key in filteredG.edges:
            if (u, v) in self.edges:
                filtered_edges[(self.numbers_map[u], self.numbers_map[v])] = self.edges[(u, v)]
        plt.figure(figsize=(15,5))
        filteredG = nx.relabel_nodes(filteredG, self.numbers_map, copy = True)
        nx.draw_networkx_nodes(filteredG, relabel_pos)
        nx.draw_networkx_labels(filteredG, relabel_pos)
        nx.draw_networkx_edges(filteredG, relabel_pos , alpha=0.5)
        nx.draw_networkx_edge_labels(filteredG, relabel_pos , edge_labels = filtered_edges, font_color='red')
        plt.show()
    
    def draw_double_filter(self, rel, topic):
        """
        Plot the nodes that are reachable from ``topic`` AND joined by relation ``rel``.
        --------------------------
        
        First narrows the graph to what a depth-first search from ``topic``
        reaches, then keeps only the nodes connected by an edge whose relation
        equals ``rel``. Entity ids are replaced by their strings and relation
        edges are labelled in red.
        """
        successor_lists = nx.dfs_successors(self.G, topic).values()
        reachable = [node for children in successor_lists for node in children]
        reachable.append(topic)
        topic_subgraph = self.G.subgraph(reachable)
        filteredG = self.G.subgraph(self.get_nodes(rel, topic_subgraph))
        # The graph is relabelled to strings before drawing, so both the positions
        # and the edge labels have to be keyed by those strings as well.
        # Membership in numbers_map decides whether a node is an entity id.
        # (Checking the key's exact numpy type is not reliable: ids may be int64
        # or plain int.)
        relabel_pos = {}
        for node, coords in self.pos.items():
            if node in filteredG.nodes:
                relabel_pos[self.numbers_map.get(node, node)] = coords
        # self.edges is keyed by id pairs, which is what the subgraph's edges use.
        filtered_edges = {}
        for u, v, _key in filteredG.edges:
            if (u, v) in self.edges:
                filtered_edges[(self.numbers_map[u], self.numbers_map[v])] = self.edges[(u, v)]
        plt.figure(figsize=(15,5)) 
        filteredG = nx.relabel_nodes(filteredG, self.numbers_map, copy = True)
        nx.draw_networkx_nodes(filteredG, relabel_pos)
        nx.draw_networkx_labels(filteredG, relabel_pos)
        nx.draw_networkx_edges(filteredG, relabel_pos , alpha=0.5)
        nx.draw_networkx_edge_labels(filteredG, relabel_pos , edge_labels = filtered_edges, font_color='red')
        plt.show()

    def get_triplets_map(self):
        """ 
        Maps object and subject strings to a unique number. 
        """
        
        triplets_map = {}
        numbers_map = {}
        index = 0
        for row in range(len(self.triplets_df)):
            triplet = self.triplets_df.loc[row]
            sub = triplet['subject'] 
            obj = triplet['object']
            if sub not in triplets_map:
                triplets_map[sub] = index
                numbers_map[index] = sub
                index += 1
            if obj not in triplets_map:
                triplets_map[obj] = index
                numbers_map[index] = obj
                index += 1
                
        return triplets_map, numbers_map
    
    def map_triplets(self):
        """ 
        Returns triplets_df with its mapped values. 
        """
        
        df = self.triplets_df.copy(deep = True)
        for row in range(len(df)):
            triplet = df.loc[row]
            sub = triplet['subject'] 
            obj = triplet['object']
            df.loc[row, 'subject_map'] = self.triplets_map[sub]
            df.loc[row, 'object_map'] = self.triplets_map[obj]
        df.subject_map = df.subject_map.astype(int)
        df.object_map = df.object_map.astype(int)
        
        return df
    
    def find_initial_nouns(self):
        """
        Finds the nouns for each subject/object to generate topic nodes.
        """
        
        df = self.map_triplets_df.copy(deep = True)
        df['subject_tags'] = np.empty((len(df), 0)).tolist()
        df['object_tags'] = np.empty((len(df), 0)).tolist()
        proper_nouns = [] # proper nouns
        subject_nouns = []
        object_nouns = []
        
        for row in range(len(df)):
            triplet = df.loc[row]                
            sentence = triplet['sentence'] 
            tokens = self.split_tokens(sentence)
            postags = self.POS_tagging(tokens)
            nounphrases = self.phrase_extraction(sentence)
            #postag_dict[sentence] = postags        
            sub = triplet['subject']
            obj = triplet['object']        
            subject_tags = []
            object_tags = []
            for tag in postags:
                if tag[1] == 'NNPS' or tag[1] == 'NNP':                
                    noun = tag[0]                
                    if noun in sub and noun not in subject_tags:
                        subject_tags.append(noun)
                        proper_nouns.append(noun)
                    if noun in obj and noun not in object_tags:
                        object_tags.append(noun)
                        proper_nouns.append(noun)
            for noun in nounphrases:
                if noun in sub and noun not in subject_tags:
                    if noun.upper() not in proper_nouns:
                        subject_tags.append(noun)
                    else:
                        subject_tags.append(noun.upper())
                if noun in obj and noun not in object_tags:
                    if noun.upper() not in proper_nouns:
                        object_tags.append(noun)
                    else:
                        object_tags.append(noun.upper())
                
            subject_nouns.append(subject_tags)
            object_nouns.append(object_tags)        
        df['subject_tags'] = subject_nouns
        df['object_tags'] = object_nouns 
        return df
    
    def find_nouns(self, text):
        tokens = self.split_tokens(text)
        postags = self.POS_tagging(tokens) 
        nounphrases = self.phrase_extraction(text)
        new_topics = []
        for tag in postags:
            if tag[1] == 'NNPS' or tag[1] == 'NNP':               
                noun = tag[0]                
                if noun not in new_topics:
                    new_topics.append(noun)
        for noun in nounphrases:
            if noun not in new_topics:
                new_topics.append(noun)
        return new_topics
    
    def get_triplet_position(self):    
        ''' return a dictionary of the position of triplets in the knowledge graph '''
        triplet_position = {}    
        for position in self.G:
            if position in self.numbers_map.keys():
                triplet_position[self.numbers_map[position]] = self.pos[position]
            elif position in self.pos.keys():
                triplet_position[position] = self.pos[position]
            #else:
                #triplet_position[position] = pos[position]
        return triplet_position
    
    def get_triplet_edges(self):    
        ''' return a dictionary of the relationship of triplets in the knowledge graph '''
        triplet_edges = {}    
        for k in self.edges:
            #print(k)
            sub = self.numbers_map[k[0]]
            obj = self.numbers_map[k[1]]
            rel = self.edges[k]    
            triplet_edges[(sub,obj)] = rel    
        return triplet_edges
    
    def draw_graph_triplets(self, triplet_pos, triplet_edges):    
        ''' 
        Draw the whole graph with entity ids replaced by their strings.
        triplet_pos gives the position of each (relabelled) node, triplet_edges 
        the red labels drawn on the edges.
        '''
        named_graph = nx.relabel_nodes(self.G, self.numbers_map, copy = True)
        plt.figure(figsize=(80,40))
        layout = triplet_pos
        nx.draw_networkx_nodes(named_graph, layout, node_size=40)
        nx.draw_networkx_labels(named_graph, layout)
        nx.draw_networkx_edges(named_graph, layout, alpha=0.5)
        nx.draw_networkx_edge_labels(named_graph, layout, edge_labels = triplet_edges, font_color='red')
        plt.show()
    
    def get_nodes(self, edge, filteredG=None):
        ''' get nodes connected by specified edge'''
        node = []
        if (filteredG is None):
            filteredG = self.G
        for u,v,e in filteredG.edges(data=True):
            if e == {}:
                continue
            if e['relation'] == edge:
                if u not in node:
                    node.append(u)
                if v not in node:
                    node.append(v)
        return node
    
    def unique_sorted_values_plus_ALL(self, array, inc_all = True):
        '''generates a unique list with ALL appended to the top'''
        unique = array.unique().tolist()
        unique.sort()
        if inc_all:
            unique.insert(0, self.ALL)
        return unique
    
    def add_triplet_to_df(self,sentence, sub, rel, obj, sub_map, obj_map, sub_top, obj_top):
        new_entry = {}
        new_entry['confidence'] = 1
        new_entry['sentence'] = sentence
        new_entry['subject'] = sub
        new_entry['relation'] = rel
        new_entry['object'] = obj
        new_entry['subject_map'] = sub_map
        new_entry['object_map'] = obj_map
        new_entry['subject_tags'] = sub_top
        new_entry['object_tags'] = obj_top
        self.topic_triplets_df = self.topic_triplets_df.append(new_entry, ignore_index=True) 
    
    def add_as_subject(self, text):
        if text not in self.subject_list:
            self.subject_list.append(text)
        return self.add_as_node(text)
    
    def add_as_object(self, text):
        if text not in self.object_list:
            self.object_list.append(text)
        return self.add_as_node(text)
    
    def add_as_node(self, text):
        """
        Add ``text`` to the graph as an entity node linked to its topics.
        
        Topics are the nouns found in ``text``. A noun that is already a topic,
        either as-is or in upper case, reuses that topic; otherwise it becomes a
        new topic node. ``text`` gets a new id unless it already has one. The
        layout is then recomputed, starting from the current positions.
        
        Note: map_triplets_df and topic_triplets_df are not updated here.
        """
        # extract topics from text and resolve each to the name used in the graph
        linked_topics = []
        for noun in self.find_nouns(text):
            if noun in self.topics_list:
                topic = noun
            elif noun.upper() in self.topics_list:
                # the topic exists in upper case; link to it rather than letting
                # add_edge below create a second, unlisted node for the raw noun
                topic = noun.upper()
            else:
                # if topic doesn't exist, add topic as node
                topic = noun
                self.topics_list.append(topic)
                self.G.add_node(topic)
            if topic not in linked_topics:
                linked_topics.append(topic)
        # map text to an index
        if text in self.triplets_map:
            index = self.triplets_map[text]
        else:
            # default=-1 makes the first id 0 when the map is empty
            index = max(self.numbers_map.keys(), default=-1) + 1
            self.numbers_map[index] = text
            self.triplets_map[text] = index
        # add index to graph
        self.G.add_node(index)
        # create edge between text and topic; G is a multigraph, so guard against
        # stacking a parallel edge every time the same text is added again
        for topic in linked_topics:
            if not self.G.has_edge(topic, index):
                self.G.add_edge(topic, index)
        # update pos
        self.pos = nx.spring_layout(self.G, pos=self.pos)
        # TODO: dynamically update the dropdown menus that list nodes/topics
    
    def add_as_edge(self, sub, obj, rel):
        """ 
        Adds edge to G, edges, and also to relation_list if not yet in relation_list.
        """
        if sub not in self.triplets_map:
            self.add_as_subject(sub)
        if obj not in self.triplets_map:
            self.add_as_object(obj)
        sub_id = self.triplets_map[sub]
        obj_id = self.triplets_map[obj]
        if self.G.has_edge(sub_id, obj_id):
            return False
        self.G.add_edge(sub_id, obj_id, relation = rel)
        if rel not in self.relation_list:
            self.relation_list.append(rel)
        self.edges[(self.triplets_map[sub], self.triplets_map[obj])] = rel
        return True
    
    def edit_node(self, old_node, new_node):
        value = self.triplets_map.pop(old_node)
        # change name in triplets_map
        self.triplets_map[new_node] = value
        # change name in numbers_map
        self.numbers_map[value] = new_node
        # change name in subject_list
        try:
            idx = self.subject_list.index(old_node)
            if new_node not in self.subject_list:
                self.subject_list[idx] = new_node
            else:
                self.subject_list.pop(idx)
        except ValueError:
            pass
        # change name in object_list
        try:
            idx = self.object_list.index(old_node)
            if new_node not in  self.object_list:
                self.object_list[idx] = new_node
            else:
                self.object_list.pop(idx)
        except ValueError:
            pass
        # change name in dataframe
        self.topic_triplets_df.loc[self.topic_triplets_df.subject == old_node, "subject"] = new_node
        self.topic_triplets_df.loc[self.topic_triplets_df.subject == old_node, "object"] = new_node
        
    def edit_edge(self, old_rel, new_rel):
        # change name in relation_list
        idx = self.relation_list.index(old_rel)
        if new_rel not in self.relation_list:
            self.relation_list[idx] = new_rel
        else:
            self.relation_list.pop(idx)
        # change name in edges
        self.edges = {k:(new_rel if old_rel == v else v) for k,v in self.edges.items()}
        # change name in graph
        for n, nbrsdict in self.G.adjacency():
            for nbr, keydict in nbrsdict.items():
                for key, eattr in keydict.items():
                    keydict[key] = {k:(new_rel if v==old_rel else v) for k,v in eattr.items()}
        # change name in dataframe
        self.topic_triplets_df.loc[self.topic_triplets_df.subject == old_rel, "relation"] = new_rel
        
    def edit_topic(self, old_topic, new_topic):
        """
        Rename the topic ``old_topic`` to ``new_topic``.
        
        Updates topics_list, the node name in the graph, the node's entry in the
        layout ``pos`` and the tag lists in topic_triplets_df. Raises ValueError
        if ``old_topic`` is not in topics_list.
        """
        if new_topic == old_topic:
            # nothing to rename; continuing would drop the topic from topics_list
            return
        # change name in topics_list: rename in place, or drop the old entry when
        # the new name is already listed
        idx = self.topics_list.index(old_topic)
        if new_topic not in self.topics_list:
            self.topics_list[idx] = new_topic
        else:
            self.topics_list.pop(idx)
        # change name in graph
        nx.relabel_nodes(self.G, {old_topic:new_topic}, copy=False)
        # change name in layout, otherwise the renamed node has no position when
        # the graph is drawn; an existing position for new_topic is kept
        old_coords = self.pos.pop(old_topic, None)
        if old_coords is not None:
            self.pos.setdefault(new_topic, old_coords)
        # change name in dataframe; the tag lists are edited in place
        for column in ("subject_tags", "object_tags"):
            for tags in self.topic_triplets_df[column]:
                if isinstance(tags, list) and old_topic in tags:
                    tags[tags.index(old_topic)] = new_topic
                
    def edit_ans(self, sub, rel, obj, ans):
        # check if row exists
        cond = (self.topic_triplets_df.subject == sub) & (self.topic_triplets_df.relation == rel) & (self.topic_triplets_df.object == obj)
        if any(cond):
            self.topic_triplets_df.loc[cond, "sentence"] = ans
            return True
        else:
            return False
    
    def remove_node(self, text):
        # remove from triplets map, from numbers_map
        index = self.triplets_map[text]
        del self.triplets_map[text]
        del self.numbers_map[index]
        # find node's edges and remove them, remove edges from relation, refresh lists
        for k,v in list(self.edges.items()):
            _ = self.edges.pop(k) if (index in k) else None
        affected_rel = []
        for item in nx.dfs_predecessors(self.G, index).items():
            self.G.remove_edge(item[1],item[0])
            affected_rel.append(item[::-1])
        for item in nx.dfs_successors(self.G, index).items():
            self.G.remove_edge(*item)
            affected_rel.append(item)
        # drop from df    
        for item in affected_rel:
            index = self.topic_triplets_df[(kg.topic_triplets_df.subject == item[0])&(kg.topic_triplets_df.object == item[1])].index[0]
            self.topic_triplets_df.drop(index)
        self.relation_list = self.unique_sorted_values_plus_ALL(pd.Series(self.edges.values()))
        self.G.remove_node(index)
        # remove from subject and object list
        if text in self.subject_list:
            self.subject_list.remove(text)
        if text in self.object_list:
            self.object_list.remove(text)        
        # find node's topic, check if topic is empty, if yes, remove topic from graph and filter
        new_topics = self.find_nouns(text)
        for noun in new_topics:
            # QUICK HACK FIX: Sometimes, noun phrases found may not be in list.
            # Try to fix if time permits
            try:
                if len(nx.dfs_successors(self.G, noun)) == 0:
                    self.G.remove_node(noun)
                    self.topics_list.remove(noun)
            except KeyError:
                pass
    
    def remove_edge(self, sub, obj):
        """ 
        Attempts to remove edge from graph.
        """
        sub_id = self.triplets_map[sub]
        obj_id = self.triplets_map[obj]
        try:
            del self.edges[(sub_id, obj_id)]
            self.G.remove_edge(sub_id, obj_id)
            index = self.topic_triplets_df[(kg.topic_triplets_df.subject == sub)&(kg.topic_triplets_df.object == obj)].index[0]
            self.topic_triplets_df = self.topic_triplets_df.drop(index)
        except:
            pass
        self.relation_list = self.unique_sorted_values_plus_ALL(pd.Series(self.edges.values()))
        
    def get_sentence(self, sub, rel, obj):
        cond = (self.topic_triplets_df.subject == sub) & (self.topic_triplets_df.relation == rel) & (self.topic_triplets_df.object == obj)
        if any(cond):
            return self.topic_triplets_df.loc[cond, "sentence"]
        else: 
            return None
        
    def get_tags(self, item, loc):
        tags = []
        if loc == "subject":
            idx = (self.topic_triplets_df.subject == item).idxmax()
            tags = self.topic_triplets_df.loc[idx, "subject_tags"]
        if loc == "object":
            idx = (self.topic_triplets_df.object == item).idxmax()
            tags = self.topic_triplets_df.loc[idx, "object_tags"]
        return tags
    
    def split_tokens(self, text):
        """ 
        Tokenise ``text`` with NLTK's word tokenizer, after replacing every '/'
        with a space.
        """
        return nltk.word_tokenize(text.replace('/',' '))
    
    def POS_tagging(self, text):
        """ 
        Run NLTK's part-of-speech tagger on ``text`` and return its result.
        
        Within this class it is called with the token list from ``split_tokens``.
        """
        return nltk.tag.pos_tag(text)
    
    def phrase_extraction(self, text):
        """
        Return the noun phrases TextBlob finds in ``text``.
        """
        return TextBlob(text).noun_phrases

In [87]:
class Dashboard:
    """ 
    Uses a knowledge graph object to create the dashboard using ipython widgets. 
    
    Contains the following widgets:
    -------------------------------
    
    Main Class:
        > Knowledge Graph as 'kg'
        > Main output as 'output'
    
    Filter Widget:
        > dropdown_relation
        > dropdown_entity
        > refresh_btn
        
    Adding Widget:
        > node_text
        > subject_text
        > object_text
        > relation_text
        > answer_text
        > triplet_button
        > triplet_output
        
    Editing Widget (Tab Interface):
        > edit_tab
        
    Editing Widget (Node):
        > edit_node_dropdown
        > edit_node_confirm_button
        > edit_node_textbox
        > edit_node
        
    Editing Widget (Relation):
        > edit_rel_dropdown
        > edit_rel_confirm_button
        > edit_rel_textbox
        > edit_rel
        
    Editing Widget (Topic):
        > edit_topic_dropdown
        > edit_topic_confirm_button
        > edit_topic_textbox
        > edit_topic 
        
    Editing Widget (Candidate Answer):
        > edit_ans_sub_filter 
        > edit_ans_rel_filter 
        > edit_ans_obj_filter
        > edit_ans_confirm_button 
        > edit_ans_field
        > edit_ans
        
    Removing Widget:
        > node_text2
        > edge_subject_text2
        > edge_object_text2 
        > edge_relation_text2
        > node_button2 
        > edge_button2 
        
    """
    
    def __init__(self, kg):    
        """
        Build and display the dashboard for the knowledge graph ``kg``.
        
        The full graph is drawn once into the main output widget, then the
        filter, add, edit and remove sections are created in that order.
        """
        self.kg = kg
        self.ALL = "ALL"
        
        # The output widget has to exist before the filter section displays it.
        self.output = widgets.Output()
        with self.output:
            self.kg.display_full_graph()
            
        section_builders = (
            self.initialize_filters,
            self.initialize_adds,
            self.initialize_edit,
            self.initialize_remv,   # in-progress
        )
        for build_section in section_builders:
            build_section()
      
        
        

    def initialize_filters(self):
        """ 
        Create and display the filter row (relation dropdown, topic dropdown,
        refresh button) followed by the main graph output.
        """
        wide_label = {'description_width': 'initial'}
        
        self.dropdown_relation = widgets.Dropdown(
            description = "Filter by relation",
            options = self.kg.relation_list,
            style = dict(wide_label),
        )
        self.dropdown_entity = widgets.Dropdown(
            description = "Filter by topic",
            options = self.kg.topics_list,
            style = dict(wide_label),
        )
        self.refresh_btn = widgets.Button(
            description = "Refresh Graph",
            style = dict(wide_label),
        )
        
        # redraw whenever a filter changes or the refresh button is clicked
        self.dropdown_relation.observe(self.dropdown_relation_eventhandler, names='value')
        self.dropdown_entity.observe(self.dropdown_entity_eventhandler, names='value')
        self.refresh_btn.on_click(self.refresh_btn_eventhandler)
        
        filter_row = widgets.HBox((self.dropdown_relation, self.dropdown_entity, self.refresh_btn))
        display(filter_row)
        display(self.output)
        print("\n")
        
    def initialize_adds(self):
        """ 
        Create and display the widgets for adding a triplet: subject, object and
        relation comboboxes, the add button with its feedback output, and a text
        area for the intended answer.
        """
        # every known entity, whether it appears as a subject or as an object
        entity_names = list(set(self.kg.subject_list).union(set(self.kg.object_list)))

        self.subject_text = widgets.Combobox(
            description = "Subject",
            placeholder = "Add relation to new or existing subject.",
            options = entity_names,
        )
        self.relation_text = widgets.Combobox(
            description = "Relation",
            placeholder = "Add add new or existing relation to subject/object.",
            options = self.kg.relation_list,
        )
        self.object_text = widgets.Combobox(
            description = "Object",
            placeholder = "Add relation to new or existing object.",
            options = list(entity_names),
        )
        self.answer_text = widgets.Textarea(
            description = "Intended answer",
            placeholder='Write the intended answer for the query here.',
        )

        self.triplet_button = widgets.Button(description="Add triplet to KG")

        # output widget to provide feedback
        self.triplet_output = widgets.Output()
        
        self.triplet_button.on_click(self.triplet_button_click_eventhandler)
        
        add_row = widgets.HBox((self.subject_text, self.object_text, self.relation_text, self.triplet_button, self.triplet_output))
        display(add_row)
        display(self.answer_text)
        print("\n")
    
    def initialize_edit(self):
        """ 
        Create and display the editing section: a tab widget with one page each
        for editing nodes, relations, topics and candidate answers.
        
        The relation and topic dropdowns use the lists from index 1 onwards,
        i.e. without their first entry.
        """
        
        # node dropdown widget
        self.edit_node_dropdown = widgets.Dropdown(description = "Node to edit", options = list(set(self.kg.subject_list).union(set(self.kg.object_list))))
        # node confirm button
        self.edit_node_confirm_button = widgets.Button(description = "Edit Node")
        self.edit_node_confirm_button.on_click(self.edit_node_eventhandler)
        # node edit text box
        self.edit_node_textbox = widgets.Text(description = "Updated node.", placeholder = "Type the edited node here.")
        self.edit_node = widgets.VBox([self.edit_node_dropdown,  self.edit_node_textbox, self.edit_node_confirm_button])
        
        # rel dropdown widget
        self.edit_rel_dropdown = widgets.Dropdown(description = "Relation to edit", options = self.kg.relation_list[1:])
        # rel confirm button
        self.edit_rel_confirm_button = widgets.Button(description = "Edit Relation")
        self.edit_rel_confirm_button.on_click(self.edit_rel_eventhandler)
        # rel edit text box
        self.edit_rel_textbox = widgets.Text(description = "Updated relation.", placeholder = "Type the edited relation here.")
        self.edit_rel = widgets.VBox([self.edit_rel_dropdown, self.edit_rel_textbox, self.edit_rel_confirm_button])
        
        # topic dropdown widget
        self.edit_topic_dropdown = widgets.Dropdown(description = "Topic to edit", options = self.kg.topics_list[1:])
        # topic confirm button
        self.edit_topic_confirm_button = widgets.Button(description = "Edit Topic")
        self.edit_topic_confirm_button.on_click(self.edit_topic_eventhandler)
        # topic edit text box
        self.edit_topic_textbox = widgets.Text(description = "Updated topic.", placeholder = "Type the edited topic here.")
        self.edit_topic = widgets.VBox([self.edit_topic_dropdown, self.edit_topic_textbox, self.edit_topic_confirm_button])
        
        # sub filter
        self.edit_ans_sub_filter = widgets.Dropdown(description = "Subject", options = self.kg.subject_list)
        # rel filter
        self.edit_ans_rel_filter = widgets.Dropdown(description = "Relation", options = self.kg.relation_list[1:])
        # obj filter
        self.edit_ans_obj_filter = widgets.Dropdown(description = "Object", options = self.kg.object_list)
        # confirm button
        self.edit_ans_confirm_button = widgets.Button(description = "Edit Answer")
        self.edit_ans_confirm_button.on_click(self.edit_ans_eventhandler)
        # sentence output
        self.edit_ans_field = widgets.Textarea(description = "Answer", placeholder = "No sentence available for current selection.", disabled = True)
        self.edit_ans_output = widgets.Output()
        self.edit_ans = widgets.VBox([widgets.HBox([self.edit_ans_sub_filter, self.edit_ans_rel_filter, self.edit_ans_obj_filter]), 
                                      widgets.HBox([self.edit_ans_field, self.edit_ans_confirm_button, self.edit_ans_output])])
        
        # generate observer
        self.edit_ans_sub_filter.observe(self.ans_sub_filter_eventhandler, names="value")
        self.edit_ans_rel_filter.observe(self.ans_rel_filter_eventhandler, names="value")
        self.edit_ans_obj_filter.observe(self.ans_obj_filter_eventhandler, names="value")
        
        self.edit_tab = widgets.Tab(children = [self.edit_node, self.edit_rel, self.edit_topic, self.edit_ans])
        # Use the public set_title() API rather than assigning the private
        # _titles attribute, which newer ipywidgets releases no longer read.
        tab_titles = ("Edit Nodes", "Edit Relations", "Edit Topics", "Edit Candidate Answers")
        for tab_index, tab_title in enumerate(tab_titles):
            self.edit_tab.set_title(tab_index, tab_title)
        
        display(self.edit_tab)
        print("\n")
        
    def initialize_remv(self):
        """ 
        Create and display the removing section: one row to remove a node and one
        row to remove the edge between a chosen subject and object.
        """
        # every known entity, whether it appears as a subject or as an object
        entity_names = list(set(self.kg.subject_list).union(set(self.kg.object_list)))
        
        self.node_text2 = widgets.Dropdown(description = "Node", options = entity_names)
        
        self.edge_subject_text2 = widgets.Dropdown(description = "Subject", options = self.kg.subject_list)
        self.edge_object_text2 = widgets.Dropdown(description = "Object", options = self.kg.object_list)
        self.edge_relation_text2 = widgets.Output(description = "Relation")
        
        # react when the selected subject or object changes
        self.edge_subject_text2.observe(self.edge_subject_text2_eventhandler, names='value')
        self.edge_object_text2.observe(self.edge_object_text2_eventhandler, names='value')
        
        self.node_button2 = widgets.Button(description="Remove node from KG")
        self.edge_button2 = widgets.Button(description="Remove edge from KG")
                                           
        self.node_button2.on_click(self.node_button_click_eventhandler2)
        self.edge_button2.on_click(self.edge_button_click_eventhandler2)        
        
        self.update_rem_edge_output()
        
        node_row = widgets.HBox((self.node_text2, self.node_button2))
        edge_row = widgets.HBox((self.edge_subject_text2, self.edge_object_text2, self.edge_relation_text2, self.edge_button2))
        display(node_row)
        display(edge_row)
                                           
    def common_filtering(self, rel, top):
        """
        Redraw the graph inside ``self.output`` for the given filters.

        rel: selected relation, or 'ALL' for no relation filter.
        top: selected topic, or 'ALL' for no topic filter.

        A filter equal to 'ALL' is not forwarded, i.e. ``display_filtered_graph``
        is called without that keyword.
        """
        # keep only the filters that actually restrict the graph
        active_filters = {}
        if rel != 'ALL':
            active_filters["rel"] = rel
        if top != 'ALL':
            active_filters["top"] = top

        self.output.clear_output()
        with self.output:
            self.kg.display_filtered_graph(**active_filters)

    def dropdown_relation_eventhandler(self, change):
        """Redraw the graph with the newly selected relation and the currently selected topic."""
        selected_relation = change.new
        current_topic = self.dropdown_entity.value
        self.common_filtering(selected_relation, current_topic)

    def dropdown_entity_eventhandler(self, change):
        """Redraw the graph with the currently selected relation and the newly selected topic."""
        current_relation = self.dropdown_relation.value
        selected_topic = change.new
        self.common_filtering(current_relation, selected_topic)

    def refresh_btn_eventhandler(self, b):
        """Redraw the graph with the relation and topic filters that are currently selected."""
        current_relation = self.dropdown_relation.value
        current_topic = self.dropdown_entity.value
        self.common_filtering(current_relation, current_topic)
        
    def node_button_click_eventhandler(self,b):
        """Click handler: run ``add_node_event`` on a freshly started daemon thread."""
        threading.Thread(target=self.add_node_event, daemon=True).start()
    
    def node_button_click_eventhandler2(self,b):
        """Click handler: run ``rem_node_event`` on a freshly started daemon thread."""
        threading.Thread(target=self.rem_node_event, daemon=True).start()
    
    def edge_subject_text2_eventhandler(self, change):
        """Observer for the remove-edge subject dropdown: recompute the relation read-out (`change` is not used)."""
        self.update_rem_edge_output()
    
    def edge_object_text2_eventhandler(self, change):
        """Observer for the remove-edge object dropdown: recompute the relation read-out (`change` is not used)."""
        self.update_rem_edge_output()
        
    def update_rem_edge_output(self):
        """
        Show the relation between the selected subject and object in the
        remove-edge read-out (``self.edge_relation_text2``).

        The selections are translated through ``self.kg.triplets_map`` and the
        resulting pair is looked up in ``self.kg.edges``. "NO RELATION " is shown
        when the pair has no edge, and also when either selection is missing from
        ``triplets_map`` (for example a selection of ``None``).
        """
        with self.edge_relation_text2:
            # drop whatever was shown for the previous selection
            self.edge_relation_text2.outputs = ()
            try:
                # The id lookups belong inside the try as well: a selection that
                # is not in the map must yield the fallback text, not a KeyError.
                sub = self.kg.triplets_map[self.edge_subject_text2.value]
                obj = self.kg.triplets_map[self.edge_object_text2.value]
                txt = self.kg.edges[(sub, obj)]
            except KeyError:
                txt = "NO RELATION "
            self.edge_relation_text2.append_stdout(txt)
                                           
    def add_node_event(self):
        """
        Add the content of the node text box to the knowledge graph as a node.

        Clears ``self.node_output``, adds the node, shows a confirmation, empties
        the text box and refreshes the dropdowns. The confirmation is removed
        again after three seconds; the method blocks for that long.
        """
        with self.node_output:
            self.node_output.outputs = ()

            # read the text box once so the message and the graph use the same name
            node_name = self.node_text.value

            # add first: the confirmation must not be shown if adding raises
            self.kg.add_as_node(node_name)
            self.node_output.append_stdout(f"\'{node_name}\' added to KG as a node. ")
            self.node_text.value = "" # reset text inside

            # refresh dropdown list
            self.refresh_dropdowns()

            # leave the confirmation visible for a moment, then clear it
            time.sleep(3.0)
            self.node_output.outputs = ()
    
    def edit_node_eventhandler(self, b):
        """
        Click handler of the node edit tab.

        Passes the selected node and the text-box content to ``kg.edit_node``,
        then refreshes the dropdowns and redraws the graph.
        """
        self.kg.edit_node(self.edit_node_dropdown.value, self.edit_node_textbox.value)
        self.refresh_dropdowns()

        # redraw with the filters that are selected right now
        relation_filter = self.dropdown_relation.value
        topic_filter = self.dropdown_entity.value
        self.common_filtering(relation_filter, topic_filter)
        
    def edit_rel_eventhandler(self, b):
        """
        Click handler of the relation edit tab.

        Passes the selected relation and the text-box content to ``kg.edit_edge``
        and then re-reads all dropdown options from the knowledge graph.
        """
        self.kg.edit_edge(self.edit_rel_dropdown.value, self.edit_rel_textbox.value)
        self.refresh_dropdowns()
        
    def edit_topic_eventhandler(self, b):
        """
        Click handler of the topic edit tab.

        Passes the selected topic and the text-box content to ``kg.edit_topic``
        and then reloads both topic dropdowns from ``kg.topics_list``: the graph
        filter, and the edit-tab selector (which omits the leading ALL entry).
        """
        old_topic = self.edit_topic_dropdown.value
        new_topic = self.edit_topic_textbox.value
        self.kg.edit_topic(old_topic, new_topic)

        topics = self.kg.topics_list
        self.dropdown_entity.options = topics
        # the selector used for editing must not keep offering the old topic
        self.edit_topic_dropdown.options = topics[1:] # without ALL

    def edit_ans_eventhandler(self, b):
        """Click handler: run ``edit_ans_event`` on a freshly started daemon thread."""
        threading.Thread(target=self.edit_ans_event, daemon=True).start()
            
    def edit_ans_event(self):
        """
        Store the edited candidate answer for the selected triplet.

        Calls ``kg.edit_ans`` with the selected subject, relation and object and
        the content of the answer field. A success or error message, chosen by
        the truthiness of the return value, is shown in ``self.edit_ans_output``
        and removed again after three seconds; the method blocks for that long.
        """
        updated = self.kg.edit_ans(
            self.edit_ans_sub_filter.value,
            self.edit_ans_rel_filter.value,
            self.edit_ans_obj_filter.value,
            self.edit_ans_field.value,
        )
        if updated:
            message = "Candidate answer successfully updated."
        else:
            message = "Error. Triplet not found between selection."

        with self.edit_ans_output:
            self.edit_ans_output.outputs = ()
            self.edit_ans_output.append_stdout(message)
            # leave the message visible for a moment, then clear it
            time.sleep(3.0)
            self.edit_ans_output.outputs = ()
            
            
    def ans_sub_filter_eventhandler(self, change):
        """Subject selection changed: reload the answer field for the new subject and the current relation/object."""
        current_relation = self.edit_ans_rel_filter.value
        current_object = self.edit_ans_obj_filter.value
        return self.ans_filter(change.new, current_relation, current_object)
    
    def ans_rel_filter_eventhandler(self, change):
        """Relation selection changed: reload the answer field for the new relation and the current subject/object."""
        current_subject = self.edit_ans_sub_filter.value
        current_object = self.edit_ans_obj_filter.value
        return self.ans_filter(current_subject, change.new, current_object)
    
    def ans_obj_filter_eventhandler(self, change):
        """Object selection changed: reload the answer field for the new object and the current subject/relation."""
        current_subject = self.edit_ans_sub_filter.value
        current_relation = self.edit_ans_rel_filter.value
        return self.ans_filter(current_subject, current_relation, change.new)
    
    def ans_filter(self, sub, rel, obj):
        """
        Load the sentence stored for a triplet into the answer field.

        sub, rel, obj: the selected subject, relation and object; they are passed
        unchanged to ``kg.get_sentence``.

        When ``get_sentence`` returns None the field is emptied and disabled, so
        the sentence of a previously selected triplet does not stay visible.
        Otherwise the field is enabled and filled with ``ans.item()``.
        """
        ans = self.kg.get_sentence(sub, rel, obj)
        if ans is None:
            # an empty value lets the widget fall back to its placeholder text
            self.edit_ans_field.value = ""
            self.edit_ans_field.disabled = True
        else:
            # NOTE(review): .item() presumably expects exactly one stored sentence -- confirm in get_sentence
            self.edit_ans_field.value = ans.item()
            self.edit_ans_field.disabled = False
    
    def rem_node_event(self):
        """Remove the node selected in the remove-node dropdown, then refresh the dropdowns and the relation read-out."""
        selected_node = self.node_text2.value
        self.kg.remove_node(selected_node)

        # the option lists and the displayed relation may both be out of date now
        self.refresh_dropdowns()
        self.update_rem_edge_output()

    def triplet_button_click_eventhandler(self, b):
        """Click handler: run ``add_triplet_event`` on a freshly started daemon thread."""
        threading.Thread(target=self.add_triplet_event, daemon=True).start()
    
    def edge_button_click_eventhandler2(self,b):
        """Click handler: run ``rem_edge_event`` on a freshly started daemon thread."""
        threading.Thread(target=self.rem_edge_event, daemon=True).start()
        
    def add_triplet_event(self):
        """
        Add the triplet entered in the "add" form to the knowledge graph.

        The edge is added through ``kg.add_as_edge``. When that succeeds, a
        confirmation is shown, the tags of subject and object are fetched and the
        triplet (with the intended answer) is appended to the dataframe; otherwise
        an error message is shown. In both cases the form is emptied, the
        dropdowns are refreshed and the message is removed after three seconds;
        the method blocks for that long.

        NOTE(review): the original note says nodes missing from the graph are
        added as well -- presumably inside ``kg.add_as_edge``; confirm there.
        """
        with self.triplet_output:
            # start from an empty read-out, like the other background handlers
            self.triplet_output.outputs = ()

            # Read the form once: the message, the edge and the dataframe row
            # then all describe the same triplet.
            subject = self.subject_text.value
            obj = self.object_text.value
            relation = self.relation_text.value
            answer = self.answer_text.value

            # Add as edge
            if self.kg.add_as_edge(subject, obj, relation):
                self.triplet_output.append_stdout(f"Edge \'{relation}\' added between subject \'{subject}\' and object \'{obj}\'. ")

                subject_topics = self.kg.get_tags(subject, "subject")
                object_topics = self.kg.get_tags(obj, "object")

                # Add to dataframe; the ids are read only after the edge exists
                self.kg.add_triplet_to_df(answer, subject, relation, obj,
                                          self.kg.triplets_map[subject], self.kg.triplets_map[obj],
                                          subject_topics, object_topics)
            else:
                self.triplet_output.append_stdout("Unable to add edge. A relationship already exists between the two nodes.")

            # reset the form
            self.subject_text.value = ""
            self.object_text.value = ""
            self.relation_text.value = ""
            self.answer_text.value = ""

            # refresh dropdown list
            self.refresh_dropdowns()

            # leave the message visible for a moment, then clear it
            time.sleep(3.0)
            self.triplet_output.outputs = ()
            
    def rem_edge_event(self):
        """
        Remove the edge between the selected subject and object.

        Afterwards every relation dropdown is reloaded from ``kg.relation_list``
        (the edit-tab selectors omit the leading ALL entry) and the relation
        read-out is recomputed. The subject/object dropdowns are not reassigned
        here.
        """
        self.kg.remove_edge(self.edge_subject_text2.value, self.edge_object_text2.value)

        # all relation selectors must stop offering a relation that may be gone
        relations = self.kg.relation_list
        self.dropdown_relation.options = relations
        self.relation_text.options = relations
        self.edit_rel_dropdown.options = relations[1:] # without ALL
        self.edit_ans_rel_filter.options = relations[1:] # without ALL

        self.update_rem_edge_output()
            
        
    def refresh_dropdowns(self):
        """
        Reload the options of every dashboard dropdown from the knowledge graph.

        Node selectors get the union of subjects and objects; the edit-tab
        relation and topic selectors get the respective list without its first
        entry (ALL).
        """
        kg = self.kg
        # every name that occurs as a subject or as an object
        node_names = set(kg.subject_list) | set(kg.object_list)

        self.subject_text.options = list(node_names)
        self.object_text.options = list(node_names)
        self.relation_text.options = kg.relation_list
        self.node_text2.options = list(node_names)
        self.dropdown_relation.options = kg.relation_list
        self.dropdown_entity.options = kg.topics_list
        self.edge_subject_text2.options = kg.subject_list
        self.edge_object_text2.options = kg.object_list
        self.edit_node_dropdown.options = list(node_names)
        self.edit_rel_dropdown.options = kg.relation_list[1:] # without ALL
        self.edit_topic_dropdown.options = kg.topics_list[1:] # without ALL
        self.edit_ans_sub_filter.options = kg.subject_list
        self.edit_ans_rel_filter.options = kg.relation_list[1:] # without ALL
        self.edit_ans_obj_filter.options = kg.object_list
            

In [88]:
# Load the extracted triplets from a JSON file (path relative to the working directory),
# build the knowledge graph from them and create the dashboard on top of it.
triplets_df = pd.read_json(r"json_extract_5.json")
kg = KnowledgeGraph(triplets_df)
d = Dashboard(kg)

HBox(children=(Dropdown(description='Filter by relation', options=('ALL', "'m driving", 'Driving', '[is] Compu…

Output()





HBox(children=(Combobox(value='', description='Subject', options=('to TDVL', 'PDVL courses', 'the " Assessment…

Textarea(value='', description='Intended answer', placeholder='Write the intended answer for the query here.')





Tab(children=(VBox(children=(Dropdown(description='Node to edit', options=('to TDVL', 'PDVL courses', 'the " A…





HBox(children=(Dropdown(description='Node', options=('to TDVL', 'PDVL courses', 'the " Assessment on Fitness t…

HBox(children=(Dropdown(description='Subject', options=('3 questions in English', 'A PDVL', 'A chauffeur - dri…