In [168]:
import networkx as nx
import pandas as pd
# from dtl.graph_dtl import GraphDTL


class GraphBaseModule(object):
    def __init__(self, nodes=None, edges=None, read_from_db=False):
        self.nodes = nodes
        self.edges = edges

#         if read_from_db:
#             self.graph_dtl = GraphDTL()
#             self.nodes = self.graph_dtl.get_nodes()
#             self.edges = self.graph_dtl.get_edges


        if not (self.nodes and self.edges):
            self.nodes = pd.read_csv(
                "../sample_data/MajedAlasmariAP-nodes.csv")
            self.edges = pd.read_csv(
                "../sample_data/MajedAlasmariAP-edges.csv")

    def get_graph(self):
        return self.DG

    def create(self):
        self.DG = nx.DiGraph()

        for idx, node in self.nodes.iterrows():

            node_id = node['Id']
            del node['Id']

            attributes = {node_id: node}

            self.DG.add_node(node_id)
            nx.set_node_attributes(self.DG, attributes)

        for idx, edge in self.edges.iterrows():
            source, target = edge['Source'], edge['Target']
            del edge['Source'], edge['Target']

            attributes = {(source, target): edge}

            self.DG.add_edge(source, target)
            nx.set_edge_attributes(self.DG, attributes)

    def show_random(self):
        import matplotlib.pyplot as plt

        plt.figure(figsize=(15, 12))
        nx.draw_random(self.DG)
        plt.show()

    

In [264]:
import re
import networkx as nx
# from graph.base import GraphBaseModule
from collections import Counter


class FilterModule(GraphBaseModule):
    def __init__(self, DG=None):
        self.DG = DG
        if not self.DG:
            super(FilterModule, self).__init__()
            self.create()

    # attr_value can be a regex
    def node_attribute_partition(self, attr_name: str, attr_value: str):
        filtered_nodes = [node for node, value in self.DG.nodes(
            data=True) if re.search(str(attr_value), str(value.get(attr_name, "")))]
        return filtered_nodes

    # attr_value can be a regex
    def edge_attribute_partition(self, attr_name: str, attr_value: str):
        filtered_edges = [(source, target) for source, target, value in self.DG.edges(
            data=True) if re.search(str(attr_value), str(value.get(attr_name, "")))]
        return filtered_edges

    def node_quantitative_attribute(self, attr_name: str, attr_range: tuple = None):
        start, end = attr_range
        filtered_nodes = [node for node, value in self.DG.nodes(data=True) if value.get(
            attr_name, None) <= end and value.get(attr_name, None) >= start]
        return filtered_nodes

    def edge_quantitative_attribute(self, attr_name: str, attr_range: tuple = None):
        start, end = attr_range
        filtered_edges = [(source, target) for source, target, value in self.DG.edges(
            data=True) if value.get(attr_name, None) <= end and value.get(attr_name, None) >= start]
        return filtered_edges

    def search(self, attrs_names, attr_value):  # , close_items: bool = False):
        # matched_nodes = list()
        # matched_edges = list()

        close_nodes = list()
        # close_edges = list()

        for attr_name in attrs_names:
            # matched_nodes += [node for node, value in self.DG.nodes(
            #     data=True) if re.fullmatch(str(attr_value), str(value.get(attr_name, "")))]
            close_nodes += [node for node, value in self.DG.nodes(
                data=True) if re.search(str(attr_value), str(value.get(attr_name, "")))]

            # matched_edges += [(source, target) for source, target, value in self.DG.edges(
            #     data=True) if re.fullmatch(str(attr_value), str(value.get(attr_name, "")))]
            # if close_items:
            #     close_edges += [(source, target) for source, target, value in self.DG.edges(
            #         data=True) if re.search(str(attr_value), str(value.get(attr_name, "")))]

        # matched_items = {"nodes": matched_nodes, "edges": matched_edges}
        closed_items = {"nodes": close_nodes}  # , "edges": close_edges}

        # if close_items:
        #     return matched_items, closed_items
        # return matched_items
        return closed_items

#     def get_edges_feature_names(self):
#         feature_names = list()
#         for _, _, features in self.DG.edges(data=True):
#             feature_names = features.keys()
#             break
#         return feature_names

#     def get_all_edges_features(self):
#         edge_feature_names = self.get_edges_feature_names()
#         edge_features = dict()
#         for feature_name in edge_feature_names:
#             edge_features[feature_name] = list()

#         for _, _, features in self.DG.edges(data=True):
#             for feature_name, feature_value in features.items():
#                 edge_features[feature_name].append(feature_value)

#         return edge_features

    def get_nodes_feature_names(self):
        feature_names = list()
        for _, features in self.DG.nodes(data=True):
            feature_names = features.keys()
            break
        return feature_names

    def get_all_nodes_features(self):
        node_feature_names = self.get_nodes_feature_names()
        node_features = dict()
        for feature_name in node_feature_names:
            node_features[feature_name] = list()

        for _, features in self.DG.nodes(data=True):
            for feature_name, feature_value in features.items():
                node_features[feature_name].append(feature_value)

        return node_features

    def identify_string_feature_type(self, feature_values):
        feature_type = None
        for value in feature_values:
            if value is not None:
                if type(value) is str:
                    feature_type = "str"
                    break
        return feature_type

    def check_repetitivity(self, feature_values):
        feature_type = self.identify_string_feature_type(feature_values)
        if feature_type != "str":
            return False

        value_counts = dict(Counter(feature_values))
        
        if any(map(lambda item: item[1] > 1 and not pd.isna(item[0]), value_counts.items())):
            return True
        else:
            return False

    def get_repetitive_features(self, features):
        repetitive_features = dict()
        for feature_name, feature_values in features.items():
            if re.search(feature_name, "_class") or self.check_repetitivity(feature_values):
                repetitive_features[feature_name] = feature_values
            else:
                continue

        return repetitive_features

    def validate_frequencies(self, frequencies):
        validated_frequencies = dict()
        for value, count in frequencies.items():
            if count >= 2: #and not pd.isna(value):
                validated_frequencies[value] = count

        return validated_frequencies

    def compute_repetitive_features_frequencies(self, repetitive_features):
        for feature_name, feature_values in repetitive_features.items():
            frequencies = Counter(feature_values)
            revised_frequencies = self.validate_frequencies(frequencies)
            repetitive_features[feature_name] = dict(revised_frequencies)

        return repetitive_features

    def identify_repetitive_features(self):
        nodes_features = self.get_all_nodes_features()
        nodes_repetitive_features = self.get_repetitive_features(
            nodes_features)
        node_repeatetie_feature_frequencies = self.compute_repetitive_features_frequencies(
            nodes_repetitive_features)

#         edges_features = self.get_all_edges_features()
#         edges_repetitive_features = self.get_repetitive_features(
#             edges_features)
#         edge_repeatetie_feature_frequencies = self.compute_repetitive_features_frequencies(
#             edges_repetitive_features)

        feature_filters = self.create_filters(node_repeatetie_feature_frequencies)
        
        return feature_filters
    
    def create_filters(self, features_frequencies):
        features_filters = dict()
        all_features_names = self.get_nodes_feature_names()
        for feature_name, feature_frequencies in features_frequencies.items():
            features_filters[feature_name] = self.identify_frequent_variable_properties(feature_name, feature_frequencies)
            all_features_names.remove(feature_name)
        
        numerical_features_filters = self.identify_numerical_features_filters(all_features_names)
        return features_filters.update(numerical_features_filters)
    
    def get_node_feature_values_range(self, feature_name):
        values = list()
        for _, features in self.DG.nodes(data=True):
            values.append(features[feature_name])
            
        return min(values), max(values)
    
    def is_numerical(self, feature_name):
        feature_type = None
        for _, features in self.DG.nodes(data=True):
            tpe = type(features[feature_name])
            if (tpe is float and not pd.isna(features[feature_name])) or tpe is int:
                return True
            else:
                continue
        
        return False
        
    def identify_numerical_features_filters(self, possible_numerical_features):
        numerical_features_filters = dict()
        for feature_name in possible_numerical_features:
            if self.is_numerical(feature_name):
                values = self.
                min_value, max_value = self.get_node_feature_values_range(feature_name)
                numerical_features_filters[feature_name] = {"editable": True,
                                                           "enable": True,
                                                           "type": "range",
                                                           "options": [min_value, max_value],
                                                           "value": [min_value, max_value]}
                
            else:
                continue
                
        return numerical_features_filters
        
        
    def identify_frequent_variable_properties(self, feature_name, feature_frequencies):
        sorted_items = dict(sorted(feature_frequencies.items(), key=lambda item: item[1]))
        
        if "_class" in feature_name:
            return {"editable": True,
                   "enable": True,
                   "type": "enum-eq",
                   "options": sorted_items.values(),# frontend gets the count of each frequent value as options
                   "value": sorted_items.keys()}
        else:
            return {"editable": True,
                   "enable": True,
                   "type": "string",
                   "options": sorted_items.values(),# frontend gets the count of each frequent value as options
                   "value": sorted_items.keys()} 

SyntaxError: invalid syntax (<ipython-input-264-0028b6717ee0>, line 190)

In [256]:
module = FilterModule()

In [257]:
freq = module.identify_repetitive_features()

In [258]:
import networkx as nx
# from graph.base import GraphBaseModule


class TopologyModule(GraphBaseModule):
    def __init__(self, DG=None):
        self.DG = DG
        if not self.DG:
            super(TopologyModule, self).__init__()
            self.create()

    def mutuals(self, node: str = None, depth_limit: int = None):
        mutual_graph = nx.Graph()

        if depth_limit is None:
            nodes = list(self.DG.nodes())
            num_nodes = len(nodes)

            for i in range(num_nodes):
                for j in range(i+1, num_nodes):
                    if self.DG.has_edge(nodes[i], nodes[j]) and self.DG.has_edge(nodes[j], nodes[i]):
                        mutual_graph.add_edge(nodes[i], nodes[j])

            return mutual_graph

        else:
            not_visited = list()
            not_visited.append(node)

            depth = 0
            while len(not_visited) != 0:
                depth += 1
                current_node = not_visited.pop(0)
                for (source, target) in self.DG.edges(current_node):
                    if self.DG.has_edge(target, source):
                        not_visited.append(target)
                        mutual_graph.add_edge(source, target)

                if depth == depth_limit:
                    break

            return mutual_graph


    def neighbourhood_depth(self, node, neighbour_depth):
        neighbours_graph = nx.DiGraph()
        for source, target in nx.bfs_edges(self.DG, node, depth_limit=neighbour_depth):
            neighbours_graph.add_edge(source, target)
            
            edge_attributes = {(source, target): self.DG[source][target]}
            nx.set_edge_attributes(neighbours_graph, edge_attributes)
            
            source_node_attributes = {source: self.DG.nodes(data=True)[source]}
            target_node_attributes = {target: self.DG.nodes(data=True)[target]}

            nx.set_node_attributes(neighbours_graph, source_node_attributes)
            nx.set_node_attributes(neighbours_graph, target_node_attributes)

        return neighbours_graph

        #         neighbours_graph = nx.DiGraph()

        #         visited = set()
        #         not_visited = list()
        #         not_visited.append(node)


        #         depth = 0
        #         while len(not_visited) != 0:
        #             depth += 1
        #             current_node = not_visited.pop(0)
        #             for source, target in self.DG.edges(current_node):
        #                 if target not in visited:
        #                     not_visited.append(target)
        #                     neighbours_graph.add_edge(source, target)

        #             visited.add(current_node)

        #             if depth == neighbor_depth:
        #                 break

        #         return neighbours_graph


In [259]:
t_module = TopologyModule()

In [260]:
neighbourhood_graph = t_module.neighbourhood_depth("NasserAlajmi89", neighbour_depth=3)

In [261]:
import json


class QuerySerializer:
    def __init__(self, DG):
        self.DG = DG

    def serialize(self, features_filters=None, settings=None, center_node_id=None, limit=None, has_access=None):
        query_result = dict()

        query_result["graph"] = self.create_graph_interface(features_filters=features_filters, settings=settings, center_node_id=center_node_id)
        query_result["user"] = self.create_user_data(limit, has_access)

        return json.dumps(query_result)

    def create_graph_interface(self, node_new_feature=None, edge_new_feature=None, features_filters=None, settings=None, center_node_id=None):
        nodes = dict()
        if node_new_feature:
            for node, features in self.DG.nodes(data=True):
                features = dict(features).update(node_new_feature)
                nodes[node] = features
        else:
            nodes = {node: features for node , features in self.DG.nodes(data=True)}
        
        edges = dict()
        if edge_new_feature:
            for source, target, features in self.DG.edges(data=True):
                features = dict(features).update(edge_new_feature)
                features["from"], features["to"] = source, target
                edges[features["Id"]] = features
                
        else:
            for source, target, features in self.DG.edges(data=True):
                features["from"], features["to"] = source, target
                edges[features["Id"]] = features
        
        meta = self.create_graph_meta(features_filters, settings, center_node_id)
        return {"nodes": nodes,
                "edges": edges,
                "meta": meta}

    def create_graph_meta(self, features_filters=None, settings=None, center_node_id=None):
        settings = self.create_settings(settings)
        filters = self.create_filters(features_filters)

        return {"settings": settings,
                "filters": filters,
                "centerNodeId": center_node_id}

    def create_settings(self, settings=None):
        serialized_settings = dict()
        serialized_settings["layout"] = {"enable": True,
                                        "editable": True,
                                        "type": "enum-eq",
                                        "options": ['forceatlas2', 'force'],
                                        "value": 'forceatlas2'}
        
        serialized_settings["nodeLabelStyle"] = {"enable": True,
                                                "editable": True,
                                                "type": "other", 
                                                "value": {"fixed": True, "size": 10, "color": "blue" }}
        
        return serialized_settings

    def create_filters(self, features_filters=None):
        serilized_filters = dict()
        serilized_filters["nodeMetrics"] = {"enable": True,
                                           "editable": True,
                                           "type": "object",
                                           "children": features_filters}
        
        serilized_filters["depth"] = {"enable": True,
                                     "editable": True,
                                     "type": "enum-leq",
                                     "options": [1, 2, 3],
                                     "value": 3}

        return serilized_filters

    def create_user_data(self, limit=None, has_access=None):
        return {"nodeCountLimit": limit,
                "hasAccess": has_access}

In [262]:
query_serializer = QuerySerializer(neighbourhood_graph)

In [263]:
query_serializer.serialize()
# for s, t, f in query_serializer.DG.edges(data=True):
#     print(s)
#     print(t)
#     print(f)
#     break

