In [6]:
import os

import networkx as nx
import numpy as np
import pandas as pd
import pandas as pdt  # legacy alias, kept in case other cells reference it; the code below uses `pd`
import rdflib
import yaml
from rdflib import Namespace

In [178]:
class Test():
    """Prototype that links 'filtering' entities to 'target' entities in the knowledge graph.

    ``mappedEntities`` holds the concepts extracted from a user question: entries
    of type ``'filtering'`` carry a list of ``'related'`` knowledge-graph
    entities, entries of type ``'target'`` name the classes of node the user is
    looking for.  ``getNetworkData`` walks the RDF graph from every related
    entity and collects the reachable nodes of the target classes.
    """

    # Short class name (as used in ``mappedEntities``) -> ontology class URI.
    CLASS2URI = {
        'Dataset': "http://vivoweb.org/ontology/core#Dataset",
        'Best Practices And Mandates': 'http://www.sdsconsortium.org/schemas/sds-okn.owl#BestPracticesAndMandates',
        'Infrastructure': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/sustsource.owl#Infrastructure',
        'Organization': 'http://xmlns.com/foaf/0.1/Organization',
        'Person': 'http://xmlns.com/foaf/0.1/Person',
        'Program': 'http://vivoweb.org/ontology/core#Program',
        'Project': 'http://vivoweb.org/ontology/core#Project',
        'Issue': 'http://purl.obolibrary.org/obo/BFO_0000023',
        'Tool': 'http://www.sdsconsortium.org/schemas/sds-okn.owl#Tool'
    }
    # Reverse lookup: ontology class URI -> short class name.
    URI2CLASS = {uri: name for name, uri in CLASS2URI.items()}

    def __init__(self, data_dir='../data'):
        """Load the sample question mapping and every data file the prototype needs.

        Args:
            data_dir: Directory containing ``nodes_info.csv``,
                ``node_phrasebert_embeddings.npy``, ``PPOD.yaml``,
                ``PPOD_CA.ttl`` and ``ontology_ppod.graphml``.  Defaults to
                the location the notebook has always read from.
        """
        # Hard-coded sample input: one filtering concept with its related
        # knowledge-graph entities, and one target concept.
        self.mappedEntities = [
            {
                'type': 'filtering',
                'entity': 'water quality',
                'class': ['Dataset', 'Project', 'Program'],
                'related': [
                    {
                        'text': 'Water Quality & Watersheds Program',
                        'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b',
                        'class': 'http://vivoweb.org/ontology/core#Program',
                        'idx': 5134,
                        'color': '#1a9850',
                    },
                    {
                        'text': 'Nitrate Groundwater Pollution Hazard Index',
                        'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#tol_b14e38',
                        'class': 'http://www.sdsconsortium.org/schemas/sds-okn.owl#Tool',
                        'idx': 6775,
                        'color': '#d73027',
                    },
                    {
                        'text': 'Safe Drinking Water, Water Quality and Supply, Flood Control, River and Coastal Protection Bond Act of 2006',
                        'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#gmt_3b0a8a',
                        'class': 'http://www.sdsconsortium.org/schemas/sds-okn.owl#BestPracticesAndMandates',
                        'idx': 155,
                        'color': '#d73027',
                    },
                    {
                        'text': 'Water Resources Specialist',
                        'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#rol_9b72be',
                        'class': 'http://purl.obolibrary.org/obo/BFO_0000023',
                        'idx': 6344,
                        'color': '#d73027',
                    },
                    {
                        'text': 'Advanced Water Purification Facility',
                        'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#inf_1640d2',
                        'class': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/sustsource.owl#Infrastructure',
                        'idx': 605,
                        'color': '#d73027',
                    },
                    {
                        'text': 'Healthy Watersheds',
                        'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_24edc1',
                        'class': 'http://vivoweb.org/ontology/core#Program',
                        'idx': 4982,
                        'color': '#1a9850',
                    },
                ],
            },
            {
                'type': 'target',
                'entity': 'collaborators',
                'class': ['Person', 'Organization'],
            },
        ]
        # Node table; getNetworkData reads its 'entity' and 'type' columns by row position.
        self.data = pd.read_csv(os.path.join(data_dir, 'nodes_info.csv'))
        self.phraseEmbs = np.load(os.path.join(data_dir, 'node_phrasebert_embeddings.npy'))
        self.linkml, self.g = self.readKG(
            os.path.join(data_dir, 'PPOD.yaml'),
            os.path.join(data_dir, 'PPOD_CA.ttl'),
        )
        self.ont = nx.read_graphml(os.path.join(data_dir, 'ontology_ppod.graphml'))

        # A wider list was tried earlier: [5134, 4982, 5021, 5043, 5648].
        self.relevantEntId = [5134]

    def readKG(self, file1, file2):
        """Parse the Turtle graph and bind the prefixes declared in the YAML schema.

        Args:
            file1: Path to a YAML file with a top-level ``prefixes`` mapping
                of prefix name to namespace IRI.
            file2: Path to the knowledge graph in Turtle format.

        Returns:
            ``(linkml, g)``: the parsed YAML document and the ``rdflib.Graph``.

        Raises:
            KeyError: If the YAML document has no ``prefixes`` entry.
        """
        g = rdflib.Graph()
        g.parse(file2, format="turtle")
        # Explicit encoding so the result does not depend on the platform default.
        with open(file1, 'r', encoding='utf-8') as file:
            linkml = yaml.safe_load(file)
        for name, iri in linkml['prefixes'].items():
            g.bind(name, Namespace(iri))
        return linkml, g

    def getTargetEntities(self, target_uris, target_classes, node_class, node_id):
        """Find labelled nodes of the target classes reachable from ``node_id``.

        Args:
            target_uris: Class IRIs to look for.
            target_classes: Short class names, parallel to ``target_uris``.
            node_class: Unused; kept so existing callers keep working.
            node_id: IRI of the node to start the traversal from.

        Returns:
            A list of dicts with keys ``target_entity``, ``target_label``,
            ``target_uri`` and ``target_class``.  The historical misspelled
            key ``targte_class`` is still emitted with the same value so
            existing consumers keep working.
        """
        graph = []
        for uri, class_name in zip(target_uris, target_classes):
            # `<>|!<>` matches any predicate (the predicate `<>` or anything
            # that is not `<>`); `*` repeats it zero or more times, so ?target
            # ranges over everything reachable along outgoing edges.
            # The IRIs are spliced into the query text, so they must be trusted.
            # Prefixes are declared explicitly so the query does not rely on
            # the graph's namespace bindings.
            query = f"""
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT ?target ?label 
                WHERE {{
                    BIND(<{node_id}> AS ?startNode)
                    ?startNode (<>|!<>)* ?target .
                    ?target rdf:type <{uri}> .
                    ?target rdfs:label ?label . 
                }}
            """
            results = self.g.query(query)

            for row in results:
                graph.append({
                    "target_entity": str(row['target'].toPython()),
                    "target_label": str(row['label'].toPython()),
                    "target_uri": uri,
                    # Misspelled key kept for backward compatibility.
                    "targte_class": class_name,
                    "target_class": class_name,
                })
        return graph

    def getNetworkData(self):
        """Build one record per related entity of every 'filtering' concept.

        Returns:
            A list of dicts with keys ``source_label``, ``source_class``,
            ``source_uri``, ``source_id``, ``source_color`` and
            ``target_nodes`` (the output of ``getTargetEntities``).

        Raises:
            KeyError: If a target class name or a node's type URI is not in
                ``CLASS2URI``.
        """
        target_class = []
        for ele in self.mappedEntities:
            if ele['type'] == 'target':
                target_class += ele['class']
        # De-duplicate while keeping first-seen order; a set would make the
        # query order (and so the result order) vary between kernel runs.
        target_class = list(dict.fromkeys(target_class))
        target_uris = [self.CLASS2URI[name] for name in target_class]

        graph_data = []
        for ele in self.mappedEntities:
            if ele['type'] != 'filtering':
                continue
            # A filtering concept without matches simply contributes nothing.
            for ent in ele.get('related', []):
                idx = ent['idx']
                print(idx)  # progress trace, one line per related entity
                row = self.data.iloc[idx]  # positional lookup in the node table
                node_id = row['entity']
                node_class_uri = row['type']
                node_class = self.URI2CLASS[node_class_uri]
                small_graph = self.getTargetEntities(target_uris, target_class, node_class, node_id)
                graph_data.append({
                    'source_label': ent['text'],
                    'source_class': node_class,
                    'source_uri': node_class_uri,
                    'source_id': node_id,
                    'source_color': ent['color'],
                    'target_nodes': small_graph
                })
        return graph_data


In [179]:
# Build the helper; its constructor reads the node table, embeddings,
# knowledge graph and ontology from ../data.
t = Test()

In [180]:
# For every entity related to a 'filtering' concept, collect the reachable
# nodes of the 'target' classes. Each processed row index is printed.
graph_data_test = t.getNetworkData()

5134
6775
155
6344
605
4982


In [182]:
# Inspect the collected records.
# NOTE(review): the saved output below uses keys such as 'source_node' and
# 'source_entity' that the current getNetworkData does not emit; it looks like
# it came from an earlier version of the class. Re-run to refresh it.
graph_data_test

[{'source_node': {'text': 'Water Quality & Watersheds Program',
   'entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b',
   'class': 'http://vivoweb.org/ontology/core#Program',
   'idx': 5134,
   'color': '#1a9850'},
  'target_nodes': [{'source_entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b',
    'target_entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#org_902d0e',
    'target_label': "Governor's Office of Planning and Research",
    'target_uri': 'http://xmlns.com/foaf/0.1/Organization',
    'targte_class': 'Organization',
    'source_class': 'Program'},
   {'source_entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b',
    'target_entity': 'https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#org_114dfb',
    'target_label': 'Bay Planning Coalition',
    'target_uri': 'htt

In [110]:
# query = """
# SELECT ?target
#     WHERE {{
#         BIND(<https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b> AS ?startNode)
#         ?startNode ?p ?target .
#         ?target rdf:type <http://xmlns.com/foaf/0.1/Organization> .
#     }}

# """

In [116]:
# Variant of the traversal query that combines `<>` with its inverse `^<>`,
# looking for Organization nodes from the Water Quality & Watersheds Program.
# No values are interpolated, so a plain string literal is enough.
query = """

SELECT ?target
    WHERE {
        BIND(<https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b> AS ?startNode)
        ?startNode ((<>|^<>))* ?target .
        ?target rdf:type <http://xmlns.com/foaf/0.1/Organization> .
    }
"""

In [142]:
# Traversal query using the any-predicate path `(<>|!<>)*`, looking for Person
# nodes from the Water Quality & Watersheds Program.
# No values are interpolated, so a plain string literal is enough.
query = """

SELECT ?target 
    WHERE {
        BIND(<https://raw.githubusercontent.com/adhollander/FSLschemas/main/CA_PPODterms.ttl#prg_c46c0b> AS ?startNode)
        ?startNode (<>|!<>)* ?target .
        ?target rdf:type <http://xmlns.com/foaf/0.1/Person> .

    }
"""

In [143]:
# Run whichever `query` string was defined last against the loaded graph.
results = t.g.query(query)

In [144]:
# `graph` is initialised but nothing is appended to it in this cell.
graph = []
# Print every result row of the query run above.
for row in results:
    print(row)

In [84]:
# Shows only the result object's repr; iterate over `results` to see the rows.
results

<rdflib.plugins.sparql.processor.SPARQLResult at 0x30fddb510>