In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import scipy
import time
import uuid

import matplotlib.pyplot as plt
import networkx as nx

from req_analysis import Requirement

## Initialize connection to Neptune

In [None]:
# Execute the Neptune helper script inside this kernel's namespace.
# Presumably this is what defines `neptune` (and the Gremlin step helpers used
# further down) -- TODO confirm against req_analysis/neptune-util.py.
%run 'req_analysis/neptune-util.py'
# NOTE(review): the cluster host and port are hard-coded here; consider
# supplying them from outside the notebook. Keep comments on their own lines:
# text after the value on an %env line would become part of the value.
%env NEPTUNE_CLUSTER_ENDPOINT=ec2-18-224-170-154.us-east-2.compute.amazonaws.com
%env NEPTUNE_CLUSTER_PORT=8182
# Graph traversal source; later cells re-create it before each use.
g = neptune.graphTraversal()

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON, POST, DIGEST, URLENCODED

# Client for the SPARQL endpoint. Requests are sent URL-encoded; the public
# setter is used with the URLENCODED constant imported above.
sparql = SPARQLWrapper(
    "http://ec2-18-224-170-154.us-east-2.compute.amazonaws.com:3001/sparql"
)
sparql.setRequestMethod(URLENCODED)

In [None]:
# SPARQL SELECT over the graph mms-graph:data.tmt.
# Returns (?subject, ?edge, ?str) where ?edge is either
# mms-property:bodyStringFromComment or mms-property:valueString, and ?str
# contains "REQ-" and is longer than 20 characters.
# The trailing `values ?subject` clause pins the query to one hard-coded
# element; remove or extend it to scan more text elements.
queryString = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix xml: <http://www.w3.org/XML/1998/namespace/>
prefix mms-ontology: <https://opencae.jpl.nasa.gov/mms/rdf/ontology/>
prefix mms-graph: <https://opencae.jpl.nasa.gov/mms/rdf/graph/>
prefix mms-property: <https://opencae.jpl.nasa.gov/mms/rdf/property/>
prefix mms-class: <https://opencae.jpl.nasa.gov/mms/rdf/class/>
prefix mms-element: <https://opencae.jpl.nasa.gov/mms/rdf/element/>
prefix mms-artifact: <https://opencae.jpl.nasa.gov/mms/rdf/artifact/>
prefix mms-index: <https://opencae.jpl.nasa.gov/mms/rdf/index/>
prefix xmi: <http://www.omg.org/spec/XMI/20131001#>
prefix uml: <http://www.omg.org/spec/UML/20161101#>
prefix uml-model: <https://www.omg.org/spec/UML/20161101/UML.xmi#>
prefix uml-primitives: <https://www.omg.org/spec/UML/20161101/PrimitiveTypes.xmi#>
prefix uml-class: <https://opencae.jpl.nasa.gov/mms/rdf/uml-class/>
prefix uml-property: <https://opencae.jpl.nasa.gov/mms/rdf/uml-property/>

select * from mms-graph:data.tmt {
    ?subject ?edge ?str .

    values ?edge {
        mms-property:bodyStringFromComment
        mms-property:valueString
    }
    
    filter(contains(?str, "REQ-") && strlen(?str) > 20)
    # filter(isLiteral(?str) && ?str != "")
    
    values ?subject { mms-element:_831_331c90b6-9e17-4c46-8530-0fc345e0a22f_asi-slot-_11_5EAPbeta_be00301_1147873226632_528960_2311-slotvalue-0-literalstring }
}
"""

In [None]:
# SPARQL SELECT over the graph mms-graph:data.tmt.
# Returns (?element, ?label) for every element whose type is uml-class:Class
# or is connected to it through zero or more rdfs:subClassOf steps. ?label is
# taken from either rdfs:label or mms-property:name and must be a non-empty
# literal. Because of the union, an element with both properties yields one
# row per label.
queryElements = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix xml: <http://www.w3.org/XML/1998/namespace/>
prefix mms-ontology: <https://opencae.jpl.nasa.gov/mms/rdf/ontology/>
prefix mms-graph: <https://opencae.jpl.nasa.gov/mms/rdf/graph/>
prefix mms-property: <https://opencae.jpl.nasa.gov/mms/rdf/property/>
prefix mms-class: <https://opencae.jpl.nasa.gov/mms/rdf/class/>
prefix mms-element: <https://opencae.jpl.nasa.gov/mms/rdf/element/>
prefix mms-artifact: <https://opencae.jpl.nasa.gov/mms/rdf/artifact/>
prefix mms-index: <https://opencae.jpl.nasa.gov/mms/rdf/index/>
prefix xmi: <http://www.omg.org/spec/XMI/20131001#>
prefix uml: <http://www.omg.org/spec/UML/20161101#>
prefix uml-model: <https://www.omg.org/spec/UML/20161101/UML.xmi#>
prefix uml-primitives: <https://www.omg.org/spec/UML/20161101/PrimitiveTypes.xmi#>
prefix uml-class: <https://opencae.jpl.nasa.gov/mms/rdf/uml-class/>
prefix uml-property: <https://opencae.jpl.nasa.gov/mms/rdf/uml-property/>

select * from mms-graph:data.tmt {
    ?element a/rdfs:subClassOf* uml-class:Class
    {
        ?element rdfs:label ?label 
    } union {
        ?element mms-property:name ?label
    }
    
    filter(isLiteral(?label) && ?label != "")
}
"""

## Clear output graph

In [None]:
# Destructive step: empties the autocref test graph so that the INSERT further
# down starts from a clean slate. The client is switched to HTTP POST first,
# and the raw response body is printed for inspection.
sparql.setMethod(POST)
sparql.setQuery("""
clear graph <https://opencae.jpl.nasa.gov/mms/rdf/graph/autocref.tmt.test>
""")
results = sparql.query()
print(results.response.read())

## Load model elements in memory

In [None]:
# Run the model-element query and keep the JSON-decoded result set.
sparql.setReturnFormat(JSON)
sparql.setQuery(queryElements)
el_results = sparql.query().convert()

In [None]:
# Flatten the SPARQL bindings into plain {uri, name} records, one per
# (element, label) row returned by the query.
model_elements = [
    dict(uri=binding['element']['value'], name=binding['label']['value'])
    for binding in el_results["results"]["bindings"]
]

## Load text element(s) of interest

In [None]:
# Run the requirement-text query and keep the JSON-decoded result set.
sparql.setReturnFormat(JSON)
sparql.setQuery(queryString)
results = sparql.query().convert()

## Matching

In [None]:
# Tunables for the matching pass (previously inline magic numbers).
MATCH_THRESHOLD = 0.0030  # second argument passed to Requirement.match_req_tokens
MIN_MATCHES = 3           # stop at the first requirement with MORE matches than this

# Walk the text elements returned by the SPARQL query, match each one against
# the model elements, and stop at the first requirement that yields enough
# matches. `req_object`, `matches` and `c` are left bound to that requirement
# for the cells below.
time1 = time.perf_counter()  # monotonic clock: safe for measuring elapsed time
for result in results["results"]["bindings"]:
    req_uri = result["subject"]["value"]
    req_text = result["str"]["value"]
    req_object = Requirement(req_uri, req_text)
    matches, c = req_object.match_req_tokens(model_elements, MATCH_THRESHOLD)
    if len(matches) > MIN_MATCHES:
        print('Req ID: ', req_uri, '\nReq text:', req_text)
        print(matches, '\n___________')
        print(c, 'comparisons')
        print('Time: ', time.perf_counter() - time1)
        break
else:
    # Reached only when the loop never hit `break`: either the query returned
    # no rows (req_object is then not defined by this cell) or no requirement
    # passed the threshold (req_object is simply the last one examined).
    print('No requirement produced more than', MIN_MATCHES, 'matches')

In [None]:
# Re-create the traversal source and hand it to the requirement object.
# Presumably this populates req_object.req_subgraph, which the following
# cells read -- TODO confirm in req_analysis.Requirement.
g = neptune.graphTraversal()
req_object.init_match_subgraph(g)

In [None]:
# Render the match subgraph on a circular layout: edge labels first, then the
# nodes and edges with node labels (same draw order as before).
match_subgraph = req_object.req_subgraph
pos = nx.circular_layout(match_subgraph)
nx.draw_networkx_edge_labels(match_subgraph, pos)
nx.draw_circular(match_subgraph, with_labels=True)

In [None]:
# Display every node of the match subgraph together with its attribute dict.
req_object.req_subgraph.nodes(data=True)

In [None]:
# Run the clustering step and list, for each winning match, the token text
# and the name and URI of the model element it was paired with.
winners = req_object.match_clustering()

for winner in winners.values():
    matched_token, matched_element = winner['token'], winner['model_element']
    print('Token: ', matched_token['text'])
    print('Element: ', matched_element['name'])
    print('URI: ', matched_element['uri'], '\n_________')

In [None]:
# Display the raw mapping returned by match_clustering() for inspection.
winners

In [None]:
# str.format template for ONE reference: links the input text element to a
# Reference node, which records the input text, the matched model element and
# a Token node (start position and text).
# Placeholders: input_uri, reference_uuid, input_text, match_uri,
# token_position, token_text.
# input_text and token_text land inside SPARQL triple-quoted literals, so the
# caller must escape them; the URIs are inserted verbatim between < >.
# The template uses the xsd: prefix, which the enclosing update must declare.
insert_blocks = """
<{input_uri}> mms-autocref:reference mms-autocref-i:Reference.{reference_uuid} ;
  .

mms-autocref-i:Reference.{reference_uuid}
  a mms-autocref:Reference ;
  mms-autocref:inputText \"\"\"{input_text}\"\"\" ;
  mms-autocref:match <{match_uri}> ;
  mms-autocref:token mms-autocref-i:Token.{reference_uuid} ;
  .

mms-autocref-i:Token.{reference_uuid}
  a mms-autocref:Token ;
  mms-autocref:tokenStart "{token_position}"^^xsd:integer ;
  mms-autocref:tokenText \"\"\"{token_text}\"\"\" ;
  .
"""

In [None]:
# str.format template for the SPARQL UPDATE that writes the generated
# reference blocks into the autocref test graph. The single placeholder is
# {insert_blocks}; literal braces are doubled for str.format.
# The xsd: prefix is declared here because the inserted blocks type the token
# position as "..."^^xsd:integer; a prefixed name without a declaration is a
# syntax error in standard SPARQL.
insert_query = """
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix mms-autocref: <https://opencae.jpl.nasa.gov/mms/rdf/autocref/>
prefix mms-autocref-i: <https://opencae.jpl.nasa.gov/mms/rdf/autocref-instance/>

insert data {{
  graph <https://opencae.jpl.nasa.gov/mms/rdf/graph/autocref.tmt.test> {{
    {insert_blocks}
  }}
}}"""

In [None]:
def _sparql_long_literal(text):
    """Escape ``text`` for use inside a SPARQL triple-quoted string literal.

    Backslashes are doubled first so that the backslashes added when escaping
    double quotes are not themselves re-escaped. Without this step, any
    backslash in the source text would form an invalid or unintended escape
    sequence in the generated update.
    """
    return text.replace('\\', '\\\\').replace('"', '\\"')


# Render one template block per winning match and join them in a single pass.
# Every reference gets a fresh random id shared by its Reference and Token nodes.
# NOTE(review): tokenStart is filled from token['token_id']; confirm that this
# is the intended start position rather than a token index.
insert_concat = "".join(
    insert_blocks.format(
        input_uri=req_object.text_uri,
        input_text=_sparql_long_literal(req_object.text),
        reference_uuid=uuid.uuid4().hex,
        match_uri=winner['model_element']['uri'],
        token_position=winner['token']['token_id'],
        token_text=_sparql_long_literal(winner['token']['text']),
    )
    for winner in winners.values()
)

# Wrap the blocks in the INSERT DATA update and show the final query text.
insert = insert_query.format(insert_blocks=insert_concat)
print(insert)

In [None]:
# Send the generated INSERT DATA update over HTTP POST and print the raw
# response body returned by the endpoint.
sparql.setMethod(POST)
sparql.setQuery(insert)


results = sparql.query()
print(results.response.read())

# MISCELLANEOUS 

In [None]:
# Scratch call: display whatever match_clustering_stop_condition() returns.
# NOTE(review): the meaning of the return value is not visible in this
# notebook -- see req_analysis.Requirement.
req_object.match_clustering_stop_condition()

In [None]:
# Hand-written 5 x 4 matrix of floats used as input for the clustering helpers
# tried out below.
# NOTE(review): the rows look like linkage-style dendrogram merges
# (node a, node b, distance, cluster size) over 6 leaves -- confirm against
# what paris.utils expects.
D = [
    [0.0, 2.0, 3.0, 2.0],
    [3.0, 6.0, 5.0, 3.0],
    [4.0, 5.0, 9.0, 2.0],
    [1.0, 7.0, 11.0, 4.0],
    [8.0, 9.0, 11.0, 6.0],
]

In [None]:
from paris.utils import select_clustering, select_clustering_gen, select_clustering_gen2

In [None]:
# Scratch experiment: call select_clustering_gen2 on the hand-written matrix D
# and the current match subgraph. Only the call with 5 as third argument is
# active; the variants for 0-4 are kept commented out for quick toggling.
# NOTE(review): the meaning of the third argument is not visible here -- see
# paris.utils.
# print(select_clustering_gen2(D, req_object.req_subgraph, 0))
# print(select_clustering_gen2(D, req_object.req_subgraph, 1))
# print(select_clustering_gen2(D, req_object.req_subgraph, 2))
# print(select_clustering_gen2(D, req_object.req_subgraph, 3))
# print(select_clustering_gen2(D, req_object.req_subgraph, 4))
print(select_clustering_gen2(D, req_object.req_subgraph, 5))

In [None]:
# Search for a single simple path (no repeated vertices) from one hard-coded
# vertex towards another, following edges in either direction. The walk stops
# when the target id is reached OR after 8 loop iterations, so the returned
# path does not necessarily end at the target.
g = neptune.graphTraversal()
(
    g.V('MMS_1461009357890_d8449449-d16f-43c7-90a4-ee5744566f84')
    .repeat(both().simplePath())
    .until(hasId('MMS_1461009332936_4f59edac-f8f9-47e6-972b-fb68ca0f56ed').or_().loops().is_(8))
    .path()
    .limit(1)
    .toList()
)

### All matches:

In [None]:
# Print the attribute dict of match-subgraph nodes 0..5.
# The original wrapped the call in a second print(), which also printed the
# inner call's return value ("None") after every node.
# NOTE(review): the count 6 is hard-coded for the sample requirement; it
# raises KeyError if the subgraph has fewer nodes or uses different node ids.
for i in range(6):
    print(req_object.req_subgraph.nodes(data=True)[i])

### Matches within the biggest cluster:

In [None]:
# List token text and matched element URI for every node in the first cluster.
# NOTE(review): `match_clusters` is not defined in any visible cell, so this
# raises NameError on a fresh kernel -- define it before this cell.
for node_i in match_clusters[0]:
    node_data = req_object.req_subgraph.nodes(data=True)[node_i]
    print(
        'Token:\t', node_data['token']['text'],
        '\nElement URI: ', node_data['model_element']['uri'],
        '\n______',
    )

## Misc Testing

In [None]:
# Hard-coded ids of a few elements used for ad-hoc graph queries; the names
# are the author's shorthand for the elements they presumably refer to.
summit = '_4663af6e-6265-438d-87b8-74e1550518bf__65_6314a138-53b9-42a5-b488-9c23967ed889'
enclosure = '_18_0_6_f1d0357_1490143302007_461579_179266'
mirror = '_18_5_2_e64033a_1509475358071_810923_58742'
aps_user = '_17_0_2_3_41e01aa_1389730769202_855584_72742'
aps_mission = '_17_0_2_3_41e01aa_1389728637481_651557_61606'

In [None]:
# Same path probe as earlier in the notebook, with a different pair of
# hard-coded vertex ids: one simple path, edges followed in either direction,
# stopping at the target id or after 8 loop iterations, whichever comes first.
g = neptune.graphTraversal()
(
    g.V('_18_5_3_e64033a_1517517345905_946202_22722')
    .repeat(both().simplePath())
    .until(hasId('_4663af6e-6265-438d-87b8-74e1550518bf__10_a7ad0d07-fa64-40e7-a266-db3006b20dc5').or_().loops().is_(8))
    .path()
    .limit(1)
    .toList()
)

In [None]:
# Display the property map of the vertex whose id is stored in `enclosure`.
g = neptune.graphTraversal()
g.V(enclosure).valueMap().toList()

In [None]:
import spacy

# Load the small English spaCy model and append the "merge_noun_chunks"
# component to its pipeline.
# NOTE(review): create_pipe() followed by add_pipe(component) is the spaCy v2
# calling convention; spaCy v3 expects nlp.add_pipe("merge_noun_chunks").
# Confirm which spaCy version this project pins.
nlp_np = spacy.load("en_core_web_sm")
merge_nps = nlp_np.create_pipe("merge_noun_chunks")
nlp_np.add_pipe(merge_nps)


In [None]:
# Run the noun-chunk-merging pipeline on a sample sentence and print each
# resulting token's text, part-of-speech tag and character offset.
sample_sentence = "Hello my name is the beautiful car of New York. Bye!"
for tok in nlp_np(sample_sentence):
    print(tok.text, tok.pos_, tok.idx)