In [12]:
import sys
sys.path.append('../')
from setting import config_read

In [13]:
import itertools

from elasticsearch import Elasticsearch
import pandas as pd
# NOTE: the star import stays ahead of the graph/map imports so that it cannot
# shadow ConceptualGraphGenerator or ResourceMapper.
from path import *
from graph import ConceptualGraphGenerator
from map import ResourceMapper

In [14]:
# Read the project configuration; config_read is handed the parent directory.
config = config_read('../')

# Elasticsearch: the connection target and the index name both come from the
# 'elasticsearch' section of the configuration.
server_ip = config['elasticsearch']['ip']
index_name = config['elasticsearch']['name']
es = Elasticsearch(server_ip)

# Unit Path: load the unit-path table and build the graph G from it.
# NOTE(review): generate_graph is not imported by name in this notebook;
# presumably it is provided by `from path import *` -- confirm.
unit_path = pd.read_csv('../unit_path.csv')
G = generate_graph(unit_path)

# Pipeline components; both are constructed from the same configuration object.
rm = ResourceMapper(config)
cgg = ConceptualGraphGenerator(config)

In [16]:
# Alternative example question, currently disabled
# ("What is the agency of BIGBANG, who sang 'Bang Bang Bang'?"):
# user_input = '뱅뱅뱅 부른 빅뱅 소속사는?'
# Active question ("What is the genre of the songs composed by Brave Brothers?"):
user_input = '용감한 형제가 작곡한 노래의 장르는?'
# Map the question to resource combinations, then derive the conceptual
# graphs from them; later cells iterate over conceptual_graph graph by graph.
resource_combinations = rm.process(user_input)
conceptual_graph = cgg.generate_conceptual_graph(resource_combinations)

In [17]:
def get_tbox(resource):
    """Return the ``Tbox`` field of the document indexed for `resource`.

    Runs an exact-match ``term`` query on the ``URI.keyword`` field of the
    index named by the module-level ``index_name``, using the module-level
    ``es`` client, and reads the first hit.

    Args:
        resource: Value matched against ``URI.keyword``.

    Returns:
        The ``Tbox`` value stored in the first matching document. The caller
        iterates over it with ``itertools.product``, so it is expected to be
        a collection of node names. An empty list is returned when no
        document matches, so the caller simply finds no candidate nodes
        instead of crashing with ``IndexError``.

    Raises:
        KeyError: If the matching document has no ``Tbox`` field.
    """
    search_query = {"query": {"term": {"URI.keyword": resource}}}
    result = es.search(index=index_name, body=search_query)
    hits = result['hits']['hits']
    if not hits:
        # Unknown resource: nothing is indexed under this URI.
        return []
    return hits[0]['_source']['Tbox']

In [18]:
def _graph_level_nodes(node):
    """Return the nodes of G to search with for `node`.

    A node that G already contains is used as-is (wrapped in a list);
    anything else is resolved through get_tbox.
    """
    if G.has_node(str(node)):
        return [node]
    return get_tbox(node)


# Maps every conceptual arc to the (score, path) pairs found for it.
ca2sp = {}

for concept_graph in conceptual_graph:

    for concept_arc in concept_graph:
        # An arc shared by several conceptual graphs is resolved only once.
        if concept_arc in ca2sp:
            continue
        scored_paths = ca2sp[concept_arc] = []

        start, predicate, end = concept_arc

        # Restrict search space to Tbox level
        start_nodes = _graph_level_nodes(start)
        end_nodes = _graph_level_nodes(end)

        # 'Any P' is passed on as None (presumably "no predicate constraint"
        # for find_shortest_path -- confirm against its definition).
        edge_label = None if predicate == 'Any P' else predicate

        # Find shortest path
        for start_node, end_node in itertools.product(start_nodes, end_nodes):
            path_score, path = find_shortest_path(G, start_node, end_node, edge_label, weight=False)
            if len(path) == 0:
                continue
            scored_paths.append((path_score, path))

In [19]:
# Shortest Path: report what was found for every conceptual arc
separator = '-'*100
for concept_arc, scored_paths in ca2sp.items():
    heading = f'{concept_arc}의 shortest path : '
    if scored_paths:
        print(separator)
        print(heading)
        for _path_score, path in scored_paths:
            print(path)
    else:
        # Message text: "no matching shortest path exists"
        print(heading)
        print(': 일치하는 shortest path가 없습니다.\n')
print(separator)

----------------------------------------------------------------------------------------------------
('skmo:용감한_형제', 'skmo:isComposedBy', 'mo:Track')의 shortest path : 
[('foaf:Person', 'skmo:isComposedBy', 'mo:Track')]
----------------------------------------------------------------------------------------------------
('mo:Track', 'Any P', 'skmo:Genre')의 shortest path : 
[('mo:Track', 'skmo:hasGenre', 'skmo:Genre')]
----------------------------------------------------------------------------------------------------
('mo:Track', 'skmo:hasGenre', 'owl:Thing')의 shortest path : 
[('mo:Track', 'skmo:hasGenre', 'skmo:Genre'), ('skmo:Genre', 'rdfs:subClassOf', 'owl:Thing')]
----------------------------------------------------------------------------------------------------


In [20]:
# Build query-graph candidates: for each conceptual graph, pick one shortest
# path per arc in every possible way and sum the scores of the picked paths.
query_graph = []
for concept_graph in conceptual_graph:
    paths_per_arc = [ca2sp[concept_arc] for concept_arc in concept_graph]
    for combination in itertools.product(*paths_per_arc):
        total_score = sum(arc_score for arc_score, _path in combination)
        chosen_paths = [path for _arc_score, path in combination]
        query_graph.append((total_score, chosen_paths))

for total_score, chosen_paths in query_graph:
    print(total_score, chosen_paths)

print(f'\n: 총 {len(query_graph)}개의 query graph candidates generated')

2 [[('foaf:Person', 'skmo:isComposedBy', 'mo:Track')], [('mo:Track', 'skmo:hasGenre', 'skmo:Genre')]]
3 [[('foaf:Person', 'skmo:isComposedBy', 'mo:Track')], [('mo:Track', 'skmo:hasGenre', 'skmo:Genre'), ('skmo:Genre', 'rdfs:subClassOf', 'owl:Thing')]]

: 총 2개의 query graph candidates generated


In [21]:
SUBCLASS_OF = 'rdfs:subClassOf'


def _trim_subclass_arcs(paths, predicate=SUBCLASS_OF):
    """Strip `predicate` arcs from both ends of a query graph.

    A query graph is a sequence of paths and each path is a sequence of
    (node, predicate, node) arcs. Arcs whose predicate equals `predicate`
    are removed from the tail of the last path; a path emptied this way is
    dropped and trimming continues into the path before it. The same is then
    done from the head of the first path forwards. Arcs in the interior are
    never touched.

    Args:
        paths: The query graph to trim. It is not modified.
        predicate: Predicate label of the arcs to strip.

    Returns:
        (trimmed_paths, n_removed): the trimmed query graph (possibly empty)
        and the number of arcs that were removed.
    """
    trimmed = list(paths)
    n_removed = 0

    # Trailing end: walk backwards until an arc with another predicate is met.
    while trimmed:
        path = trimmed[-1]
        n_keep = len(path)
        while n_keep and path[n_keep - 1][1] == predicate:
            n_keep -= 1
        n_removed += len(path) - n_keep
        if n_keep:
            trimmed[-1] = path[:n_keep]
            break
        trimmed.pop()

    # Leading end: same walk, forwards, on whatever survived the tail pass.
    while trimmed:
        path = trimmed[0]
        n_skip = 0
        while n_skip < len(path) and path[n_skip][1] == predicate:
            n_skip += 1
        n_removed += n_skip
        if n_skip < len(path):
            trimmed[0] = path[n_skip:]
            break
        trimmed.pop(0)

    return trimmed, n_removed


# Prune subClassOf arcs from the ends of every candidate, then re-score it:
# each removed arc lowers the score by 1 and the result is averaged over the
# number of remaining paths.
new_query_graph = []
for score, qg in query_graph:
    trimmed_qg, n_removed = _trim_subclass_arcs(qg)

    # Nothing left (empty candidate, or it consisted of subClassOf arcs only).
    if not trimmed_qg:
        continue

    candidate = ((score - n_removed) / len(trimmed_qg), trimmed_qg)

    # De-duplicate every candidate, trimmed or not, so the result does not
    # depend on the order in which equivalent candidates were generated.
    if candidate in new_query_graph:
        continue
    new_query_graph.append(candidate)

In [22]:
# Show the surviving candidates (paths only) and how many there are
for _candidate_score, candidate_paths in new_query_graph:
    print(candidate_paths)

n_candidates = len(new_query_graph)
print(f'\n: 총 {n_candidates}개의 query graph candidates generated')

[[('foaf:Person', 'skmo:isComposedBy', 'mo:Track')], [('mo:Track', 'skmo:hasGenre', 'skmo:Genre')]]

: 총 1개의 query graph candidates generated


In [23]:
# Order the candidates ascending by (score, paths) and keep the paths of the
# first one, i.e. the lowest-scoring candidate.
ranked_candidates = sorted(new_query_graph)
final_query_graph = ranked_candidates[0][1]
final_query_graph

[[('foaf:Person', 'skmo:isComposedBy', 'mo:Track')],
 [('mo:Track', 'skmo:hasGenre', 'skmo:Genre')]]