In [1]:
import sys
sys.path.append('../')
from setting import config_read

In [2]:
from owlready2 import get_ontology
import pandas as pd
from elasticsearch import Elasticsearch



In [3]:
# Read the project configuration; config_read receives the parent directory.
config = config_read('../')

# The OWL file location is taken from the 'owl' section of the config.
owl_settings = config['owl']
data_path = owl_settings['path']

# Build the ontology object for that path, then load it.
onto = get_ontology(data_path).load()

In [6]:
# Elasticsearch connection settings come from the 'elasticsearch' config section.
es_settings = config['elasticsearch']
server_ip = es_settings['ip']
index_name = es_settings['name']

# Client object used by the later cells (index deletion, indexing, search).
es = Elasticsearch(server_ip)

In [7]:
# Each entry pairs a type tag with the ontology accessor that yields the
# resources of that kind. The order here fixes the row order of index_list:
# classes, object properties, data properties, then individuals.
resource_groups = (
    ("T_c", onto.classes),
    ("T_op", onto.object_properties),
    ("T_dp", onto.data_properties),
    ("T_i", onto.individuals),
)

# One (IRI, type tag, label values) tuple per ontology resource.
index_list = [
    (resource.iri, type_tag, resource.label)
    for type_tag, list_resources in resource_groups
    for resource in list_resources()
]

# Disabled alternative for individuals: in addition to the labels, it would
# collect the values of the 'realName', 'name' and 'title' properties.
# for r in onto.individuals():
#     annotation_values = [*r.label]
#     for p in r.get_properties():
#         if p._name in ['realName', 'name', 'title']:
#             av = getattr(r, p._name)
#             if av in annotation_values: continue
#             annotation_values.append(av)
#     index_list.append((r.iri, "T_i", annotation_values))

In [8]:
# Tabulate the collected (URI, type tag, label values) tuples.
column_names = ['URI', 'Type', 'Annotation Values']
df = pd.DataFrame(index_list, columns=column_names)

# Keep only the resources that have at least one annotation value, and
# renumber the rows from 0 so positions and index labels coincide.
cond = df["Annotation Values"].apply(len).ne(0)
df = df.loc[cond].reset_index(drop=True)
df

Unnamed: 0,URI,Type,Annotation Values
0,http://schema.org/MusicAlbum,T_c,"[앨범, 음악 앨범]"
1,http://xmlns.com/foaf/0.1/Organization,T_c,"[연예기획사, 기관, 조직, 회사]"
2,http://xmlns.com/foaf/0.1/Person,T_c,"[인물, 사람]"
3,http://www.sktelecom.com/skmo/Language,T_c,[언어]
4,http://purl.org/ontology/mo/Track,T_c,"[곡, 노래, 수록곡]"
...,...,...,...
549,http://www.sktelecom.com/timeUnit#7일,T_i,"[7일, 칠일]"
550,http://www.sktelecom.com/timeUnit#8월,T_i,"[팔월, 8월]"
551,http://www.sktelecom.com/timeUnit#8일,T_i,"[팔일, 8일]"
552,http://www.sktelecom.com/timeUnit#9월,T_i,"[구월, 9월]"


In [9]:
# Drop any existing index of the same name so the indexing cell below starts
# from an empty index.
if es.indices.exists(index=index_name):
    # ignore=[400, 404] keeps the client from raising on those statuses, so the
    # response has to be inspected to know whether the delete really happened.
    delete_response = es.indices.delete(index=index_name, ignore=[400, 404])
    if delete_response.get('acknowledged', False):
        print('Index has been deleted successfully')
    else:
        # Report the failure instead of claiming success for a swallowed error.
        print('Index deletion was not acknowledged:', delete_response)

Index has been deleted successfully


In [10]:
# Index one document per DataFrame row, holding that row's annotation values.
# NOTE(review): only 'Annotation Values' is stored; URI and Type are not part
# of the document, so a hit cannot be mapped back to its resource from
# _source alone — confirm this is intended.
for annotation_values in df["Annotation Values"]:
    doc = {'Annotation Values': annotation_values}
    es.index(index=index_name, body=doc)

# Newly indexed documents only become searchable after a refresh. Refresh
# explicitly so a search issued right after this cell (e.g. during
# "Run All") sees them. Skipped when nothing was indexed, because the index
# may not exist in that case.
if not df.empty:
    es.indices.refresh(index=index_name)

In [11]:
search_query = {"query":{"term":{"Annotation Values.keyword": "노래"}}}
es.search(index=index_name, body=search_query)

{'took': 55,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 7.171753,
  'hits': [{'_index': 'skmo',
    '_id': 'D2tynIoB6175Wi5vE2PB',
    '_score': 7.171753,
    '_source': {'Annotation Values': ['곡', '노래', '수록곡']}}]}}