Create Ontology

In [61]:
from owlready2 import *

In [62]:
from  common.cleaner import clean_char

def _escape_value(text: str) -> str:
    """Escape the illegal characters for an ontology property"""
    if text is None:
        return None
    # function to escape XML character data
    text = escape(text)
    text = text.replace('\n', '')
    text = text.replace('\r', '')
    text = text.replace('\f', '')
    text = text.replace('\b', '')
    text = text.replace('"', '')
    text = text.replace('[', '')
    text = text.replace(']', '')
    text = text.replace('{', '')
    text = text.replace('}', '')
    text = text.replace('#', '')
    text = text.replace('|', '')
    text = clean_char(text)
    return text

def _escape_iri(text: str) -> str:
    """For IRI, we replace space character by _"""
    if text is None:
        return None
    text = _escape_value(text)
    text = text.replace(' ', '_')
    return text


In [74]:
onto = get_ontology("http://authors_relations.org/onto.owl")

In [64]:
# Create classes

with onto :
    class Author(Thing):
        pass
    class ConnectedAuthor(Author):
        pass
    class InterestingAuthor(ConnectedAuthor):
        pass

    class Article(Thing):
        # name of an article is the pdf_id in arxiv
        pass


In [65]:
list(onto.classes())

[onto.Author, onto.ConnectedAuthor, onto.InterestingAuthor, onto.Article]

In [66]:
print(Author.subclasses())
print(ConnectedAuthor.ancestors())

<generator object EntityClass.subclasses at 0x000001BE7D11D9A0>
{owl.Thing, onto.ConnectedAuthor, onto.Author}


In [67]:
onto

get_ontology("http://authors_relations.org/onto.owl#")

In [75]:
# Save the ontology
onto.save(file = "authors_relations.owl")

In [76]:
# Create new class properties
with onto:
    class wrote_Article(Author >> Article):
        pass
    class quoted_Auteur(Author >> ConnectedAuthor):
        pass
    class qutoted_by(ConnectedAuthor >> Author):
        inverse_property = quoted_Auteur
    

Import data

In [34]:
# Import the Data to put in ontology
# importing the module
import json
 
# Opening JSON file with references
with open('pdf_references.json') as json_file:
    prediction = json.load(json_file)
prediction = prediction['References']

# Opening JSON file with metadata
with open('pdf_metadata.json') as json_file:
    metadata = json.load(json_file)
metadata = metadata['Papers']

print("References : "+str(len(prediction)))
print("Authors : "+str(len(metadata)))

References : 217
Authors : 216


Nettoyage de données

In [41]:
 for reference in prediction:

        for reference_line in reference:

            for author_reference in reference_line['list_authors']:
                if '|' in author_reference:
                    print(author_reference)
                

|T
≤ |A(s)| ·'
µh(ah|xh
|Ch(xh−2
µh(·|xh)||µt h(·|xh
µh(·|xh)||µt h(·|xh
µh(·|xh)||µt h(·|xh
||µt h′
||µt h′
||µt h′
||µt h′
µh(·|xh)||µt h(·|xh
∈ ∆(S1)}∪{ph(·|sh
∈ ∆(S1)}∪{ph(·|sh


In [71]:
for individual in Author.instances(): 
    destroy_entity(individual)

for individual in ConnectedAuthor.instances(): 
    destroy_entity(individual)

for individual in Article.instances(): 
    destroy_entity(individual)

In [73]:
for article in metadata:
    pdf_id = article['id']
    for reference in prediction:

        for reference_line in reference:
            for author_qui_cite in article['authors']:
                for author_reference in reference_line['list_authors']:
                    if pdf_id == reference_line['id']:
                        # identifier les noms
                        author_qui_cite_name = _escape_value(author_qui_cite)
                        author_reference_name = _escape_value(author_reference)
                        article_name = pdf_id
                        # identifier les classes
                        author_qui_cite_name = Author(_escape_iri(author_qui_cite))
                        author_reference_name = ConnectedAuthor(_escape_iri(author_reference))
                        article_name = Article(pdf_id)
                        # identifier les relations
                        # print(author_qui_cite + ' fait une citation de ' + author_reference + ' dans l\'article ' + pdf_id)
                        author_qui_cite_name.quoted_Auteur=[author_reference_name]
                        author_qui_cite_name.wrote_Article=[article_name]

In [71]:
author_qui_cite_name
author_reference_name

onto.Umar Iqbal

In [74]:
for rel in onto.quoted_Auteur.get_relations():
    print(rel)

(onto.Hongwei Yi, onto.Yichen Wei)
(onto.Chun-Hao P. Huang, onto.Yichen Wei)
(onto.Dimitrios Tzionas, onto.Yichen Wei)
(onto.Muhammed Kocabas, onto.Yichen Wei)
(onto.Mohamed Hassan, onto.Yichen Wei)
(onto.Siyu Tang, onto.Yichen Wei)
(onto.Justus Thies, onto.Yichen Wei)
(onto.Michael J. Black, onto.Yichen Wei)
(onto.Simone Parisi, onto.Quadro)
(onto.Aravind Rajeswaran, onto.Quadro)
(onto.Senthil Purushwalkam, onto.Quadro)
(onto.Abhinav Gupta, onto.Quadro)
(onto.Shubham Gupta, onto.KDD)
(onto.Sahil Manchanda, onto.KDD)
(onto.Srikanta Bedathur, onto.KDD)
(onto.Sayan Ranu, onto.KDD)
(onto.Dong-Ki Kim, onto.Shimon Whiteson)
(onto.Matthew Riemer, onto.Shimon Whiteson)
(onto.Miao Liu, onto.Shimon Whiteson)
(onto.Jakob N. Foerster, onto.Shimon Whiteson)
(onto.Michael Everett, onto.Shimon Whiteson)
(onto.Chuangchuang Sun, onto.Shimon Whiteson)
(onto.Gerald Tesauro, onto.Shimon Whiteson)
(onto.Jonathan P. How, onto.Shimon Whiteson)
(onto.Anna V. Kononova, onto.StDev)
(onto.Diederick Vermetten, o

In [None]:
# TODO
# qui a été cité combien de fois (ajouter deux règle)
# Règle 1 : Connected Author qui est cité plus de 50 fois est une InterestingAuthor
# Règle 2 : 

