# B.1 TBOX definition

In [11]:
from rdflib import Graph, Namespace, Literal
from rdflib.namespace import RDFS, RDF, XSD

Define the namespace

In [12]:
LAB = Namespace('http://www.example.edu/')

In this section, we want to define the TBOX, that is, the conceptual level of entities and their inter-relationships.

In [13]:
g = Graph()
g.bind("LAB", LAB)

## Definition

### Class

In [None]:
g.add((LAB.paper,RDF.type,RDFS.Class))
g.add((LAB.person,RDF.type,RDFS.Class))
g.add((LAB.venue,RDF.type,RDFS.Class))
g.add((LAB.proceeding,RDF.type,RDFS.Class))
g.add((LAB.keyword,RDF.type,RDFS.Class))
g.add((LAB.journal,RDF.type,RDFS.Class))
g.add((LAB.published_in,RDF.type,RDFS.Class))
g.add((LAB.writes,RDF.type,RDFS.Class))
g.add((LAB.review,RDF.type,RDFS.Class))

### Subclasses

In [None]:
g.add((LAB.author,RDFS.subClassOf,LAB.person))
g.add((LAB.reviewer,RDFS.subClassOf,LAB.person))
g.add((LAB.chair,RDFS.subClassOf,LAB.person))
g.add((LAB.cited_paper,RDFS.subClassOf,LAB.paper))
g.add((LAB.conference,RDFS.subClassOf,LAB.venue))
g.add((LAB.workshop,RDFS.subClassOf,LAB.venue))

### Properties

In [None]:
# For the properties of paper:
g.add((LAB.paper_id,RDF.type,RDF.Property))
g.add((LAB.paper_id,RDFS.domain,LAB.paper))

g.add((LAB.paper_title,RDF.type,RDF.Property))
g.add((LAB.paper_title,RDFS.domain,LAB.paper))

g.add((LAB.doi,RDF.type,RDF.Property))
g.add((LAB.doi,RDFS.domain,LAB.paper))

g.add((LAB.link,RDF.type,RDF.Property))
g.add((LAB.link,RDFS.domain,LAB.paper))

g.add((LAB.abstract,RDF.type,RDF.Property))
g.add((LAB.abstract,RDFS.domain,LAB.paper))

g.add((LAB.num_pages,RDF.type,RDF.Property))
g.add((LAB.num_pages,RDFS.domain,LAB.paper))

# For the properties of person:
g.add((LAB.person_id,RDF.type,RDF.Property))
g.add((LAB.person_id,RDFS.domain,LAB.person))

g.add((LAB.person_name,RDF.type,RDF.Property))
g.add((LAB.person_name,RDFS.domain,LAB.person))

g.add((LAB.email,RDF.type,RDF.Property))
g.add((LAB.email,RDFS.domain,LAB.person))

g.add((LAB.department,RDF.type,RDF.Property))
g.add((LAB.department,RDFS.domain,LAB.person))

g.add((LAB.institution,RDF.type,RDF.Property))
g.add((LAB.institution,RDFS.domain,LAB.person))

# For the properties of journal:
g.add((LAB.journal_id,RDF.type,RDF.Property))
g.add((LAB.journal_id,RDFS.domain,LAB.journal))

g.add((LAB.journal_name,RDF.type,RDF.Property))
g.add((LAB.journal_name,RDFS.domain,LAB.journal))

# For the properties of venue:
g.add((LAB.venue_id,RDF.type,RDF.Property))
g.add((LAB.venue_id,RDFS.domain,LAB.venue))

g.add((LAB.venue_name,RDF.type,RDF.Property))
g.add((LAB.venue_name,RDFS.domain,LAB.venue))

g.add((LAB.venue_year,RDF.type,RDF.Property))
g.add((LAB.venue_year,RDFS.domain,LAB.venue))

g.add((LAB.venue_edition,RDF.type,RDF.Property))
g.add((LAB.venue_edition,RDFS.domain,LAB.venue))

# For the properties of proceeding:
g.add((LAB.proceeding_id,RDF.type,RDF.Property))
g.add((LAB.proceeding_id,RDFS.domain,LAB.proceeding))

g.add((LAB.proceeding_name,RDF.type,RDF.Property))
g.add((LAB.proceeding_name,RDFS.domain,LAB.proceeding))

g.add((LAB.city,RDF.type,RDF.Property))
g.add((LAB.city,RDFS.domain,LAB.proceeding))

# For the properties of keyword:
g.add((LAB.keyword_id,RDF.type,RDF.Property))
g.add((LAB.keyword_id,RDFS.domain,LAB.keyword))

g.add((LAB.keyword_name,RDF.type,RDF.Property))
g.add((LAB.keyword_name,RDFS.domain,LAB.keyword))

g.add((LAB.domain,RDF.type,RDF.Property))
g.add((LAB.domain,RDFS.domain,LAB.keyword))

# For the properties of writes:
g.add((LAB.is_corresponding_author,RDF.type,RDF.Property))
g.add((LAB.is_corresponding_author,RDFS.domain,LAB.writes))

# For the properties of published_in:
g.add((LAB.volume,RDF.type,RDF.Property))
g.add((LAB.volume,RDFS.domain,LAB.published_in))

g.add((LAB.publishing_year,RDF.type,RDF.Property))
g.add((LAB.publishing_year,RDFS.domain,LAB.published_in))

# For the properties of review:
g.add((LAB.acceptanceProbability,RDF.type,RDF.Property))
g.add((LAB.acceptanceProbability,RDFS.domain,LAB.review))

### Relationships

In [14]:
# The relationship between paper and venue:
g.add((LAB.presented_in,RDF.type, RDF.Property))
g.add((LAB.presented_in,RDFS.domain,LAB.paper))
g.add((LAB.presented_in,RDFS.range,LAB.venue))
g.add((LAB.paper,LAB.presented_in,LAB.venue))

# The relationship between venue and proceeding:
g.add((LAB.part_of,RDF.type, RDF.Property))
g.add((LAB.part_of,RDFS.domain,LAB.venue))
g.add((LAB.part_of,RDFS.range,LAB.proceeding))
g.add((LAB.venue,LAB.part_of,LAB.proceeding))

# The relationship between paper and keyword:
g.add((LAB.has_keyword,RDF.type, RDF.Property))
g.add((LAB.has_keyword,RDFS.domain,LAB.paper))
g.add((LAB.has_keyword,RDFS.range,LAB.keyword))
g.add((LAB.paper,LAB.has_keyword,LAB.keyword))

# The relationship between paper and cited_paper:
g.add((LAB.cited,RDF.type, RDF.Property))
g.add((LAB.cited,RDFS.domain,LAB.paper))
g.add((LAB.cited,RDFS.range,LAB.cited_paper))
g.add((LAB.paper,LAB.cited,LAB.cited_paper))

# The relationship between review and paper:
g.add((LAB.reviews,RDF.type, RDF.Property))
g.add((LAB.reviews,RDFS.domain,LAB.review))
g.add((LAB.reviews,RDFS.range,LAB.paper))
g.add((LAB.review,LAB.reviews,LAB.paper))

# The relationship between review and reviewer:
g.add((LAB.reviewed_by,RDF.type, RDF.Property))
g.add((LAB.reviewed_by,RDFS.domain,LAB.review))
g.add((LAB.reviewed_by,RDFS.range,LAB.reviewer))
g.add((LAB.review,LAB.reviewed_by,LAB.reviewer))

# The relationship between writes and author:
g.add((LAB.written_by,RDF.type, RDF.Property))
g.add((LAB.written_by,RDFS.domain,LAB.writes))
g.add((LAB.written_by,RDFS.range,LAB.author))
g.add((LAB.writes,LAB.written_by,LAB.author))

# The relationship between writes and paper:
g.add((LAB.has_written,RDF.type, RDF.Property))
g.add((LAB.has_written,RDFS.domain,LAB.writes))
g.add((LAB.has_written,RDFS.range,LAB.paper))
g.add((LAB.writes,LAB.has_written,LAB.paper))

# The relationship beteen published_in and journal:
g.add((LAB.in_journal,RDF.type, RDF.Property))
g.add((LAB.in_journal,RDFS.domain,LAB.published_in))
g.add((LAB.in_journal,RDFS.range,LAB.journal))
g.add((LAB.published_in,LAB.in_journal,LAB.journal))

# The relationship between published_in and paper:
g.add((LAB.published_paper,RDF.type, RDF.Property))
g.add((LAB.published_paper,RDFS.domain,LAB.published_in))
g.add((LAB.published_paper,RDFS.range,LAB.paper))
g.add((LAB.published_in,LAB.published_paper,LAB.paper))

# The relationship between venue and chair:
g.add((LAB.is_chair,RDF.type, RDF.Property))
g.add((LAB.is_chair,RDFS.domain,LAB.published_in))
g.add((LAB.is_chair,RDFS.range,LAB.paper))
g.add((LAB.venue,LAB.is_chair,LAB.chair))

<Graph identifier=Nfd219cc7370242a79d81f357766b9269 (<class 'rdflib.graph.Graph'>)>

## Serialize

Finally, we can save the file.

In [15]:
g.serialize('tbox.ttl',format='turtle')

<Graph identifier=Nfd219cc7370242a79d81f357766b9269 (<class 'rdflib.graph.Graph'>)>

# B.2 ABOX definition

First, let's load all the data:

In [16]:
import pandas as pd

dtype_semantics = {'ID': str}
dtype_relations = {'START_ID': str,'END_ID': str}

reviews_df = pd.read_csv("data/author_review_papers.csv", dtype=dtype_relations)
writes_df = pd.read_csv('data/author_writes_papers.csv', dtype=dtype_relations)
author_df = pd.read_csv('data/authors_semantics.csv', dtype=dtype_semantics)
part_of_df = pd.read_csv('data/conference_part_of_proceeding.csv', dtype=dtype_relations)
venue_df = pd.read_csv('data/conference_semantics.csv', dtype=dtype_semantics)
journal_df = pd.read_csv('data/journal_semantics.csv', dtype=dtype_semantics)
keywords_df = pd.read_csv('data/keywords_semantics.csv', dtype=dtype_semantics)
cites_df = pd.read_csv('data/paper_cites_paper.csv', dtype=dtype_relations)
has_df = pd.read_csv('data/paper_has_keywords.csv', dtype=dtype_semantics)
presented_in_df = pd.read_csv('data/paper_presented_in_conference.csv', dtype=dtype_relations)
published_in_df = pd.read_csv('data/paper_published_in_journal.csv', dtype=dtype_relations)
paper_df = pd.read_csv('data/paper_semantics.csv', dtype=dtype_semantics)
proceeding_df = pd.read_csv("data/proceeding_semantics.csv", dtype=dtype_relations)

Since there is some features that our data is missing let's add them now. It can be seen that the data only contains conferences, that is because all the events has been classified as conference. Let's divide them into either conference or workshop. In addition, we will add a conference chair.

In [17]:
venue_df = (
    venue_df
    # If name contains the word Conference, we consider it a conference, if not, a workshop
    .assign(is_conference=lambda x: x['name'].str.contains('Conference', case=False))
    # The conference chair should be an author himself
    .assign(chair_ID=author_df['ID'].sample().iloc[0])
)
### This code has a problem, chair_ID is always the same and that cannot be the case. I have to fix this!!!!
venue_df.head()

Unnamed: 0,ID,name,year,edition,is_conference,chair_ID
0,b55b50b1-aae7-47a7-b042-8aecc930073d,International Conference on Human Factors in C...,2021,1,True,1949898
1,2c9ecac6-f875-4a9b-acc2-10bd9f6782df,Technical Symposium on Computer Science Education,2019,1,False,1949898
2,c85dfc25-bcef-4719-9997-f41ad334d998,International Symposium on Intelligent Data An...,2018,1,False,1949898
3,a88140cf-3ca8-46c2-b1f7-446f137cf336,Frontiers in Education Conference,2018,1,True,1949898
4,d732841e-83f9-49ec-95ca-389e5568634b,Annual Conference on Genetic and Evolutionary ...,2016,1,True,1949898


## Loading

### Paper

In [18]:
for index,row in paper_df.iterrows():
    # Connect with the element
    g.add((LAB.paper+"/"+str(row['ID']),RDF.type,LAB.paper))
    # Load all the properties
    g.add((LAB.paper+"/"+str(row['ID']),LAB.paper_id, Literal(row['ID'], datatype=XSD.string)))
    g.add((LAB.paper+"/"+str(row['ID']),LAB.paper_title, Literal(row['title'], datatype=XSD.string)))
    g.add((LAB.paper+"/"+str(row['ID']),LAB.abstract, Literal(row['abstract'], datatype=XSD.string)))
    g.add((LAB.paper+"/"+str(row['ID']),LAB.num_pages, Literal(row['pages'], datatype=XSD.string)))
    g.add((LAB.paper+"/"+str(row['ID']),LAB.doi, Literal(row['doi'], datatype=XSD.string)))
    g.add((LAB.paper+"/"+str(row['ID']),LAB.link, Literal(row['link'], datatype=XSD.string)))

### Author

In [19]:
for index,row in author_df.iterrows():
    # Connect with the element
    g.add((LAB.author+"/"+str(row['ID']),RDF.type,LAB.author))
    # Load all the properties
    g.add((LAB.author+"/"+str(row['ID']),LAB.person_id, Literal(row['ID'], datatype=XSD.string)))
    g.add((LAB.author+"/"+str(row['ID']),LAB.person_name, Literal(row['name'], datatype=XSD.string)))
    g.add((LAB.author+"/"+str(row['ID']),LAB.email, Literal(row['email'], datatype=XSD.string)))
    g.add((LAB.author+"/"+str(row['ID']),LAB.department, Literal(row['department'], datatype=XSD.string)))
    g.add((LAB.author+"/"+str(row['ID']),LAB.institution, Literal(row['institution'], datatype=XSD.string)))

### Conference/workshop

In [21]:
for index,row in venue_df.iterrows():
    if row['is_conference'] == True:
        # Connect with the element
        g.add((LAB.conference+"/"+str(row['ID']),RDF.type,LAB.conference))
        # Load all the properties
        g.add((LAB.conference+"/"+str(row['ID']),LAB.venue_id, Literal(row['ID'], datatype=XSD.string)))
        g.add((LAB.conference+"/"+str(row['ID']),LAB.venue_name, Literal(row['name'], datatype=XSD.string)))
        g.add((LAB.conference+"/"+str(row['ID']),LAB.venue_year, Literal(row['year'], datatype=XSD.string)))
        g.add((LAB.conference+"/"+str(row['ID']),LAB.venue_edition, Literal(row['edition'], datatype=XSD.string)))
    else:
        # Connect with the element
        g.add((LAB.workshop+"/"+str(row['ID']),RDF.type,LAB.workshop))
        # Load all the properties
        g.add((LAB.workshop+"/"+str(row['ID']),LAB.venue_id, Literal(row['ID'], datatype=XSD.string)))
        g.add((LAB.workshop+"/"+str(row['ID']),LAB.venue_name, Literal(row['name'], datatype=XSD.string)))
        g.add((LAB.workshop+"/"+str(row['ID']),LAB.venue_year, Literal(row['year'], datatype=XSD.string)))
        g.add((LAB.workshop+"/"+str(row['ID']),LAB.venue_edition, Literal(row['edition'], datatype=XSD.string)))

### Journal

In [22]:
for index,row in journal_df.iterrows():
    # Connect with the element
    g.add((LAB.journal+"/"+str(row['ID']),RDF.type,LAB.journal))
    # Load all the properties
    g.add((LAB.journal+"/"+str(row['ID']),LAB.journal_id, Literal(row['ID'], datatype=XSD.string)))
    g.add((LAB.journal+"/"+str(row['ID']),LAB.journal_name, Literal(row['name'], datatype=XSD.string)))

### Keyword

In [23]:
for index,row in keywords_df.iterrows():
    # Connect with the element
    g.add((LAB.keyword+"/"+str(row['ID']),RDF.type,LAB.keyword))
    # Load all the properties
    g.add((LAB.keyword+"/"+str(row['ID']),LAB.keyword_id, Literal(row['ID'], datatype=XSD.string)))
    g.add((LAB.keyword+"/"+str(row['ID']),LAB.keyword_name, Literal(row['name'], datatype=XSD.string)))
    g.add((LAB.keyword+"/"+str(row['ID']),LAB.domain, Literal(row['domain'], datatype=XSD.string)))

### Proceeding

In [24]:
for index,row in proceeding_df.iterrows():
    # Connect with the element
    g.add((LAB.proceeding+"/"+str(row['ID']),RDF.type,LAB.proceeding))
    # Load all the properties
    g.add((LAB.proceeding+"/"+str(row['ID']),LAB.proceeding_id, Literal(row['ID'], datatype=XSD.string)))
    g.add((LAB.proceeding+"/"+str(row['ID']),LAB.proceeding_name, Literal(row['name'], datatype=XSD.string)))
    g.add((LAB.proceeding+"/"+str(row['ID']),LAB.city, Literal(row['city'], datatype=XSD.string)))


### Writes

In [25]:
for index,row in writes_df.iterrows():
    # Connect with the element
    g.add((LAB.writes+"/"+(str(row['START_ID'])+str(row['END_ID'])),RDF.type,LAB.writes))
    # Load all the properties
    g.add((LAB.writes+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.is_corresponding_author, Literal(row['corresponding_author'], datatype=XSD.string)))

### Published_in

In [26]:
for index,row in published_in_df.iterrows():
    # Connect with the element
    g.add((LAB.published_in+"/"+(str(row['START_ID'])+str(row['END_ID'])),RDF.type,LAB.published_in))
    # Load all the properties
    g.add((LAB.published_in+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.volume, Literal(row['volume'], datatype=XSD.string)))
    g.add((LAB.published_in+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.publishing_year, Literal(row['year'], datatype=XSD.string)))

### Review

In [27]:
for index,row in reviews_df.iterrows():
    # Connect with the element, notice we are using the ID of author for identifying review
    g.add((LAB.review+"/"+(str(row['START_ID'])+str(row['END_ID'])),RDF.type,LAB.review))
    # Load all the properties
    g.add((LAB.review+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.comment, Literal(row['comment'], datatype=XSD.string)))
    g.add((LAB.review+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.acceptanceProbability, Literal(row['acceptanceProbability'], datatype=XSD.string)))

### Cited_paper

In [28]:
for index,row in cites_df.iterrows():
    # Connect with the element, notice we are using the ID of paper for identifying cited_paper
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),RDF.type,LAB.cited_paper))
    # Load all the properties
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),LAB.paper_id, Literal(row['END_ID'], datatype=XSD.string)))
    matching_row = paper_df.loc[paper_df['ID'] == row['END_ID'], ['title']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),LAB.paper_title, Literal(value, datatype=XSD.string)))
    matching_row = paper_df.loc[paper_df['ID'] == row['END_ID'], ['abstract']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),LAB.abstract, Literal(value, datatype=XSD.string)))
    matching_row = paper_df.loc[paper_df['ID'] == row['END_ID'], ['pages']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),LAB.num_pages, Literal(value, datatype=XSD.string)))
    matching_row = paper_df.loc[paper_df['ID'] == row['END_ID'], ['doi']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),LAB.doi, Literal(value, datatype=XSD.string)))
    matching_row = paper_df.loc[paper_df['ID'] == row['END_ID'], ['link']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.cited_paper+"/"+str(row['END_ID']),LAB.link, Literal(value, datatype=XSD.string)))

### Reviewer

In [29]:
for index,row in reviews_df.iterrows():
    # Connect with the element, notice we are using the ID of author for identifying reviewer
    g.add((LAB.reviewer+"/"+str(row['START_ID']),RDF.type,LAB.reviewer))
    # Load all the properties
    g.add((LAB.reviewer+"/"+str(row['START_ID']),LAB.person_id, Literal(row['START_ID'], datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['START_ID'], ['name']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.reviewer+"/"+str(row['START_ID']),LAB.person_name, Literal(value, datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['START_ID'], ['email']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.reviewer+"/"+str(row['START_ID']),LAB.email, Literal(value, datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['START_ID'], ['department']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.reviewer+"/"+str(row['START_ID']),LAB.department, Literal(value, datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['START_ID'], ['institution']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.reviewer+"/"+str(row['START_ID']),LAB.institution, Literal(value, datatype=XSD.string)))

### Chair

In [30]:
for index,row in venue_df.iterrows():
    # Connect with the element
    g.add((LAB.chair+"/"+str(row['chair_ID']),RDF.type,LAB.chair))
    # Load all the properties
    g.add((LAB.chair+"/"+str(row['chair_ID']),LAB.person_id, Literal(row['chair_ID'], datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['chair_ID'], ['name']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.chair+"/"+str(row['chair_ID']),LAB.person_name, Literal(value, datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['chair_ID'], ['email']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.chair+"/"+str(row['chair_ID']),LAB.email, Literal(value, datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['chair_ID'], ['department']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.chair+"/"+str(row['chair_ID']),LAB.department, Literal(value, datatype=XSD.string)))
    matching_row = author_df.loc[author_df['ID'] == row['chair_ID'], ['institution']]
    value = matching_row.iloc[0, 0] if not matching_row.empty else None
    g.add((LAB.chair+"/"+str(row['chair_ID']),LAB.institution, Literal(value, datatype=XSD.string)))

### Written_by

In [31]:
# This is the relation between writes and author
for index,row in writes_df.iterrows():
    g.add((LAB.writes+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.written_by,LAB.author+"/"+str(row['START_ID'])))

### Has_written

In [32]:
# This is the relation between writes and paper
for index,row in writes_df.iterrows():
    g.add((LAB.writes+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.has_written,LAB.paper+"/"+str(row['END_ID'])))

### Has_keyword

In [33]:
# This is the relation between paper and keyword
for index,row in has_df.iterrows():
    g.add((LAB.paper+"/"+str(row['START_ID']),LAB.has_keyword,LAB.keyword+"/"+str(row['END_ID'])))

### Presented_in

In [34]:
# This is the relation between paper and venue
for index, row in presented_in_df.iterrows():
    end_id = row['END_ID']
    venue_row = venue_df[venue_df['ID'] == end_id]  # Filter venue_df for matching ID
    if not venue_row.empty:  # Check if venue_row is not empty (ID exists in venue_df)
        is_conference = venue_row['is_conference'].iloc[0]  # Get the 'is_conference' value
        if is_conference:
            g.add((LAB.paper+"/"+str(row['START_ID']), LAB.presented_in, LAB.conference+"/"+str(end_id)))
        else:
            g.add((LAB.paper+"/"+str(row['START_ID']), LAB.presented_in, LAB.workshop+"/"+str(end_id)))
    else:
        # Handle case where ID does not exist in venue_df (if needed)
        pass  # Placeholder for additional logic if required


### Part_of

In [35]:
for index, row in presented_in_df.iterrows():
    start_id = row['START_ID']
    end_id = row['END_ID']
    venue_row = venue_df[venue_df['ID'] == start_id]  # Filter venue_df for matching ID
    if not venue_row.empty:  # Check if venue_row is not empty (ID exists in venue_df)
        is_conference = venue_row['is_conference'].iloc[0]  # Get the 'is_conference' value
        if is_conference:
            g.add((LAB.conference+"/"+str(start_id), LAB.part_of, LAB.proceeding+"/"+str(end_id)))
        else:
            g.add((LAB.workshop+"/"+str(start_id), LAB.part_of, LAB.proceeding+"/"+str(end_id)))
    else:
        # Handle case where ID does not exist in venue_df (if needed)
        pass  # Placeholder for additional logic if required


### Is_chair

In [36]:
for index, row in venue_df.iterrows():
    row_venue_id = row['ID']
    row_chair_id = row['chair_ID']
    venue_row = venue_df[venue_df['ID'] == row_venue_id]  # Filter venue_df for matching ID
    if not venue_row.empty:  # Check if venue_row is not empty (ID exists in venue_df)
        is_conference = venue_row['is_conference'].iloc[0]  # Get the 'is_conference' value
        if is_conference:
            g.add((LAB.conference+"/"+str(row_venue_id), LAB.is_chair, LAB.chair+"/"+str(row_chair_id)))
        else:
            g.add((LAB.workshop+"/"+str(row_venue_id), LAB.is_chair, LAB.chair+"/"+str(row_chair_id)))
    else:
        # Handle case where ID does not exist in venue_df (if needed)
        pass  # Placeholder for additional logic if required

### Published_paper

In [37]:
# This is the relation between published_in and paper
for index,row in published_in_df.iterrows():
    g.add((LAB.published_in+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.published_paper,LAB.paper+"/"+str(row['START_ID'])))

### In_journal

In [38]:
# This is the relation between published_in and journal
for index,row in published_in_df.iterrows():
    g.add((LAB.published_in+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.in_journal,LAB.journal+"/"+str(row['END_ID'])))

### Cited

In [39]:
# This is the relation between paper and cited_paper
for index,row in cites_df.iterrows():
    g.add((LAB.paper+"/"+str(row['START_ID']),LAB.cited,LAB.cited_paper+"/"+str(row['END_ID'])))

### Reviews

In [40]:
# This is the relation between review and paper
for index,row in reviews_df.iterrows():
    g.add((LAB.review+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.reviews,LAB.paper+"/"+str(row['END_ID'])))

### Reviewed_by

In [41]:
# This is the relation between review and reviewer
for index,row in reviews_df.iterrows():
    g.add((LAB.review+"/"+(str(row['START_ID'])+str(row['END_ID'])),LAB.reviewed_by,LAB.reviewer+"/"+str(row['START_ID'])))

## Serialize
Finally we can save the final graph

In [42]:
g.serialize('abox.ttl',format='turtle')

<Graph identifier=Nfd219cc7370242a79d81f357766b9269 (<class 'rdflib.graph.Graph'>)>

Bugs del grafo:
1. Si miro un paper y alguna de las personas que han hecho review de este, puedo saber quien es esa persona, pero no puedo mirar directamente que papers han escrito esas personas. La verdad es que no se si debería ser capaz de hacerlo, pero la lógica me dice que debería ser posible.