In [73]:
import pandas as pd

from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF

### Parsing TBOX

In [74]:
URI = 'http://example.com/sdm_lab3'
onto = Namespace(URI + '#')

g = Graph()
g.parse("11-B1-GlennieParaskeva.rdf", format="xml")

<Graph identifier=N9f7f002be4cf4a189cc9966b42b01543 (<class 'rdflib.graph.Graph'>)>

### Methods for adding classes and properties

In [75]:
# Classes
def add_person(person):
    
    # Create URI using the person's ID
    id = URIRef(URI+'/' + person.ID)

    if person.Type == 'Author':
        g.add((id, RDF.type, onto.Author))
    elif person.Type == 'Reviewer':
        g.add((id, RDF.type, onto.Reviewer))
    elif person.Type == 'Chairperson':
        g.add((id, RDF.type, onto.Chairperson))
    elif person.Type == 'Editor':
        g.add((id, RDF.type, onto.Editor))

    g.add((id, onto.personName, Literal(person.Name)))


def add_area(area):
    
     # Create URI using the area's ID
    id = URIRef(URI+'/' + area.ID)

    g.add((id, RDF.type, onto.Area))

    g.add((id, onto.keywords, Literal(area.Keywords)))


def add_venue(venue):

    id = URIRef(URI+'/' + venue.ID)

    if venue.Type == 'Conference':
        if venue.ConferenceType == 'RegularConference':
            g.add((id, RDF.type, onto.RegularConference))
        elif venue.ConferenceType == 'Symposium':
            g.add((id, RDF.type, onto.Symposium))
        elif venue.ConferenceType == 'Workshop':
            g.add((id, RDF.type, onto.Workshop))
        elif venue.ConferenceType == 'ExpertGroup':
            g.add((id, RDF.type, onto.ExpertGroup))
        g.add((id, onto.venueName, Literal(venue.Name)))
            
    elif venue.Type == 'Journal':
        g.add((id, RDF.type, onto.Journal))
        g.add((id, onto.venueName, Literal(venue.Name)))



def add_paper(paper):

    id = URIRef(URI+'/' + paper.ID)

    if paper.PaperType == 'FullPaper':
        g.add((id, RDF.type, onto.FullPaper))
    elif paper.PaperType == 'DemoPaper':
        g.add((id, RDF.type, onto.DemoPaper))
    elif paper.PaperType == 'ShortPaper':
        g.add((id, RDF.type, onto.ShortPaper))
    
    # We check if the paper belongs to a conference before adding it as a Poster 
    elif paper.PaperType == 'Poster' and paper.VenueType == 'Conference':
        g.add((id, RDF.type, onto.Poster))

    g.add((id, onto.title, Literal(paper.Title)))


def add_review(review):

    id = URIRef(URI+'/' + review.ReviewID)

    g.add((id, RDF.type, onto.Review))

    g.add((id, onto.decision, Literal(review.Decision)))

    g.add((id, onto.reviewText, Literal(review.ReviewText)))


def add_volume(volume):

    id = URIRef(URI+'/' + volume.VolumeID)
   
    g.add((id, RDF.type, onto.Volume))

    g.add((id, onto.year, Literal(volume.volumeYear)))


def add_proceedings(proceedings):

    id = URIRef(URI+'/' + proceedings.ProceedingID)
   
    g.add((id, RDF.type, onto.Proceedings))

    g.add((id, onto.year, Literal(proceedings.proceedingsYear)))


# Properties

def submits(row):
    authorID = URIRef(URI+'/' + row.AuthorID)
    paperID = URIRef(URI+'/' + row.ID)

    g.add((authorID, onto.submits, paperID))

def submittedTo(row):
    paperID = URIRef(URI+'/' + row.ID)
    venueID = URIRef(URI+'/' + row.VenueID)

    g.add((paperID, onto.submittedTo, venueID))

# Constraint: Reviewer must be assigned to a paper to be able to hand in review 
def handsIn(row):
    reviewerID = URIRef(URI+'/' + row.ReviewerID)
    reviewID = URIRef(URI+'/' + row.ReviewID)

    paperID = URIRef(URI+'/' + row.PaperID)

    # Check if reviewer is assigned to paper
    if reviewerID in set(g.subjects(predicate=onto.assignedTo, object=paperID)):
        g.add((reviewerID, onto.handsIn, reviewID))


# Constraint: A reviewer can only be assigned to a paper if a responsible assigns them
def assignedTo(row):
    reviewerID = URIRef(URI+'/' + row.ReviewerID)
    paperID = URIRef(URI+'/' + row.PaperID)

    responsibleID = URIRef(URI+'/' + row.ResponsibleID)

    if reviewerID in set(g.objects(predicate=onto.assigns, subject=responsibleID)):
        g.add((reviewerID, onto.assignedTo, paperID))

venue_df = pd.read_csv('../data/venue.csv')
paper_df = pd.read_csv('../data/paper.csv')
review_df = pd.read_csv('../data/review.csv')
# Constraint: Responsible assigns at least 2 reviewers to a paper
def assigns(row):
    venue_id = paper_df.loc[paper_df['ID'] == row.PaperID, 'VenueID'].values[0]
    responsibleID = venue_df.loc[venue_df['ID'] == venue_id, 'ResponsibleID'].values
    
    # Check if the responsible assigning the reviewer is a responsible of the venue
    if row.ResponsibleID in responsibleID:
        # We get the list of reviewers for the submitted paper
        reviewers = review_df.loc[review_df['PaperID'] == row.PaperID, 'ReviewerID'].values

        # We remove duplicate reviewers in the case that there are any
        reviewers = set(reviewers)

        # We check that there are at least two reviewers per submitted paper
        if len(reviewers) >= 2:
            responsibleID = URIRef(URI+'/' + row.ResponsibleID)
            reviewerID = URIRef(URI+'/' + row.ReviewerID)

            g.add((responsibleID, onto.assigns, reviewerID))

# Constraint: Paper can only be included if it is published and been submitted to the journal of the volume
def includedInVolume(row):

    if row.Published and row.VenueType == 'Journal':
        paperID = URIRef(URI+'/' + row.ID)
        venue = list(set(g.objects(predicate=onto.submittedTo, subject=paperID)))[0]
        venueID = URIRef(URI+'/' + row.VenueID)
        if venue == venueID:
            volumeID = URIRef(URI+'/' + row.PublishedIn)
            g.add((paperID, onto.includedInVolume, volumeID))

# Constraint: Paper can only be included if it is published and been submitted to the conference of the proceeding
def includedInProceedings(row):
    if row.Published and row.VenueType == 'Conference':
        paperID = URIRef(URI+'/' + row.ID)
        venue = list(set(g.objects(predicate=onto.submittedTo, subject=paperID)))[0]
        venueID = URIRef(URI+'/' + row.VenueID)
        if venue == venueID:
            proceedingID = URIRef(URI+'/' + row.PublishedIn)
            g.add((paperID, onto.includedInProceedings, proceedingID))

def paperRelatedTo(row):
    paperID = URIRef(URI+'/' + row.ID)
    areaID = URIRef(URI+'/' + row.Area)

    g.add((paperID, onto.paperRelatedTo, areaID))

def venueRelatedTo(row):
    venueID = URIRef(URI+'/' + row.ID)
    areaID = URIRef(URI+'/' + row.Area)

    g.add((venueID, onto.venueRelatedTo, areaID))

def isPartOfJournal(row):
    volumeID = URIRef(URI+'/' + row.VolumeID)
    journalID = URIRef(URI+'/' + row.JournalID)

    g.add((volumeID, onto.isPartOfJournal, journalID))

def isPartOfConference(row):
    proceedingID = URIRef(URI+'/' + row.ProceedingID)
    conferenceID = URIRef(URI+'/' + row.ConferenceID)

    g.add((proceedingID, onto.isPartOfConference, conferenceID))

# Constraint: Only Conferences
def handledByChairperson(row):
    if row.Type == 'Conference':
        conferenceID = URIRef(URI+'/' + row.ID)
        chairpersonID = URIRef(URI+'/' + row.ResponsibleID)

        g.add((conferenceID, onto.handledByChairperson, chairpersonID))

# Constraint: Only Journals
def handledByEditor(row):
    if row.Type == 'Journal':
        journalID = URIRef(URI+'/' + row.ID)
        editorID = URIRef(URI+'/' + row.ResponsibleID)

        g.add((journalID, onto.handledByEditor, editorID))

### Loading Instances

In [76]:
# Loading people
# person_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/people.csv')
person_df = pd.read_csv('../data/people.csv')
person_df.apply(add_person, axis=1)

# Loading area
# area_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/area.csv')
area_df = pd.read_csv('../data/area.csv')
area_df.apply(add_area, axis=1)

# Loading venue
# venue_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/venue.csv')
venue_df = pd.read_csv('../data/venue.csv')
venue_df.apply(add_venue, axis=1)
venue_df.apply(venueRelatedTo, axis=1)
venue_df.apply(handledByEditor, axis=1)
venue_df.apply(handledByChairperson, axis=1)

# Loading paper
# paper_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/paper.csv')
paper_df = pd.read_csv('../data/paper.csv')
paper_df.apply(add_paper, axis=1)
paper_df.apply(submits, axis=1)
paper_df.apply(submittedTo, axis=1)
paper_df.apply(includedInVolume, axis=1)
paper_df.apply(includedInProceedings, axis=1)
paper_df.apply(paperRelatedTo, axis=1)

# Loading review
# review_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/review.csv')
review_df = pd.read_csv('../data/review.csv')
review_df.apply(add_review, axis=1)
review_df.apply(assigns, axis=1)
review_df.apply(assignedTo, axis=1)
review_df.apply(handsIn, axis=1)

# Loading volume
# volume_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/volume.csv')
volume_df = pd.read_csv('../data/volume.csv')
volume_df.apply(add_volume, axis=1)
volume_df.apply(isPartOfJournal, axis=1)

# Loading proceedings
# proceedings_df = pd.read_csv('drive/MyDrive/SDM_lab3/data/proceedings.csv')
proceedings_df = pd.read_csv('../data/proceedings.csv')
proceedings_df.apply(add_proceedings, axis=1)
proceedings_df.apply(isPartOfConference, axis=1)

0      None
1      None
2      None
3      None
4      None
       ... 
145    None
146    None
147    None
148    None
149    None
Length: 150, dtype: object

### Export Graph

In [77]:
g.serialize(destination='11-B2-GlennieParaskeva.ttl', format='turtle')

<Graph identifier=N9f7f002be4cf4a189cc9966b42b01543 (<class 'rdflib.graph.Graph'>)>