In [None]:
%pip install -q owlrl rdflib gdown==4.6.3

In [None]:
from owlrl import DeductiveClosure, RDFS_OWLRL_Semantics
from pathlib import Path
from rdflib import Literal, Namespace, BNode
from rdflib.namespace import RDF, RDFS, OWL, XSD
import gdown
import pickle

In [None]:
data_dir = Path('./data/')
graph_path = data_dir / Path('data_graph.pkl')
kb_path = data_dir / Path('knowledge_base.ttl')


def download_data() -> None:
  """Download the pickled data graph into ``graph_path`` using gdown.

  Creates ``data_dir`` first when it is missing; an already existing folder is
  left untouched.
  """
  # parents/exist_ok make this a real "create if not existing": a bare mkdir()
  # raises FileExistsError when the folder is already there.
  data_dir.mkdir(parents=True, exist_ok=True)

  gdown.download(
    id='1kVxl-g28-8qgjhOx9to1EDMiN_bKN7-_',
    output=str(graph_path),
    quiet=True
  )


# Test for the file itself rather than the folder, so an existing data folder
# that does not (yet) contain the pickle still triggers the download.
if not graph_path.exists():
  download_data()

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# this is only acceptable as long as the downloaded file is trusted.
with open(graph_path, 'rb') as file:
  graph = pickle.load(file)

ct = Namespace('http://example.org/ontologies/clinical_trials/')
graph.bind('ct', ct)

## Ontology

We have the following base classes:
- ClinicalTrial
- Condition
- Criterion
- Gender

We have the following object properties:
- condition
- conditionPartOf
- eligibilityExclude
- eligibilityExcludedBy
- eligibilityGender
- eligibilityGenderPartOf
- eligibilityInclude
- eligibilityIncludedBy

We have the following base data properties:
- eligibilityHealthy
- eligibilityMaximumAge
- eligibilityMinimumAge

In [None]:
def create_two_node_collection(graph, payload_1, payload_2):
  """Add a two-element RDF list to ``graph`` and return its first cell.

  Two fresh blank nodes are chained with rdf:first / rdf:rest: the first cell
  carries ``payload_1`` and points at the second, which carries ``payload_2``
  and is terminated with rdf:nil.
  """
  first_cell, second_cell = BNode(), BNode()

  list_triples = (
    (first_cell, RDF.first, payload_1),
    (first_cell, RDF.rest, second_cell),
    (second_cell, RDF.first, payload_2),
    (second_cell, RDF.rest, RDF.nil),
  )
  for triple in list_triples:
    graph.add(triple)

  return first_cell

In [None]:
# == Object properties ==

# One row per forward property: (property, inverse property, range class).
# Every forward property gets ct.ClinicalTrial as its domain; the inverse is
# only declared as an object property and linked through owl:inverseOf.
object_properties = (
  ('condition', 'conditionPartOf', 'Condition'),
  ('eligibilityExclude', 'eligibilityExcludedBy', 'Criterion'),
  ('eligibilityGender', 'eligibilityGenderPartOf', 'Gender'),
  ('eligibilityInclude', 'eligibilityIncludedBy', 'Criterion'),
)

# The loop replaces four copy-pasted stanzas and, being a statement, does not
# leave a stray Graph repr as the cell output.
for property_name, inverse_name, range_name in object_properties:
  graph.add((ct[property_name], RDF.type, OWL.ObjectProperty))
  graph.add((ct[property_name], OWL.inverseOf, ct[inverse_name]))
  graph.add((ct[property_name], RDFS.domain, ct.ClinicalTrial))
  graph.add((ct[property_name], RDFS.range, ct[range_name]))

  graph.add((ct[inverse_name], RDF.type, OWL.ObjectProperty))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == Data properties ==

# eligibilityHealthy
graph.add((ct.eligibilityHealthy, RDF.type, OWL.DatatypeProperty))
graph.add((ct.eligibilityHealthy, RDFS.domain, ct.ClinicalTrial))
graph.add((ct.eligibilityHealthy, RDFS.range, XSD.boolean))

# eligibilityMaximumAge / eligibilityMinimumAge and their per-unit
# sub-properties (eligibilityMaximumDays, ..., eligibilityMinimumYears).
# The base property is only declared; each unit property is a sub-property of
# it with domain ct.ClinicalTrial and range xsd:integer.
age_units = ('Days', 'Hours', 'Minutes', 'Months', 'Weeks', 'Years')

# The nested loop replaces twelve copy-pasted stanzas and, being a statement,
# does not leave a stray Graph repr as the cell output.
for bound in ('Maximum', 'Minimum'):
  base_property = ct[f'eligibility{bound}Age']
  graph.add((base_property, RDF.type, OWL.DatatypeProperty))

  for unit in age_units:
    unit_property = ct[f'eligibility{bound}{unit}']
    graph.add((unit_property, RDF.type, OWL.DatatypeProperty))
    graph.add((unit_property, RDFS.subPropertyOf, base_property))
    graph.add((unit_property, RDFS.domain, ct.ClinicalTrial))
    graph.add((unit_property, RDFS.range, XSD.integer))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == Classes ==

# Declare the four base classes; the loop avoids repeating the statement and
# does not leave a stray Graph repr as the cell output.
for class_name in ('ClinicalTrial', 'Condition', 'Criterion', 'Gender'):
  graph.add((ct[class_name], RDF.type, OWL.Class))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == ClinicalTrial Gender subclasses ==

# GenderClinicalTrial
graph.add((ct.GenderClinicalTrial, RDF.type, OWL.Class))
graph.add((ct.GenderClinicalTrial, RDFS.subClassOf, ct.ClinicalTrial))

# FemaleClinicalTrial / MaleClinicalTrial
# Each class is equivalent to an anonymous owl:hasValue restriction on
# ct.eligibilityGender and is a subclass of ct.GenderClinicalTrial.
gender_classes = (
  ('FemaleClinicalTrial', ct.female),
  ('MaleClinicalTrial', ct.male),
)

# The loop replaces two copy-pasted stanzas and, being a statement, does not
# leave a stray Graph repr as the cell output.
for class_name, gender in gender_classes:
  gender_class = ct[class_name]
  equivalent_class = BNode()

  graph.add((gender_class, RDF.type, OWL.Class))
  graph.add((equivalent_class, RDF.type, OWL.Restriction))
  graph.add((equivalent_class, OWL.onProperty, ct.eligibilityGender))
  graph.add((equivalent_class, OWL.hasValue, gender))
  graph.add((gender_class, OWL.equivalentClass, equivalent_class))
  graph.add((gender_class, RDFS.subClassOf, ct.GenderClinicalTrial))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == ClinicalTrial Health subclasses ==

# HealthClinicalTrial
graph.add((ct.HealthClinicalTrial, RDF.type, OWL.Class))
graph.add((ct.HealthClinicalTrial, RDFS.subClassOf, ct.ClinicalTrial))

# HealthyClinicalTrial / UnhealthyClinicalTrial
# Each class is equivalent to an anonymous owl:hasValue restriction on
# ct.eligibilityHealthy (true / false as xsd:boolean) and is a subclass of
# ct.HealthClinicalTrial.
health_classes = (
  ('HealthyClinicalTrial', True),
  ('UnhealthyClinicalTrial', False),
)

# The loop replaces two copy-pasted stanzas and, being a statement, does not
# leave a stray Graph repr as the cell output.
for class_name, healthy in health_classes:
  health_class = ct[class_name]
  equivalent_class = BNode()

  graph.add((health_class, RDF.type, OWL.Class))
  graph.add((equivalent_class, RDF.type, OWL.Restriction))
  graph.add((equivalent_class, OWL.onProperty, ct.eligibilityHealthy))
  graph.add((equivalent_class, OWL.hasValue, Literal(healthy, datatype=XSD.boolean)))
  graph.add((health_class, OWL.equivalentClass, equivalent_class))
  graph.add((health_class, RDFS.subClassOf, ct.HealthClinicalTrial))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == ClinicalTrial Temporal subclasses ==

def create_temporal_clinical_trial_subclass(graph, name: str, max_property: str, min_property: str):
  """Add the class ``ct[name]`` for trials bounded in one time unit.

  The class is declared as an owl:Class, made equivalent to the union of two
  anonymous restrictions (owl:someValuesFrom xsd:integer on ``ct[max_property]``
  and on ``ct[min_property]``), and made a subclass of
  ``ct.TemporalClinicalTrial``. All triples are added to ``graph``; nothing is
  returned.
  """
  temporal_class = ct[name]

  def add_integer_restriction(property_name):
    # Anonymous owl:Restriction: some value of ct[property_name] is an xsd:integer
    restriction = BNode()
    for predicate, value in (
      (RDF.type, OWL.Restriction),
      (OWL.onProperty, ct[property_name]),
      (OWL.someValuesFrom, XSD.integer),
    ):
      graph.add((restriction, predicate, value))
    return restriction

  graph.add((temporal_class, RDF.type, OWL.Class))

  # The two restrictions become the members of the owl:unionOf list
  max_restriction = add_integer_restriction(max_property)
  min_restriction = add_integer_restriction(min_property)
  union_members = create_two_node_collection(graph, max_restriction, min_restriction)

  # Anonymous class standing for "max restriction OR min restriction"
  union_class = BNode()
  graph.add((union_class, RDF.type, OWL.Class))
  graph.add((union_class, OWL.unionOf, union_members))

  # Tie the named class to the union and place it in the hierarchy
  graph.add((temporal_class, OWL.equivalentClass, union_class))
  graph.add((temporal_class, RDFS.subClassOf, ct.TemporalClinicalTrial))


# TemporalClinicalTrial
# Parent of the per-unit classes below. It is declared here in the same way as
# GenderClinicalTrial and HealthClinicalTrial, because the per-unit classes
# reference it as their superclass.
graph.add((ct.TemporalClinicalTrial, RDF.type, OWL.Class))
graph.add((ct.TemporalClinicalTrial, RDFS.subClassOf, ct.ClinicalTrial))

# DayClinicalTrial, HourClinicalTrial, MinuteClinicalTrial, MonthClinicalTrial,
# WeekClinicalTrial, YearClinicalTrial
# The class and property names are derived from the unit so that they cannot
# drift apart (the Week entry previously passed 'eligibilityMinimums' instead
# of 'eligibilityMinimumWeeks').
for unit in ('Day', 'Hour', 'Minute', 'Month', 'Week', 'Year'):
  create_temporal_clinical_trial_subclass(
    graph,
    f'{unit}ClinicalTrial',
    f'eligibilityMaximum{unit}s',
    f'eligibilityMinimum{unit}s'
  )

In [None]:
# == Criterion subclasses ==

# ExclusionCriterion / InclusionCriterion
# Each class is equivalent to an anonymous owl:someValuesFrom restriction
# ("linked to some ct.ClinicalTrial through the given property") and is a
# subclass of ct.Criterion.
criterion_classes = (
  ('ExclusionCriterion', ct.eligibilityExcludedBy),
  ('InclusionCriterion', ct.eligibilityIncludedBy),
)

# The loop replaces two copy-pasted stanzas and, being a statement, does not
# leave a stray Graph repr as the cell output.
for class_name, linking_property in criterion_classes:
  criterion_class = ct[class_name]
  equivalent_class = BNode()

  graph.add((criterion_class, RDF.type, OWL.Class))
  graph.add((equivalent_class, RDF.type, OWL.Restriction))
  graph.add((equivalent_class, OWL.onProperty, linking_property))
  graph.add((equivalent_class, OWL.someValuesFrom, ct.ClinicalTrial))
  graph.add((criterion_class, OWL.equivalentClass, equivalent_class))
  graph.add((criterion_class, RDFS.subClassOf, ct.Criterion))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == General axioms ==

# owl:AllDisjointClasses axiom over the four base classes
axiom_node = BNode()
graph.add((axiom_node, RDF.type, OWL.AllDisjointClasses))

# Build the RDF list of members: one blank-node cell per class, each cell
# pointing at the next one and the last one at rdf:nil.
disjoint_classes = [ct.ClinicalTrial, ct.Condition, ct.Criterion, ct.Gender]
list_cells = [BNode() for _ in disjoint_classes]
following_cells = list_cells[1:] + [RDF.nil]

for cell, following_cell in zip(list_cells, following_cells):
  graph.add((cell, RDF.rest, following_cell))

for cell, member in zip(list_cells, disjoint_classes):
  graph.add((cell, RDF.first, member))

# Attach the list to the axiom through its first cell
head = list_cells[0]
graph.add((axiom_node, OWL.members, head))

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>

In [None]:
# == Inference ==
# Configure the RDFS_OWLRL_Semantics closure with the three optional switches
# turned off, then run it over `graph`.
closure = DeductiveClosure(
  RDFS_OWLRL_Semantics,
  rdfs_closure=False,
  axiomatic_triples=False,
  datatype_axioms=False
)
closure.expand(graph)

In [None]:
# Write the graph to kb_path. The format is passed explicitly so that the
# .ttl file is Turtle independently of the installed rdflib's default
# serializer; the trailing semicolon hides the call's return value.
graph.serialize(destination=str(kb_path), format='turtle');

<Graph identifier=Nde9e815a2df14d08a5c6aac0c254f899 (<class 'rdflib.graph.Graph'>)>