# BMKG Project
## Author: Aurélien Bertrand - I6256590

This notebook creates the RDF representation of the obtained datasets, namely a set of planetary systems with confirmed exoplanets. See the report for more details.

### 1. Install and import libraries

In [1]:
!pip install typing pandas numpy rdflib



In [2]:
from typing import Dict
from typing import Union

import pandas as pd
import numpy as np

from rdflib import Namespace
from rdflib import Graph
from rdflib import RDFS
from rdflib import RDF
from rdflib import URIRef
from rdflib import Literal
from rdflib import XSD
from rdflib import TIME
from rdflib import OWL
from rdflib import DCTERMS



### 2. Load the data

In [3]:
# Read the planet table and keep only the rows whose `default_flag` is 1
# (the most up-to-date record for each observation).
ps = (
    pd.read_csv("./data/planetary_systems.csv", low_memory=False)
    .loc[lambda records: records["default_flag"] == 1]
)
ps.head()

Unnamed: 0,pl_name,pl_letter,hostname,hd_name,hip_name,tic_id,gaia_id,default_flag,pl_refname,sy_refname,...,sy_jmagerr1,sy_jmagerr2,sy_jmagstr,sy_hmag,sy_hmagerr1,sy_hmagerr2,sy_hmagstr,sy_kmag,sy_kmagerr1,sy_kmagerr2
0,OGLE-TR-10 b,b,OGLE-TR-10,,,TIC 130150682,Gaia DR2 4056443366649948160,1,<a refstr=TORRES_ET_AL__2008 href=https://ui.a...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,...,,,13.692,13.314,0.121,-0.121,13.314&plusmn;0.121,12.856,,
2,BD-08 2823 c,c,BD-08 2823,,HIP 49067,TIC 33355302,Gaia DR2 3770419611540574080,1,<a refstr=HEBRARD_ET_AL__2010 href=https://ui....,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,...,0.02,-0.02,7.96&plusmn;0.02,7.498,0.047,-0.047,7.498&plusmn;0.047,7.323,0.021,-0.021
3,HR 8799 c,c,HR 8799,HD 218396,HIP 114189,TIC 245368902,Gaia DR2 2832463659640297472,1,<a refstr=MAROIS_ET_AL__2008 href=https://ui.a...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,...,0.027,-0.027,5.383&plusmn;0.027,5.28,0.018,-0.018,5.280&plusmn;0.018,5.24,0.018,-0.018
24,HD 110014 b,b,HD 110014,HD 110014,HIP 61740,TIC 32448045,Gaia DR2 3676091134604409728,1,<a refstr=DE_MEDEIROS_ET_AL__2009 href=https:/...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,...,0.308,-0.308,2.674&plusmn;0.308,2.172,0.25,-0.25,2.172&plusmn;0.250,2.011,0.312,-0.312
25,TOI-2084 b,b,TOI-2084,,,TIC 441738827,Gaia DR2 1652137995942479744,1,<a refstr=BARKAOUI_ET_AL__2023 href=https://ui...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,...,0.021,-0.021,11.961&plusmn;0.021,11.356,0.018,-0.018,11.356&plusmn;0.018,11.148,0.02,-0.02


In [4]:
# Read the stellar-host table and preview its first rows.
stellar = pd.read_csv(filepath_or_buffer="./data/stellar.csv", low_memory=False)
stellar.head()

Unnamed: 0,hostname,hd_name,hip_name,tic_id,st_refname,sy_refname,ra,rastr,dec,decstr,...,st_spectype,sy_kepmag,sy_kepmagerr1,sy_kepmagerr2,st_rotp,st_rotperr1,st_rotperr2,st_rotplim,gaia_id,cb_flag
0,MOA-2010-BLG-117L,,,,<a refstr=BENNETT_ET_AL__2018 href=https://ui....,<a refstr=BENNETT_ET_AL__2018 href=https://ui....,271.95697,18h07m49.67s,-25.344639,-25d20m40.7s,...,,,,,,,,,,0
1,Kepler-914,,,TIC 159386728,<a refstr=BERGER_ET_AL__2018 href=https://ui.a...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,290.412077,19h21m38.90s,44.899323,+44d53m57.56s,...,,13.574,,,,,,,Gaia DR2 2127019915540964224,0
2,Kepler-579,,,TIC 138959504,<a refstr=BATALHA_ET_AL__2013 href=https://ui....,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,295.002077,19h40m00.50s,39.952861,+39d57m10.30s,...,,14.272,,,,,,,Gaia DR2 2076477053946253952,0
3,Kepler-1419,,,TIC 137548281,<a refstr=Q1_Q16_KOI_TABLE href=https://exopla...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,292.254592,19h29m01.10s,37.971376,+37d58m16.95s,...,,15.761,,,,,,,Gaia DR2 2051897265347291520,0
4,Kepler-1610,,,TIC 272073436,<a refstr=Q1_Q17_DR25_KOI_TABLE href=https://e...,<a refstr=STASSUN_ET_AL__2019 href=https://ui....,295.90498,19h43m37.20s,45.40543,+45d24m19.55s,...,,15.85,,,,,,,Gaia DR2 2079807646464016256,0


### 3. Define helper functions

In [5]:
# Keys of the per-parameter attribute dictionaries read by add_object_parameters().
PARAM_NAME = "parameter_name"  # key of the RDF property that links an object to its value
PARAM_UNIT = "parameter_unit"  # key of the unit URI attached to the value (may map to None)

# Formats the name of an entity to be used as a URI
def format_name(name: str) -> str:
    """Return ``name`` in the form used for URI local names.

    Leading and trailing whitespace is removed, then every remaining space
    character is replaced by an underscore. Other characters are left as-is.
    """
    trimmed = name.strip()
    return "_".join(trimmed.split(" "))

# Create a quantitative value (value with uncertainty around it)
def create_quantitative_value(
    name: str,
    param: str,
    value: float,
    min_eps: Union[None, float],
    max_eps: Union[None, float],
    unit: Union[None, URIRef],
) -> URIRef:
    """Add a ``schema:QuantitativeValue`` node to ``ExoKG`` and return its URI.

    The node URI is built in the OBSERVATION namespace from the formatted
    ``name`` and ``param``, joined by a slash.

    Args:
        name: Name of the entity the value belongs to.
        param: Name of the measured parameter (used in the node URI).
        value: The measured value, stored as an ``xsd:double``.
        min_eps: Offset added to ``value`` to obtain ``schema:minValue``;
            skipped when ``None`` or NaN.
        max_eps: Offset added to ``value`` to obtain ``schema:maxValue``;
            skipped when ``None`` or NaN.
        unit: URI stored as ``schema:unitCode``; skipped when ``None``.

    Returns:
        The URI of the quantitative value node.
    """
    node = OBSERVATION["/".join((format_name(name=name), param))]
    ExoKG.add((node, RDF.type, SCHEMA.QuantitativeValue))
    ExoKG.add((node, SCHEMA.value, Literal(value, datatype=XSD.double)))
    if unit is not None:
        ExoKG.add((node, SCHEMA.unitCode, unit))

    # Both bounds are optional and are expressed as offsets relative to `value`.
    for bound_predicate, offset in (
        (SCHEMA.minValue, min_eps),
        (SCHEMA.maxValue, max_eps),
    ):
        if offset is None or pd.isna(offset):
            continue
        ExoKG.add((node, bound_predicate, Literal(value + offset, datatype=XSD.double)))

    return node

# Adds relevant parameters to celestial objects (stars and exoplanets)
def add_object_parameters(
    object_uri: URIRef,
    object_row: pd.Series,
    parameters: Dict[str, Dict[str, URIRef]],
    name: str
) -> None:
    """Attach every available parameter of ``object_row`` to ``object_uri``.

    For each column listed in ``parameters`` whose value is not missing, a
    quantitative value node is created and linked to ``object_uri`` through
    the property stored under ``PARAM_NAME``.

    Args:
        object_uri: URI of the object receiving the parameters.
        object_row: Row holding the parameter values; the optional columns
            ``<param>err1`` and ``<param>err2`` provide the upper and lower
            offsets of the value.
        parameters: Maps a column name to a dictionary with the keys
            ``PARAM_NAME`` (linking property) and ``PARAM_UNIT`` (unit URI).
        name: Name of the object, used to build the value node URIs.
    """
    available_columns = object_row.index
    for column, spec in parameters.items():
        measurement = object_row[column]
        if not pd.isna(measurement):
            upper_column = column + "err1"
            lower_column = column + "err2"
            # Offsets default to None when the row has no such column.
            upper_offset = object_row[upper_column] if upper_column in available_columns else None
            lower_offset = object_row[lower_column] if lower_column in available_columns else None

            value_node = create_quantitative_value(
                name=name,
                param=column,
                value=measurement,
                min_eps=lower_offset,
                max_eps=upper_offset,
                unit=spec[PARAM_UNIT],
            )
            ExoKG.add((object_uri, spec[PARAM_NAME], value_node))

### 4. Create the ontology with mappings to existing vocabularies


In [6]:
# Create graph
# In-memory rdflib graph to which the helper functions and the cells below add triples.
ExoKG = Graph()

In [7]:
# Base IRI from which the self-defined namespaces of this notebook are derived.
url = "https://example.org/ontology/"

1) Add namespaces

In [8]:
# Self-defined ontology
EX = Namespace(url)
OBSERVATION = Namespace(url + "observation/")
DISCOVERY = Namespace(url + "discovery/")
CELESTIAL_BODY = Namespace(url + "celestial_body/")
EXOPLANET = Namespace(url + "celestial_body/exoplanet/")
STAR = Namespace(url + "celestial_body/star/")
PLANETARY_SYSTEM = Namespace(url + "planetary_system/")
UNIT = Namespace(url + "unit/")

# TODO: dbp:discoveryMethod and dbr:Direct_imaging
# Existing ontologies
SCHEMA = Namespace("https://schema.org/")
UCUM = Namespace("https://w3id.org/uom/")
OM2 = Namespace("http://www.ontology-of-units-of-measure.org/resource/om-2/")
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
# Wikidata and DBpedia identify their terms with http:// IRIs under /entity/,
# /ontology/, /property/ and /resource/. The https /wiki/ and /page/ addresses
# are the human-readable HTML pages, not the identifiers used in their RDF data,
# so links built on them would not match the published terms.
WD = Namespace("http://www.wikidata.org/entity/")
DBO = Namespace("http://dbpedia.org/ontology/")
DBP = Namespace("http://dbpedia.org/property/")
DBR = Namespace("http://dbpedia.org/resource/") # TODO: check everywhere where to use DBO and DBP

In [9]:
# Register a short prefix for every namespace used in the graph.
namespace_prefixes = {
    # Self-defined ontology
    "ex": EX,
    "obs": OBSERVATION,
    "disc": DISCOVERY,
    "cb": CELESTIAL_BODY,
    "exo": EXOPLANET,
    "star": STAR,
    "ps": PLANETARY_SYSTEM,
    "unit": UNIT,
    # Existing ontologies
    "schema": SCHEMA,
    "ucum": UCUM,
    "oum": OM2,
    "skos": SKOS,
    "wd": WD,
    "dbo": DBO,
    "dbp": DBP,
    "dbr": DBR,
}
for prefix, namespace in namespace_prefixes.items():
    ExoKG.bind(prefix, namespace)

2) Define relations / entities that are widely used

In [10]:
# Short aliases for the RDF terms used throughout the ontology definition.
# NOTE(review): `type` and `range` shadow the Python builtins of the same name
# for the rest of the notebook; the cells below rely on these aliases, so any
# rename has to be applied to all of them at once.
type = RDF.type
label = RDFS.label
definition = SKOS.definition
domain = RDFS.domain
range = RDFS.range

identifier = DCTERMS.identifier

# Meta-classes used to declare the ontology's own classes and properties
Class = RDFS.Class
Property = RDF.Property

# Alignment with terms of external vocabularies
same_as = OWL.sameAs

broader = SKOS.broader

# Range of every property whose value carries a unit and optional bounds
quantitative_value = SCHEMA.QuantitativeValue

3) Define units

In [11]:
# Class of all units of measurement declared below, linked to the om-2 Unit term.
Unit = EX["Unit"]
ExoKG.add((Unit, type, Class))
ExoKG.add((Unit, same_as, OM2["Unit"]))

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

In [12]:
# Standard UCUM units
# Each unit is declared as an instance of Unit and linked to its UCUM code.
astronomical_units = UNIT.astronomical_units
ExoKG.add((astronomical_units, type, Unit))
ExoKG.add((astronomical_units, same_as, UCUM.AU))

kilometer = UNIT.kilometer
ExoKG.add((kilometer, type, Unit))
ExoKG.add((kilometer, same_as, UCUM.km))

kelvin = UNIT.kelvin
ExoKG.add((kelvin, type, Unit))
ExoKG.add((kelvin, same_as, UCUM.K))

degree = UNIT.degree
ExoKG.add((degree, type, Unit))
ExoKG.add((degree, same_as, UCUM.deg))

g_cm3 = UNIT.gram_per_cubic_centimeter
ExoKG.add((g_cm3, type, Unit))
# UCUM code for gram per cubic centimetre is "g.cm-3", following the same
# pattern as "W.m-2", "cm.s-2" and "km.s-1" below. The previous "ExoKG.cm-3"
# is not a unit code.
ExoKG.add((g_cm3, same_as, UCUM["g.cm-3"]))

w_m2 = UNIT.watt_per_square_meter
ExoKG.add((w_m2, type, Unit))
ExoKG.add((w_m2, same_as, UCUM["W.m-2"]))

parsec = UNIT.parsec
ExoKG.add((parsec, type, Unit))
ExoKG.add((parsec, same_as, UCUM.pc))

kilogram = UNIT.kilogram
ExoKG.add((kilogram, type, Unit))
ExoKG.add((kilogram, same_as, UCUM.kg))

cm_s2 = UNIT.centimeter_per_square_second
ExoKG.add((cm_s2, type, Unit))
ExoKG.add((cm_s2, same_as, UCUM["cm.s-2"]))

km_s = UNIT.kilometer_per_second
ExoKG.add((km_s, type, Unit))
ExoKG.add((km_s, same_as, UCUM["km.s-1"]))

watt = UNIT.watt
ExoKG.add((watt, type, Unit))
ExoKG.add((watt, same_as, UCUM.W))

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

In [13]:
# Properties describing a unit: its printed symbol and its conversion factor.
symbol = UNIT.symbol
ExoKG.add((symbol, type, Property))
ExoKG.add((symbol, same_as, OM2.symbol))

factor = UNIT.factor
ExoKG.add((factor, type, Property))
ExoKG.add((factor, same_as, OM2.hasFactor))

# Extra units of measurements
# Radius factors are in kilometres, mass factors in kilograms, the luminosity
# factor in watts and the flux factor in watts per square metre.
solar_radius = UNIT.solar_radius
ExoKG.add((solar_radius, type, Unit))
ExoKG.add((solar_radius, symbol, Literal("R☉", datatype=XSD.string)))
# 6.955e+5 km: expressed in kilometres like the Earth and Jupiter radius factors
# (the previous 6.955e+8 was the value in metres).
ExoKG.add((solar_radius, factor, Literal(6.955e+5, datatype=XSD.double)))
ExoKG.add((solar_radius, label, Literal("Solar radius", lang="en")))
ExoKG.add((solar_radius, definition, Literal("Radius of the Sun, used as unit of radius", lang="en")))
ExoKG.add((solar_radius, same_as, DBO.Solar_radius))

earth_radius = UNIT.earth_radius
ExoKG.add((earth_radius, type, Unit))
ExoKG.add((earth_radius, symbol, Literal("R🜨", datatype=XSD.string)))
ExoKG.add((earth_radius, factor, Literal(6378.137, datatype=XSD.double)))
ExoKG.add((earth_radius, label, Literal("Earth radius", lang="en")))
ExoKG.add((earth_radius, definition, Literal("Radius of planet Earth, used as unit of radius", lang="en")))
ExoKG.add((earth_radius, same_as, DBO.Earth_radius))

jupiter_radius = UNIT.jupiter_radius
ExoKG.add((jupiter_radius, type, Unit))
ExoKG.add((jupiter_radius, symbol, Literal("RJ", datatype=XSD.string)))
ExoKG.add((jupiter_radius, factor, Literal(71492, datatype=XSD.double)))
ExoKG.add((jupiter_radius, label, Literal("Jupiter radius", lang="en")))
ExoKG.add((jupiter_radius, definition, Literal("Radius of planet Jupiter, used as unit of radius", lang="en")))
ExoKG.add((jupiter_radius, same_as, DBO.Jupiter_radius))

solar_mass = UNIT.solar_mass
ExoKG.add((solar_mass, type, Unit))
ExoKG.add((solar_mass, symbol, Literal("M☉", datatype=XSD.string)))
ExoKG.add((solar_mass, factor, Literal(1.98892e+30, datatype=XSD.double)))
ExoKG.add((solar_mass, label, Literal("Solar mass", lang="en")))
ExoKG.add((solar_mass, definition, Literal("Mass of the Sun, used as unit of weight", lang="en")))
# Link to the solar mass term (was wrongly linked to Earth_mass, which made
# solar mass and Earth mass the same individual through owl:sameAs).
ExoKG.add((solar_mass, same_as, DBO.Solar_mass))

earth_mass = UNIT.earth_mass
ExoKG.add((earth_mass, type, Unit))
ExoKG.add((earth_mass, symbol, Literal("M🜨", datatype=XSD.string)))
ExoKG.add((earth_mass, factor, Literal(5.9724e+24, datatype=XSD.double)))
ExoKG.add((earth_mass, label, Literal("Earth mass", lang="en")))
ExoKG.add((earth_mass, definition, Literal("Mass of planet Earth, used as unit of weight", lang="en")))
ExoKG.add((earth_mass, same_as, DBO.Earth_mass))

jupiter_mass = UNIT.jupiter_mass
ExoKG.add((jupiter_mass, type, Unit))
ExoKG.add((jupiter_mass, symbol, Literal("MJ", datatype=XSD.string)))
ExoKG.add((jupiter_mass, factor, Literal(1.89813e+27, datatype=XSD.double)))
ExoKG.add((jupiter_mass, label, Literal("Jupiter mass", lang="en")))
ExoKG.add((jupiter_mass, definition, Literal("Mass of planet Jupiter, used as unit of weight", lang="en")))
ExoKG.add((jupiter_mass, same_as, DBO.Jupiter_mass))

solar_luminosity = UNIT.solar_luminosity
ExoKG.add((solar_luminosity, type, Unit))
ExoKG.add((solar_luminosity, symbol, Literal("L☉", datatype=XSD.string)))
ExoKG.add((solar_luminosity, factor, Literal(3.839e26, datatype=XSD.double)))
ExoKG.add((solar_luminosity, label, Literal("Solar luminosity", lang="en")))
ExoKG.add((solar_luminosity, definition, Literal("Luminosity of the Sun, used as unit of luminosity", lang="en")))
ExoKG.add((solar_luminosity, same_as, OM2.solarLuminosity))

earth_flux = UNIT.earth_flux
ExoKG.add((earth_flux, type, Unit))
ExoKG.add((earth_flux, symbol, Literal("GSC", datatype=XSD.string)))
ExoKG.add((earth_flux, factor, Literal(1361, datatype=XSD.double)))
ExoKG.add((earth_flux, label, Literal("Earth flux", lang="en")))
ExoKG.add((earth_flux, definition, Literal("Insolation flux provided by the Sun on the Earth, used as a unit of insolation flux", lang="en")))
ExoKG.add((earth_flux, same_as, DBO.Solar_constant))

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

In [14]:
# Non-UCUM units
days = UNIT.days
ExoKG.add((days, type, Unit))
# In OWL-Time the unit individual for days is time:unitDay; time:days is a
# property holding the number of days of a duration, not a unit.
ExoKG.add((days, same_as, TIME.unitDay))

gigayear = UNIT.gigayear
ExoKG.add((gigayear, type, Unit))
# One gigayear is 1e+9 years
ExoKG.add((gigayear, factor, Literal(1e+9, datatype=XSD.double)))
ExoKG.add((gigayear, same_as, OM2.gigayear))

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

4) Define classes

In [15]:
# Classes of the ontology, each aligned with a DBpedia term where one exists.
spectral_type = STAR.Spectral_type
ExoKG.add((spectral_type, type, Class))
ExoKG.add((spectral_type, label, Literal("Spectral type", lang="en")))
ExoKG.add((spectral_type, definition, Literal("Class of stars based on their spectral characteristics", lang="en")))
# "Stellar classification" is a DBpedia resource, so it lives in the resource
# namespace (DBR) like Exoplanet below, not in the property namespace (DBP).
ExoKG.add((spectral_type, same_as, DBR.Stellar_classification))

planetary_system = EX.Planetary_system
ExoKG.add((planetary_system, type, Class))
ExoKG.add((planetary_system, label, Literal("Planetary system", lang="en")))
ExoKG.add((planetary_system, definition, Literal("Groups of non-stellar objects in or out of orbit around a star or star system", lang="en")))
# Same as above: a resource, hence DBR rather than DBP.
ExoKG.add((planetary_system, same_as, DBR.Planetary_system))

celestial_body = EX.Celestial_body
ExoKG.add((celestial_body, type, Class))
ExoKG.add((celestial_body, same_as, DBO.CelestialBody))

star = EX.Star
ExoKG.add((star, type, Class))
ExoKG.add((star, RDFS.subClassOf, celestial_body))
ExoKG.add((star, same_as, DBO.Star))

exoplanet = EX.Exoplanet 
ExoKG.add((exoplanet, type, Class))
ExoKG.add((exoplanet, RDFS.subClassOf, celestial_body))
ExoKG.add((exoplanet, label, Literal("Exoplanet", lang="en")))
ExoKG.add((exoplanet, definition, Literal("Any planet beyond the Solar System", lang="en")))
ExoKG.add((exoplanet, same_as, DBR.Exoplanet))

discovery_method = EXOPLANET.Discovery_method
ExoKG.add((discovery_method, type, Class))
# NOTE(review): detectionMethod is spelled like a property name while
# Discovery_method is declared as a class here - confirm the intended target.
ExoKG.add((discovery_method, same_as, DBO.detectionMethod))

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

5) Define properties

In [16]:
# Properties of the ontology. Each one is declared as an rdf:Property with an
# English label and definition, a domain, a range, and a link to a DBpedia term
# where one is given. Every label and definition carries the "en" language tag.

# --- Properties shared by celestial bodies and planetary systems ---
distance_from_earth = CELESTIAL_BODY.distance_from_earth
ExoKG.add((distance_from_earth, type, Property))
ExoKG.add((distance_from_earth, label, Literal("Distance from Earth", lang="en")))
ExoKG.add((distance_from_earth, definition, Literal("Estimated distance to the celestial body (relative to Earth)", lang="en")))
ExoKG.add((distance_from_earth, domain, celestial_body))
ExoKG.add((distance_from_earth, range, quantitative_value))
ExoKG.add((distance_from_earth, broader, DBO.distance))

has_spectral_class = STAR.has_spectral_type
ExoKG.add((has_spectral_class, type, Property))
# Language tag added for consistency with every other label
ExoKG.add((has_spectral_class, label, Literal("Is a star of type", lang="en")))
ExoKG.add((has_spectral_class, definition, Literal("Classification of the star based on their spectral characteristics following the Morgan-Keenan system", lang="en")))
ExoKG.add((has_spectral_class, domain, star))
ExoKG.add((has_spectral_class, range, spectral_type))
ExoKG.add((has_spectral_class, same_as, DBO.stellarClassification))

# --- Counts attached to a planetary system ---
n_stars = PLANETARY_SYSTEM.number_of_stars
ExoKG.add((n_stars, type, Property))
ExoKG.add((n_stars, label, Literal("Number of stars", lang="en")))
ExoKG.add((n_stars, definition, Literal("Number of stars in the planetary system", lang="en")))
ExoKG.add((n_stars, domain, planetary_system))
ExoKG.add((n_stars, range, XSD.nonNegativeInteger))

n_moons = PLANETARY_SYSTEM.number_of_moons
ExoKG.add((n_moons, type, Property))
ExoKG.add((n_moons, label, Literal("Number of moons", lang="en")))
ExoKG.add((n_moons, definition, Literal("Number of moons in the planetary system", lang="en")))
ExoKG.add((n_moons, domain, planetary_system))
ExoKG.add((n_moons, range, XSD.nonNegativeInteger))

n_planets = PLANETARY_SYSTEM.number_of_planets
ExoKG.add((n_planets, type, Property))
ExoKG.add((n_planets, label, Literal("Number of planets", lang="en")))
ExoKG.add((n_planets, definition, Literal("Number of confirmed planets in the planetary system", lang="en")))
ExoKG.add((n_planets, domain, planetary_system))
ExoKG.add((n_planets, range, XSD.nonNegativeInteger))

located_in = CELESTIAL_BODY.located_in
ExoKG.add((located_in, type, Property))
ExoKG.add((located_in, label, Literal("Located in", lang="en")))
ExoKG.add((located_in, definition, Literal("Indicates that the celestial body is situated in a planetary system", lang="en")))
ExoKG.add((located_in, domain, celestial_body))
ExoKG.add((located_in, range, planetary_system))
ExoKG.add((located_in, same_as, DBP.locatedIn))

planet_letter = EXOPLANET.letter
ExoKG.add((planet_letter, type, Property))
ExoKG.add((planet_letter, label, Literal("Letter", lang="en")))
# Language tag added for consistency with every other definition
ExoKG.add((planet_letter, definition, Literal("Letter assigned to the planetary component of a planetary system. The first exoplanet discovered in a system is designated b; the second, c; the third, d; and so on", lang="en")))
ExoKG.add((planet_letter, domain, exoplanet))
ExoKG.add((planet_letter, range, XSD.string))

# --- Stellar properties ---
effective_temperature = STAR.effective_temperature
ExoKG.add((effective_temperature, type, Property))
ExoKG.add((effective_temperature, label, Literal("Effective temperature", lang="en")))
ExoKG.add((effective_temperature, definition, Literal("Temperature of the star as modeled by a black body emitting the same total amount of electromagnetic radiation", lang="en")))
ExoKG.add((effective_temperature, domain, star))
ExoKG.add((effective_temperature, range, quantitative_value))
ExoKG.add((effective_temperature, broader, DBP.temperature))

luminosity = STAR.luminosity
ExoKG.add((luminosity, type, Property))
ExoKG.add((luminosity, label, Literal("Luminosity", lang="en")))
# Language tag added for consistency with every other definition
ExoKG.add((luminosity, definition, Literal("Amount of energy emitted by a star per unit time", lang="en")))
ExoKG.add((luminosity, domain, star))
ExoKG.add((luminosity, range, quantitative_value))
ExoKG.add((luminosity, same_as, DBP.luminosity))

surface_gravity = STAR.surface_gravity
ExoKG.add((surface_gravity, type, Property))
ExoKG.add((surface_gravity, label, Literal("Surface gravity", lang="en")))
ExoKG.add((surface_gravity, definition, Literal("Gravitational acceleration experienced at the stellar surface", lang="en")))
ExoKG.add((surface_gravity, domain, star))
ExoKG.add((surface_gravity, range, quantitative_value))
ExoKG.add((surface_gravity, broader, DBO.surfaceGravity))

age = CELESTIAL_BODY.age
ExoKG.add((age, type, Property))
ExoKG.add((age, label, Literal("Age", lang="en")))
ExoKG.add((age, definition, Literal("Age of an astronomical object", lang="en")))
ExoKG.add((age, domain, celestial_body))
ExoKG.add((age, range, quantitative_value))
ExoKG.add((age, same_as, DBP.age))

rotational_velocity = STAR.rotational_velocity
ExoKG.add((rotational_velocity, type, Property))
ExoKG.add((rotational_velocity, label, Literal("Stellar rotational velocity", lang="en")))
ExoKG.add((rotational_velocity, definition, Literal("Rotational velocity at the equator of the star multiplied by the sine of the inclination", lang="en")))
ExoKG.add((rotational_velocity, domain, star))
ExoKG.add((rotational_velocity, range, quantitative_value))
ExoKG.add((rotational_velocity, same_as, DBP.rotationalVelocity))

rotational_period = STAR.rotational_period
ExoKG.add((rotational_period, type, Property))
ExoKG.add((rotational_period, label, Literal("Stellar rotational period", lang="en")))
ExoKG.add((rotational_period, definition, Literal("Time required for the star to complete one rotation, assuming it is a solid body", lang="en")))
ExoKG.add((rotational_period, domain, star))
ExoKG.add((rotational_period, range, quantitative_value))
ExoKG.add((rotational_period, same_as, DBO.rotationPeriod))

radial_velocity = STAR.radial_velocity
ExoKG.add((radial_velocity, type, Property))
ExoKG.add((radial_velocity, label, Literal("Radial velocity", lang="en")))
ExoKG.add((radial_velocity, definition, Literal("Velocity of the star in the direction of the line of sight", lang="en")))
ExoKG.add((radial_velocity, domain, star))
ExoKG.add((radial_velocity, range, quantitative_value))
ExoKG.add((radial_velocity, same_as, DBP.radialV))

# --- Orbital and physical properties of exoplanets / celestial bodies ---
orbital_period = EXOPLANET.orbital_period
ExoKG.add((orbital_period, type, Property))
ExoKG.add((orbital_period, label, Literal("Orbital period", lang="en")))
ExoKG.add((orbital_period, definition, Literal("Time the planet takes to make a complete orbit around the host star or system", lang="en")))
ExoKG.add((orbital_period, domain, exoplanet))
ExoKG.add((orbital_period, range, quantitative_value))
ExoKG.add((orbital_period, same_as, DBO.orbitalPeriod))

longest_radius = EXOPLANET.longest_radius
ExoKG.add((longest_radius, type, Property))
ExoKG.add((longest_radius, label, Literal("Longest radius", lang="en")))
ExoKG.add((longest_radius, definition, Literal("The longest radius of an elliptic orbit, or the projected separation in the plane of the sky", lang="en")))
ExoKG.add((longest_radius, domain, exoplanet))
ExoKG.add((longest_radius, range, quantitative_value))
ExoKG.add((longest_radius, same_as, DBP.semimajor))

radius = CELESTIAL_BODY.radius
ExoKG.add((radius, type, Property))
ExoKG.add((radius, label, Literal("Radius", lang="en")))
ExoKG.add((radius, definition, Literal("Length of a line segment from the center of the celestial body to its surface", lang="en")))
ExoKG.add((radius, domain, celestial_body))
ExoKG.add((radius, range, quantitative_value))
ExoKG.add((radius, RDFS.subPropertyOf, DBO.length))

mass = CELESTIAL_BODY.mass
ExoKG.add((mass, type, Property))
ExoKG.add((mass, label, Literal("Mass", lang="en")))
ExoKG.add((mass, definition, Literal("Mass of the celestial body", lang="en")))
ExoKG.add((mass, domain, celestial_body))
ExoKG.add((mass, range, quantitative_value))
ExoKG.add((mass, RDFS.subPropertyOf, DBO.mass))

orbital_eccentricity = EXOPLANET.orbital_eccentricity
ExoKG.add((orbital_eccentricity, type, Property))
ExoKG.add((orbital_eccentricity, label, Literal("Orbital eccentricity", lang="en")))
ExoKG.add((orbital_eccentricity, definition, Literal("Amount by which the orbit of the planet deviates from a perfect circle", lang="en")))
ExoKG.add((orbital_eccentricity, domain, exoplanet))
ExoKG.add((orbital_eccentricity, range, quantitative_value))
ExoKG.add((orbital_eccentricity, same_as, DBO.orbitalEccentricity))

density = CELESTIAL_BODY.density
ExoKG.add((density, type, Property))
ExoKG.add((density, label, Literal("Density", lang="en")))
ExoKG.add((density, definition, Literal("Amount of mass per unit of volume of the celestial body", lang="en")))
ExoKG.add((density, domain, celestial_body))
ExoKG.add((density, range, quantitative_value))
ExoKG.add((density, broader, DBO.density))

insolation_flux = EXOPLANET.insolation_flux
ExoKG.add((insolation_flux, type, Property))
ExoKG.add((insolation_flux, label, Literal("Insolation flux", lang="en")))
# Language tag added for consistency with every other definition
ExoKG.add((insolation_flux, definition, Literal("Insolation flux is another way to give the equilibrium temperature. It's given in units relative to those measured for the Earth from the Sun", lang="en")))
ExoKG.add((insolation_flux, domain, exoplanet))
ExoKG.add((insolation_flux, range, quantitative_value))
ExoKG.add((insolation_flux, broader, DBP.flux))

equilibrium_temperature = EXOPLANET.equilibrium_temperature
ExoKG.add((equilibrium_temperature, type, Property))
ExoKG.add((equilibrium_temperature, label, Literal("Equilibrium temperature", lang="en")))
ExoKG.add((equilibrium_temperature, definition, Literal("The equilibrium temperature of the planet as modeled by a black body heated only by its host star, or for directly imaged planets, the effective temperature of the planet required to match the measured luminosity if the planet were a black body", lang="en")))
ExoKG.add((equilibrium_temperature, domain, exoplanet))
ExoKG.add((equilibrium_temperature, range, quantitative_value))
ExoKG.add((equilibrium_temperature, broader, DBP.temperature))

inclination = EXOPLANET.inclination
ExoKG.add((inclination, type, Property))
ExoKG.add((inclination, label, Literal("Inclination", lang="en")))
ExoKG.add((inclination, definition, Literal("Angle of the plane of the orbit relative to the plane perpendicular to the line-of-sight from Earth to the object", lang="en")))
ExoKG.add((inclination, domain, exoplanet))
ExoKG.add((inclination, range, quantitative_value))
ExoKG.add((inclination, same_as, DBP.inclination))

true_obliquity = EXOPLANET.obliquity
ExoKG.add((true_obliquity, type, Property))
ExoKG.add((true_obliquity, label, Literal("Obliquity", lang="en")))
ExoKG.add((true_obliquity, definition, Literal("Angle between the angular momentum vector of the rotation of the host star and the angular momentum vector of the orbit of the planet", lang="en")))
ExoKG.add((true_obliquity, domain, exoplanet))
ExoKG.add((true_obliquity, range, quantitative_value))
ExoKG.add((true_obliquity, broader, DBP.angle))

ratio_planet_stellar_radius = EXOPLANET.ratio_planet_stellar_radius
ExoKG.add((ratio_planet_stellar_radius, type, Property))
ExoKG.add((ratio_planet_stellar_radius, label, Literal("Ratio between planet and stellar radius", lang="en")))
ExoKG.add((ratio_planet_stellar_radius, definition, Literal("The planet radius divided by the stellar radius", lang="en")))
ExoKG.add((ratio_planet_stellar_radius, domain, exoplanet))
ExoKG.add((ratio_planet_stellar_radius, range, quantitative_value))
ExoKG.add((ratio_planet_stellar_radius, broader, DBP.ratio))

orbits_around = EXOPLANET.orbits_around
ExoKG.add((orbits_around, type, Property))
ExoKG.add((orbits_around, label, Literal("Orbits around", lang="en")))
ExoKG.add((orbits_around, definition, Literal("Indicates that an exoplanet orbits around a given star or star system", lang="en")))
ExoKG.add((orbits_around, domain, exoplanet))
ExoKG.add((orbits_around, range, star))
ExoKG.add((orbits_around, same_as, DBP.star))

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

### 5. Create RDF representation of the data

1. Add discovery methods

In [17]:
# Create one individual per distinct discovery method of the planet table.
# Missing values are dropped first because format_name() only handles strings.
for method_name in ps["discoverymethod"].dropna().unique():
    discovery_method_uri = DISCOVERY["method/" + format_name(name=method_name)]
    ExoKG.add((discovery_method_uri, type, discovery_method))
    ExoKG.add((discovery_method_uri, label, Literal(method_name, lang="en")))

2. Add systems data

In [18]:
# One row per planetary system: first non-missing value of each system column.
system_data = stellar.groupby(by="sy_name").agg(
    {
        x: "first" for x in [
            "sy_dist", "sy_disterr1", "sy_disterr2", "sy_snum", "sy_pnum", "sy_mnum"
        ]
    }
)

for system_name, row in system_data.iterrows():
    system_uri = PLANETARY_SYSTEM[format_name(name=system_name)]

    # Add system parameters.
    # These are added for every system, whether or not a distance is known:
    # stars are linked to their system through `located_in`, so each system
    # must at least be typed and labelled.
    ExoKG.add((system_uri, type, planetary_system))
    ExoKG.add((system_uri, label, Literal(system_name, datatype=XSD.string)))
    ExoKG.add((system_uri, identifier, URIRef(f"https://exoplanetarchive.ipac.caltech.edu/overview/{system_name.replace(' ', '%20')}")))

    # Component counts; a missing count is skipped since int() cannot convert NaN.
    for count_property, count_column in (
        (n_stars, "sy_snum"),
        (n_planets, "sy_pnum"),
        (n_moons, "sy_mnum"),
    ):
        count = row[count_column]
        if pd.isna(count):
            continue
        ExoKG.add((system_uri, count_property, Literal(int(count), datatype=XSD.nonNegativeInteger)))

    # Add distance from earth to system (only when the distance is known)
    system_distance = row["sy_dist"]
    if not pd.isna(system_distance):
        distance_uri = create_quantitative_value(
            value=system_distance,
            min_eps=row["sy_disterr2"],
            max_eps=row["sy_disterr1"],
            unit=parsec,
            name=system_name,
            param="sy_dist"
        )
        ExoKG.add((system_uri, distance_from_earth, distance_uri))

3. Add stellar data

In [19]:
# Sanity check: every host star of the 'ps' table must also appear in the 'stellar' table
stars_ps = set(ps["hostname"])
stars_stellar = set(stellar["hostname"])
missing_hosts = stars_ps.difference(stars_stellar)
assert len(missing_hosts) == 0, "Some stars from the 'ps' table are not in the 'stellar' table"

In [20]:
# Columns are log-scaled, which is not accepted as a common UCUM unit, so they
# are converted to linear scale.
# Their uncertainty columns are offsets in log space. They are converted to
# linear offsets as well, so that `value + offset` (as computed when the
# min/max bounds are created) stays meaningful:
#     linear_offset = 10 ** (log_value + log_offset) - 10 ** log_value
# NOTE: re-running this cell applies the conversion a second time.
for log_column in ("st_logg", "st_lum"):
    log_values = stellar[log_column]
    linear_values = np.power(10, log_values)
    for suffix in ("err1", "err2"):
        error_column = log_column + suffix
        if error_column in stellar.columns:
            stellar[error_column] = np.power(10, log_values + stellar[error_column]) - linear_values
    # Overwrite the value last: the offsets above need the original log values
    stellar[log_column] = linear_values

In [21]:
# The spectral type recorded for HR 8799 ('F0+ V ({lambda} Boo)...') is problematic,
# so it is overridden for every row of that host.
is_hr_8799 = stellar["hostname"] == "HR 8799"
stellar.loc[is_hr_8799, "st_spectype"] = "A5 V"

In [22]:
# Add parameters to be non-relative to the Sun
# Conversion factors from Sun-relative to absolute units. The radius factor is
# in kilometres because the absolute radius column ("st_rad") is tagged with
# the kilometer unit (the previous factor 6.955e8 was the value in metres,
# giving radii 1000 times too large).
SOLAR_RADIUS_KM = 6.955e5
SOLAR_MASS_KG = 1.98892e30
SOLAR_LUMINOSITY_W = 3.839e26

# Keep the Sun-relative values under a name ending in "s"...
stellar = stellar.rename(columns={
    "st_rad": "st_rads", 
    "st_raderr1": "st_radserr1", 
    "st_raderr2": "st_radserr2", 
    "st_mass": "st_masss", 
    "st_masserr1": "st_massserr1",
    "st_masserr2": "st_massserr2",
    "st_lum": "st_lums",
    "st_lumerr1": "st_lumserr1",
    "st_lumerr2": "st_lumserr2",
})

# ...and store the absolute values under the original column names.
stellar["st_rad"] = stellar["st_rads"] * SOLAR_RADIUS_KM
stellar["st_raderr1"] = stellar["st_radserr1"] * SOLAR_RADIUS_KM
stellar["st_raderr2"] = stellar["st_radserr2"] * SOLAR_RADIUS_KM

stellar["st_mass"] = stellar["st_masss"] * SOLAR_MASS_KG
stellar["st_masserr1"] = stellar["st_massserr1"] * SOLAR_MASS_KG
stellar["st_masserr2"] = stellar["st_massserr2"] * SOLAR_MASS_KG

stellar["st_lum"] = stellar["st_lums"] * SOLAR_LUMINOSITY_W
stellar["st_lumerr1"] = stellar["st_lumserr1"] * SOLAR_LUMINOSITY_W
stellar["st_lumerr2"] = stellar["st_lumserr2"] * SOLAR_LUMINOSITY_W

In [23]:
# TODO: "st_met", "st_meterr1", "st_meterr2"
# TODO: add system position!!
# Maps each stellar column to the property linking a star to the value and to
# the unit of that value.
stellar_parameters = {
    "st_teff" : { PARAM_NAME: effective_temperature, PARAM_UNIT: kelvin },
    "st_rad"  : { PARAM_NAME: radius,                PARAM_UNIT: kilometer },
    "st_rads" : { PARAM_NAME: radius,                PARAM_UNIT: solar_radius },
    "st_mass" : { PARAM_NAME: mass,                  PARAM_UNIT: kilogram },
    "st_masss": { PARAM_NAME: mass,                  PARAM_UNIT: solar_mass },
    "st_lum"  : { PARAM_NAME: luminosity,            PARAM_UNIT: watt },
    "st_lums" : { PARAM_NAME: luminosity,            PARAM_UNIT: solar_luminosity },
    "st_logg" : { PARAM_NAME: surface_gravity,       PARAM_UNIT: cm_s2 },
    "st_age"  : { PARAM_NAME: age,                   PARAM_UNIT: gigayear },
    "st_dens" : { PARAM_NAME: density,               PARAM_UNIT: g_cm3 },
    "st_vsin" : { PARAM_NAME: rotational_velocity,   PARAM_UNIT: km_s },
    "st_rotp" : { PARAM_NAME: rotational_period,     PARAM_UNIT: days },
    "st_radv" : { PARAM_NAME: radial_velocity,       PARAM_UNIT: km_s }
}

# One row per host star: first non-missing value of every column.
first_value_per_column = {x: "first" for x in stellar.columns}
stellar_filtered = stellar.groupby(by="hostname").agg(first_value_per_column)

for _, star_row in stellar_filtered.iterrows():
    host_name = star_row["hostname"]
    system_name = star_row["sy_name"]
    host_uri = STAR[format_name(name=host_name)]
    system_uri = PLANETARY_SYSTEM[format_name(name=system_name)]

    # Archive page of the system, anchored on the section of this star
    system_slug = system_name.replace(" ", "%20")
    star_anchor = host_name.replace(" ", "-")
    archive_page = URIRef(
        f"https://exoplanetarchive.ipac.caltech.edu/overview/{system_slug}#star_{star_anchor}_collapsible"
    )

    # Describe the star and attach it to its system
    ExoKG.add((host_uri, type, star))
    ExoKG.add((host_uri, label, Literal(host_name, datatype=XSD.string)))
    ExoKG.add((host_uri, located_in, system_uri))
    ExoKG.add((host_uri, identifier, archive_page))

    # Attach every available stellar parameter
    add_object_parameters(
        object_uri=host_uri,
        object_row=star_row,
        parameters=stellar_parameters,
        name=host_name,
    )

### 4. Add planets data

In [24]:
# Add parameters to be non-relative to Earth.
# The archive's insolation columns are first moved to an "e"-suffixed name so
# that the plain "pl_insol*" names can hold the converted values.
earth_relative_insolation = {
    "pl_insol": "pl_insole",
    "pl_insolerr1": "pl_insoleerr1",
    "pl_insolerr2": "pl_insoleerr2",
}
ps = ps.rename(columns=earth_relative_insolation)

# Conversion factors; the resulting columns are exported below with the
# kilometer, kilogram and W/m2 units respectively.
EARTH_RADIUS_KM = 6378.137
EARTH_MASS_KG = 5.9724e+24
EARTH_INSOLATION_W_M2 = 1361

# (converted column, Earth-relative column, factor) — value and both error columns
for converted, earth_relative, factor in (
    ("pl_rad", "pl_rade", EARTH_RADIUS_KM),
    ("pl_raderr1", "pl_radeerr1", EARTH_RADIUS_KM),
    ("pl_raderr2", "pl_radeerr2", EARTH_RADIUS_KM),
    ("pl_bmass", "pl_bmasse", EARTH_MASS_KG),
    ("pl_bmasserr1", "pl_bmasseerr1", EARTH_MASS_KG),
    ("pl_bmasserr2", "pl_bmasseerr2", EARTH_MASS_KG),
    ("pl_insol", "pl_insole", EARTH_INSOLATION_W_M2),
    ("pl_insolerr1", "pl_insoleerr1", EARTH_INSOLATION_W_M2),
    ("pl_insolerr2", "pl_insoleerr2", EARTH_INSOLATION_W_M2),
):
    ps[converted] = ps[earth_relative] * factor

In [25]:
# Planet column name -> quantity and unit handed to add_object_parameters.
# Radius, mass and insolation are listed once per available unit.
planet_parameters = {
    "pl_orbper"   : { PARAM_NAME: orbital_period,              PARAM_UNIT: days },
    "pl_orbsmax"  : { PARAM_NAME: longest_radius,              PARAM_UNIT: astronomical_units },
    "pl_rad"      : { PARAM_NAME: radius,                      PARAM_UNIT: kilometer },
    "pl_rade"     : { PARAM_NAME: radius,                      PARAM_UNIT: earth_radius },
    "pl_radj"     : { PARAM_NAME: radius,                      PARAM_UNIT: jupiter_radius },
    "pl_bmass"    : { PARAM_NAME: mass,                        PARAM_UNIT: kilogram },
    "pl_bmasse"   : { PARAM_NAME: mass,                        PARAM_UNIT: earth_mass },
    "pl_bmassj"   : { PARAM_NAME: mass,                        PARAM_UNIT: jupiter_mass },
    "pl_dens"     : { PARAM_NAME: density,                     PARAM_UNIT: g_cm3 },
    "pl_orbeccen" : { PARAM_NAME: orbital_eccentricity,        PARAM_UNIT: None },
    "pl_insol"    : { PARAM_NAME: insolation_flux,             PARAM_UNIT: w_m2 },
    "pl_insole"   : { PARAM_NAME: insolation_flux,             PARAM_UNIT: earth_flux },
    "pl_eqt"      : { PARAM_NAME: equilibrium_temperature,     PARAM_UNIT: kelvin },
    "pl_orbincl"  : { PARAM_NAME: inclination,                 PARAM_UNIT: degree },
    "pl_trueobliq": { PARAM_NAME: true_obliquity,              PARAM_UNIT: degree },
    "pl_ratror"   : { PARAM_NAME: ratio_planet_stellar_radius, PARAM_UNIT: None },
}

# Host name -> first "sy_name" recorded for that host in the stellar table.
# Built once so that the loop does a dictionary lookup instead of filtering the
# whole stellar frame for every planet. drop_duplicates keeps the first row per
# host, which is the row the previous `.iloc[0]` lookup selected.
# A host missing from the stellar table raises KeyError inside the loop.
system_by_host = (
    stellar.drop_duplicates(subset="hostname")
    .set_index("hostname")["sy_name"]
    .to_dict()
)

for _, row in ps.iterrows():
    # Add planet
    planet_name = row["pl_name"]
    planet_uri = EXOPLANET[format_name(name=planet_name)]
    ExoKG.add((planet_uri, type, exoplanet))
    ExoKG.add((planet_uri, label, Literal(planet_name, datatype=XSD.string)))
    ExoKG.add((planet_uri, planet_letter, Literal(row["pl_letter"], datatype=XSD.string)))

    # Add star
    host = row["hostname"]
    host_uri = STAR[format_name(name=host)]
    ExoKG.add((planet_uri, orbits_around, host_uri))

    # Add stellar type (only when the record carries one)
    star_type = row["st_spectype"]
    if not pd.isna(star_type):
        star_type_uri = STAR[f"type/{format_name(name=star_type)}"]
        ExoKG.add((star_type_uri, type, spectral_type))
        ExoKG.add((star_type_uri, label, Literal(star_type, lang="en")))
        ExoKG.add((host_uri, has_spectral_class, star_type_uri))

    # Add system
    system = system_by_host[host]
    system_uri = PLANETARY_SYSTEM[format_name(name=system)]
    ExoKG.add((planet_uri, located_in, system_uri))
    ExoKG.add((planet_uri, identifier, URIRef(f"https://exoplanetarchive.ipac.caltech.edu/overview/{system.replace(' ', '%20')}#planet_{planet_name.replace(' ', '-')}_collapsible")))

    # Add discovery TODO
    discovery_uri = DISCOVERY[format_name(name=planet_name)]
    discovery_method_uri = DISCOVERY["method/" + format_name(name=row["discoverymethod"])]
    ExoKG.add((discovery_uri, type, SCHEMA.Observation))
    ExoKG.add((discovery_uri, SCHEMA.discovered, Literal(row["disc_year"], datatype=XSD.gYear)))
    ExoKG.add((discovery_uri, SCHEMA.about, planet_uri))
    ExoKG.add((discovery_uri, DBO.detectionMethod, discovery_method_uri))

    # Facility and instrument are free-text columns: skip them when empty, as is
    # done for the spectral type, so that a missing value is not turned into a
    # literal built from NaN.
    facility = row["disc_facility"]
    if not pd.isna(facility):
        ExoKG.add((discovery_uri, EX.place, Literal(facility, lang="en")))  # TODO: this should be a facility element
    instrument = row["disc_instrument"]
    if not pd.isna(instrument):
        ExoKG.add((discovery_uri, SCHEMA.measurementMethod, Literal(instrument, lang="en")))

    # Add planet parameters
    add_object_parameters(
        object_uri=planet_uri,
        object_row=row,
        parameters=planet_parameters,
        name=planet_name
    )

### 5. Export graph

In [26]:
# Write the graph to a Turtle file next to the notebook.
# NOTE(review): the cells after this one still bind prefixes and add the
# discovery-method mappings to ExoKG, so those are not part of this export
# unless the cell is run again afterwards.
ExoKG.serialize(destination="planetary_systems_with_confirmed_exoplanets_KG.ttl", format="ttl")

<Graph identifier=N63a120279d044d7096e764c56b4cf53a (<class 'rdflib.graph.Graph'>)>

In [27]:
# Namespace of the external thesaurus (astrothesaurus.org/uat) that the
# discovery methods are mapped to.
ESA = Namespace("http://astrothesaurus.org/uat/")

# Register the prefixes used when the graph is serialized.
for prefix, namespace in (("esa", ESA), ("exoplanet", EXOPLANET)):
    ExoKG.bind(prefix, namespace)

In [28]:
# Discovery method, spelled as in the "discoverymethod" column -> matching
# concept in the ESA (astrothesaurus.org/uat) namespace.
discovery_methods_mappings = {
    "Transit": ESA["1709"],
    "Radial Velocity": ESA["1332"],
    "Imaging": ESA["387"],
    "Transit Timing Variations": ESA["1710"],
    "Microlensing": ESA["2147"],
    "Orbital Brightness Modulation": ESA["182"],
    "Astrometry": ESA["2130"],
    "Eclipse Timing Variations": ESA["443"],
    "Pulsar Timing": ESA["1305"],
    # NOTE(review): same concept id as "Pulsar Timing" — confirm this is intended.
    "Pulsation Timing Variations": ESA["1305"],
    "Disk Kinematics": ESA["888"]
}

# Link every discovery-method resource created in the planet loop to its
# thesaurus concept. The SKOS mapping property is skos:exactMatch
# ("exactMatching" is not a SKOS term). A method without an entry in the
# mapping raises KeyError, which makes a missing mapping visible.
# NOTE(review): the graph was already serialized in an earlier cell; export it
# again after this cell for these links to reach the file.
for method in ps["discoverymethod"].unique():
    discovery_method_uri = DISCOVERY["method/" + format_name(name=method)]
    ExoKG.add((discovery_method_uri, SKOS.exactMatch, discovery_methods_mappings[method]))

In [29]:
# TODO: add labels and descriptions to all properties and classes!
# TODO: add references everywhere!
# TODO: check that there is no namespace accessed anymore in the code flow (only defined at the top)
# TODO: try to use https://dbpedia.org/ontology/CelestialBody as much as possible
# TODO: check for anything that is missing from the data
# TODO: make everything my own vocabulary and link to existing ontologies
# TODO: if no sameAs match can be found, add a label, definition, range, domain, broader, etc. instead