# BMKG Project
## Author: Aurélien Bertrand - I6256590

This notebook is intended to create sample SPARQL queries to assess the graph quality and address the key research questions defined in the report.

### 1. Import libraries

In [1]:
!pip install pandas oxrdflib Pygments rdfpandas



In [2]:
import pandas as pd
from IPython.display import display
from IPython.display import HTML
from pygments import highlight
from pygments.lexers import SparqlLexer
from pygments.formatters import HtmlFormatter
from rdflib import Graph



### 2. Define function to run query

In [3]:
def run_query(graph, query):
    """Run a SPARQL query, then display the highlighted query and its result table.

    Args:
        graph: Object exposing ``query(query)``. The returned result must be
            iterable row by row and expose a ``vars`` attribute (in this
            notebook: the ``ExoKG`` rdflib graph).
        query: SPARQL query text.

    Returns:
        None. Both the query and the result table are rendered through
        ``display``.
    """
    # Execute the SPARQL query
    results = graph.query(query)

    # Display the SPARQL query
    formatted_query = highlight(query, SparqlLexer(), HtmlFormatter(style='solarized-dark', full=True, nobackground=True))
    display(HTML(formatted_query))

    # Convert results to a Pandas DataFrame. A variable left unbound in a row
    # is shown as an empty cell instead of the literal text "None".
    rows = [["" if item is None else str(item) for item in row] for row in results]
    # Keep the projected variable names even when no row matched, so an empty
    # result still shows which columns were requested. Fall back to default
    # column labels when the result exposes no variables.
    columns = [str(var) for var in results.vars] if results.vars else None
    df = pd.DataFrame(rows, columns=columns)

    # Display the DataFrame as a table in Jupyter Notebook
    display(HTML(df.to_html()))

### 3. Load graph

In [4]:
# Create an rdflib Graph using the "Oxigraph" store (presumably the plugin provided by
# the oxrdflib package installed in the first cell — confirm).
ExoKG = Graph(store="Oxigraph")
# The path is relative to the notebook's working directory. No `format` argument is
# passed, so the serialization is left for rdflib to determine.
ExoKG.parse("../planetary_systems_with_confirmed_exoplanets_KG.ttl")

# Print the number of triples as a quick sanity check that the load succeeded.
print(f"Working with {len(ExoKG)} triples")

Working with 617137 triples


In [5]:
# Shared PREFIX header that is prepended to every SPARQL query in this notebook.
# The namespaces must match the IRIs actually used in the loaded graph.
# NOTE(review): the dbo/dbp/dbr namespaces differ from DBpedia's usual
# http://dbpedia.org/{ontology,property,resource}/ IRIs — confirm they match the graph.
prefixes = """
PREFIX cb: <https://example.org/ontology/celestial_body/>
PREFIX dbo: <https://dbpedia.org/ontology/>
PREFIX dbp: <https://dbpedia.org/property/>
PREFIX dbr: <https://dbpedia.org/page/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX disc: <https://example.org/ontology/discovery/>
PREFIX ex: <https://example.org/ontology/>
PREFIX exo: <https://example.org/ontology/celestial_body/exoplanet/>
PREFIX oum: <http://www.ontology-of-units-of-measure.org/resource/om-2/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX pl_sys: <https://example.org/ontology/planetary_system/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <https://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX star: <https://example.org/ontology/celestial_body/star/>
PREFIX time: <http://www.w3.org/2006/time#>
PREFIX ucum: <https://w3id.org/uom/>
PREFIX unit: <https://example.org/ontology/unit/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
"""

### 4. Data quality

1. Number of classes

In [6]:
# Count the distinct resources that are explicitly typed as rdfs:Class in the graph.
class_query = prefixes + """
SELECT (COUNT(DISTINCT(?class)) AS ?n_class)
WHERE {
    ?class a rdfs:Class .
}
"""

run_query(ExoKG, class_query)

Unnamed: 0,n_class
0,7


2. Number of properties

In [7]:
# Count the distinct resources that are explicitly typed as rdf:Property in the graph.
property_query = prefixes + """
SELECT (COUNT(DISTINCT(?property)) AS ?n_property)
WHERE {
    ?property a rdf:Property .
}
"""

run_query(ExoKG, property_query)

Unnamed: 0,n_property
0,28


3. Check for NaNs

In [8]:
# Count schema:value objects that are either not typed xsd:double or whose lexical
# form is "na"/"nan" (compared case-insensitively). A count of 0 means every value is
# an xsd:double other than NaN.
# The count is not DISTINCT: a subject with several offending values is counted once
# per offending value.
nan_query = prefixes + """
SELECT (COUNT(?observation) AS ?n_nan)
WHERE {
    ?observation schema:value ?value .
    FILTER(DATATYPE(?value) != xsd:double || LCASE(STR(?value)) IN ("na", "nan"))
}
"""

run_query(ExoKG, nan_query)

Unnamed: 0,n_nan
0,0


4. Check for empty strings

In [9]:
# List every subject that has, for any property, the empty plain literal "" as object.
# Only the plain "" literal is matched; empty literals carrying a language tag or
# another datatype would not be found by this pattern.
empty_string_query = prefixes + """
SELECT ?resource 
WHERE{  
    ?resource ?property "" 
}
"""

run_query(ExoKG, empty_string_query)

5. Check domain predicate range

In [10]:
# List the triples for which the NOT EXISTS group has no match, i.e. triples where it
# is NOT the case that subject, predicate and object each reach some resource through
# rdf:type followed by zero or more subClassOf / subPropertyOf steps.
# ?domain, ?predicate and ?range are free variables that only occur inside the group.
# NOTE(review): the pattern never references rdfs:domain or rdfs:range, so it checks
# that s, p and o are typed at all, not that their types agree with the property's
# declaration. Literal objects cannot be the subject of an rdf:type triple, so
# presumably every triple with a literal object is returned — confirm this is intended,
# since the result may be very large.
no_domain_predicate_range_query = prefixes + """
SELECT ?s ?p ?o 
WHERE { 
    ?s ?p ?o .
    FILTER NOT EXISTS { 
        ?s rdf:type/rdfs:subClassOf* ?domain . 
        ?p rdf:type/rdfs:subPropertyOf* ?predicate . 
        ?o rdf:type/rdfs:subClassOf* ?range 
    } 
}
"""

run_query(ExoKG, no_domain_predicate_range_query)

6. Multiple values

In [11]:
# Data-quality check: list every schema:Observation that carries more than one
# schema:value, together with how many values it has. An empty table means each
# observation has at most one value.
# The aggregate is repeated inside a parenthesised HAVING condition: SPARQL 1.1 requires
# the HAVING condition to be a bracketed expression (or built-in call), and repeating
# COUNT(...) avoids relying on the SELECT alias being visible inside HAVING.
multiple_values_query = prefixes + """
SELECT ?observation (COUNT(?value) AS ?num_values)
WHERE {
    ?observation a schema:Observation ;
        schema:value ?value .
}
GROUP BY ?observation
HAVING (COUNT(?value) > 1)
"""

run_query(ExoKG, multiple_values_query)

### 5. Queries

1. **RQ1**: Which planetary system hosts the largest number of exoplanets?

In [6]:
# RQ1: return the label and planet count of the ex:Planetary_system with the highest
# pl_sys:number_of_planets.
# LIMIT 1 keeps a single row, so systems tied for first place are not shown.
# Assumes ?n_planets is a numeric literal so that ORDER BY sorts numerically — TODO confirm.
rq1_query = prefixes + """
SELECT ?ps_label ?n_planets
WHERE {
    ?ps a ex:Planetary_system ;
        rdfs:label ?ps_label ;
        pl_sys:number_of_planets ?n_planets .
} 
ORDER BY DESC(?n_planets)
LIMIT 1
"""

run_query(ExoKG, rq1_query)

Unnamed: 0,ps_label,n_planets
0,KOI-351,8


2. **RQ2**: Which facility has discovered the most exoplanets?

In [10]:
# RQ2: group the schema:Observation resources that have an ex:facility by that facility,
# count them, and keep the facility with the highest count.
# NOTE(review): every schema:Observation with an ex:facility is counted; presumably these
# are discovery records, one per exoplanet — confirm against the graph model.
rq2_query = prefixes + """
SELECT ?facility (COUNT(?facility) AS ?count)
WHERE {
    ?discovery a schema:Observation ;
        ex:facility ?facility .
}
GROUP BY ?facility
ORDER BY DESC(?count)
LIMIT 1
"""

run_query(ExoKG, rq2_query)

Unnamed: 0,facility,count
0,Kepler,2779


3. **RQ3**: What are the key characteristics of exoplanets?

In [8]:
# RQ3: average value of each exoplanet characteristic, returned as one result row.
#
# Every characteristic is queried with the same graph shape:
#   ?exoplanet <property> ?measurement .
#   ?measurement schema:value ?value [ ; schema:unitCode <unit> ] .
# so the sub-SELECTs are generated from a single template instead of being copy-pasted.
# Each entry: (result column, property from the exoplanet to its measurement node,
#              required schema:unitCode or None when no unit is constrained).
rq3_characteristics = [
    ("average_orbital_period", "exo:orbital_period", None),
    ("average_longest_radius", "exo:longest_radius", None),
    ("average_radius", "cb:radius", "unit:kilometer"),
    ("average_mass", "cb:mass", "unit:kilogram"),
    ("average_density", "cb:density", None),
    ("avg_orbital_eccentricity", "exo:orbital_eccentricity", None),
    ("average_insolation_flux", "exo:insolation_flux", "unit:watt_per_square_meter"),
    ("average_equilibrium_temperature", "exo:equilibrium_temperature", None),
    ("average_inclination", "exo:inclination", None),
    ("average_obliquity", "exo:obliquity", None),
    ("average_ratio_planet_stellar_radius", "exo:ratio_planet_stellar_radius", None),
]

# One independent sub-SELECT per characteristic; only the aggregate is projected, so the
# inner variable names can be reused safely across sub-SELECTs.
# A plain join is used: exoplanets lacking the measurement simply do not match, which
# selects the same rows as wrapping the pattern in OPTIONAL and filtering on BOUND(...).
rq3_subquery_template = """    {{
        SELECT (AVG(?value) AS ?{column})
        WHERE {{
            ?exoplanet a ex:Exoplanet ;
                {prop} ?measurement .
            ?measurement schema:value ?value{unit_clause} .
        }}
    }}"""

rq3_subqueries = [
    rq3_subquery_template.format(
        column=column,
        prop=prop,
        # Restrict to one unit only for the characteristics that specify it.
        unit_clause=f" ;\n                schema:unitCode {unit}" if unit else "",
    )
    for column, prop, unit in rq3_characteristics
]
rq3_projection = "\n".join(f"    ?{column}" for column, _, _ in rq3_characteristics)

rq3_query = (
    prefixes
    + "\nSELECT\n"
    + rq3_projection
    + "\nWHERE {\n"
    + "\n".join(rq3_subqueries)
    + "\n}\n"
)

run_query(ExoKG, rq3_query)

Unnamed: 0,average_orbital_period,average_longest_radius,average_radius,average_mass,average_density,avg_orbital_eccentricity,average_insolation_flux,average_equilibrium_temperature,average_inclination,average_obliquity,average_ratio_planet_stellar_radius
0,80000.79519304613,10.194265524570763,27302.995774086387,4.567887420389565e+27,3.286964238636359,0.1556753505385996,603363.2343128654,1019.4108391608394,85.92666542980827,39.0,0.0421291231343283


4. **RQ4**: Which exoplanet most closely resembles Earth?

In [9]:
# RQ4: among exoplanets orbiting a star whose spectral-type label contains "g2"
# (case-insensitive), rank by the absolute difference between the orbital period and 365,
# breaking ties by ascending star mass and then ascending star radius; keep the top row.
# Star mass and radius are restricted to values expressed in unit:solar_mass and
# unit:solar_radius.
# NOTE(review): 365 is presumably Earth's orbital period in days; the orbital period
# value carries no unit constraint in this query — confirm it is stored in days.
rq4_query = prefixes + """
SELECT ?exoplanet_label ?difference_orbital_period_relative_to_Earth ?star_label ?star_mass_value ?star_radius_value
WHERE {
    ?exoplanet a ex:Exoplanet ;
        rdfs:label ?exoplanet_label ;
        exo:orbital_period ?orbital_period ;
        exo:orbits_around ?star .
    ?orbital_period schema:value ?orbital_period_value . 
    ?star a ex:Star ;
        rdfs:label ?star_label ;
        cb:mass ?star_mass ;
        cb:radius ?star_radius ;
        star:has_spectral_type ?spectral_type .
    ?star_mass schema:unitCode unit:solar_mass ;
        schema:value ?star_mass_value .
    ?star_radius schema:unitCode unit:solar_radius ;
        schema:value ?star_radius_value .
    ?spectral_type a star:Spectral_type ;
        rdfs:label ?spectral_type_label .
    FILTER(CONTAINS(LCASE(?spectral_type_label), "g2"))
    BIND(ABS(?orbital_period_value - 365) AS ?difference_orbital_period_relative_to_Earth)
}
ORDER BY ?difference_orbital_period_relative_to_Earth ?star_mass_value ?star_radius_value
LIMIT 1
"""

run_query(ExoKG, rq4_query)

Unnamed: 0,exoplanet_label,difference_orbital_period_relative_to_Earth,star_label,star_mass_value,star_radius_value
0,Kepler-452 b,19.843000000000018,Kepler-452,0.87,0.79
