In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from rdflib import Graph
from rdflib.namespace import RDF
import re
import sys

from rdflib.plugins.sparql.processor import SPARQLResult

def sparql_results_to_df(results: SPARQLResult) -> pd.DataFrame:
    """
    Turn the rows of an rdflib SPARQL result into a `pandas.DataFrame`.

    Every bound term is converted through its ``toPython()`` method, while
    unbound (``None``) cells are passed through as ``None``. Column labels
    are the string form of the entries in ``results.vars``.
    See https://github.com/RDFLib/rdflib/issues/1179.
    """

    def _as_python(term):
        # Unbound variables show up as None and have no toPython().
        if term is None:
            return None
        return term.toPython()

    # Lazily converted rows; pandas consumes the generator when building the frame.
    converted_rows = (list(map(_as_python, row)) for row in results)
    header = list(map(str, results.vars))
    return pd.DataFrame(data=converted_rows, columns=header)

In [2]:
# Build an empty rdflib graph and fill it from the N-Triples export.
g1 = Graph()
g1.parse(
    source="../../store_data/ImProVIT/All_Measurement_parameters_D0_HBsRE_v5_1.nt",
    format="nt",
)
# Number of triples now held in the graph.
len(g1)

32551

In [3]:
# For every term bound to ?s, count how many triples mention it, either as
# the subject (first UNION branch) or as the object (second branch, where the
# variable roles are swapped). Because of the second branch, literal values
# also appear in the ?s column of the result.
# Note: the `improvit:` prefix is declared but not referenced in the query body.
query = """
prefix improvit: <http://www.project-improvit.de/vocab/> 
select ?s (count(?o) as ?NumOc)
                where {
                {?s ?p ?o}
                UNION
                {?o ?p ?s}
                }
                GROUP BY ?s
"""
# Run the query on the loaded graph and convert the rows to a DataFrame
# with columns `s` and `NumOc`.
qres = g1.query(query)
entity = sparql_results_to_df(qres)
entity

Unnamed: 0,s,NumOc
0,http://www.project-improvit.de/Measurement/CD1...,6
1,http://www.project-improvit.de/Measurement/CD1...,6
2,http://www.project-improvit.de/Donor/HBsRE_2,156
3,http://www.project-improvit.de/Measurement/CD1...,6
4,http://www.project-improvit.de/Measurement/CD1...,6
...,...,...
7581,29.87,1
7582,0.0677,1
7583,0.4041,1
7584,0.364,1


In [4]:
# Narrow the occurrence table in three steps:
#   1. keep terms that occur more than once,
#   2. drop rows with a missing `s`,
#   3. keep only rows whose `s` contains 'http:' (non-matching / non-string -> False).
entity = (
    entity
    .loc[lambda frame: frame.NumOc > 1]
    .dropna(subset=['s'])
    .loc[lambda frame: frame.s.str.contains('http:', na=False)]
)
entity

Unnamed: 0,s,NumOc
0,http://www.project-improvit.de/Measurement/CD1...,6
1,http://www.project-improvit.de/Measurement/CD1...,6
2,http://www.project-improvit.de/Donor/HBsRE_2,156
3,http://www.project-improvit.de/Measurement/CD1...,6
4,http://www.project-improvit.de/Measurement/CD1...,6
...,...,...
5101,http://www.project-improvit.de/Measurement/CD1...,6
5102,http://www.project-improvit.de/Measurement/CD1...,6
5103,http://www.project-improvit.de/Measurement/CD1...,6
5109,http://www.project-improvit.de/vocab/Measurement,5017


In [5]:
# Collect every distinct predicate (?p) that appears in any triple of the graph.
# Note: the `improvit:` prefix is declared but not referenced in the query body.
query = """
prefix improvit: <http://www.project-improvit.de/vocab/> 
select distinct ?p
       where {
                ?s ?p ?o
                }
                
"""
# Run the query and convert the result to a single-column (`p`) DataFrame.
qres = g1.query(query)
relation = sparql_results_to_df(qres)
relation

Unnamed: 0,p
0,http://www.project-improvit.de/vocab/hasMeasur...
1,http://www.project-improvit.de/vocab/measureme...
2,http://www.project-improvit.de/vocab/hasBiosam...
3,http://www.project-improvit.de/vocab/hasParame...
4,http://www.project-improvit.de/vocab/hasExpPro...
...,...
67,http://www.project-improvit.de/vocab/measureme...
68,http://www.project-improvit.de/vocab/measureme...
69,http://www.project-improvit.de/vocab/measureme...
70,http://www.project-improvit.de/vocab/measureme...


In [7]:
# Number of distinct predicate values held in `relation`.
# NOTE(review): the execution counter jumps from In [5] to In [7], and the saved
# preview of `relation` ends at index 70 (71 rows) while this cell reports 72 --
# re-run from a fresh kernel to confirm the count.
len(relation["p"].unique())

72