In [3]:
import pandas as pd
from rdflib import Graph
from rdflib.plugins.sparql.processor import SPARQLResult

In [4]:
def sparql_results_to_df(results: SPARQLResult) -> pd.DataFrame:
    """
    Build a `pandas.DataFrame` from the rows of an rdflib SPARQL result.

    Each row yielded by iterating `results` becomes one DataFrame row. Every
    entry that is not ``None`` is replaced by the value returned by its
    ``toPython()`` method; ``None`` entries are passed through unchanged.
    Column labels are the string form of each item in ``results.vars``.

    See https://github.com/RDFLib/rdflib/issues/1179.
    """
    def as_python(term):
        # Missing entries stay None; everything else is converted via toPython().
        return term.toPython() if term is not None else None

    python_rows = ([as_python(term) for term in row] for row in results)
    column_labels = [str(var) for var in results.vars]
    return pd.DataFrame(data=python_rows, columns=column_labels)

In [5]:
# Create an empty graph and load the triples from the .nt file into it.
# The file is read with format="ttl"; the parse call is the cell's last
# expression, so its return value is what the notebook displays.
g = Graph()
g.parse(source="dataset/graph/ddidpi.nt", format="ttl")

<Graph identifier=Na065cdcf78bf4094ad220f88ad79bc20 (<class 'rdflib.graph.Graph'>)>

In [11]:
# Join every drug-drug interaction (?ddi) with the drug-protein interaction
# records (?dpi) that share its object drug, returning at most 20 distinct rows.
#
# Every predicate used below lives under http://www.project-ddidpi.de/vocab/
# (no trailing '#'), so a single `vocab:` prefix covers all of them.
query = """PREFIX vocab: <http://www.project-ddidpi.de/vocab/>

SELECT DISTINCT ?precipitant ?precipitant_dbpedia ?object ?object_dbpedia ?effect ?impact
                ?protein ?category ?action ?gene ?organism ?pubmed
WHERE {
    ?ddi vocab:hasPrecipitantDrug ?precipitant .
    ?precipitant vocab:hasdbpediaResource ?precipitant_dbpedia .
    ?ddi vocab:hasObjectDrug ?object .
    ?object vocab:hasdbpediaResource ?object_dbpedia .
    ?ddi vocab:hasEffect ?effect .
    ?ddi vocab:hasImpact ?impact .

    ?dpi vocab:hasDrugID ?object .
    ?dpi vocab:hasProteinID ?protein .
    ?dpi vocab:hasCategory ?category .
    ?dpi vocab:hasActions ?action .
    ?dpi vocab:hasEntrez_gene_id ?gene .
    ?dpi vocab:hasOrganism ?organism .
    ?dpi vocab:hasPubMedID ?pubmed .
}
LIMIT 20
"""

# NOTE(review): the recorded run of this cell ended in
# "AttributeError: can't set attribute". That message is commonly reported when
# an older rdflib release is combined with pyparsing 3.x -- confirm the
# installed versions before re-running.
#
# `results` can be passed to sparql_results_to_df() to obtain a DataFrame of
# plain Python values.
results = g.query(query)

AttributeError: can't set attribute

In [None]:
import json

# `results` is a rdflib.plugins.sparql.processor.SPARQLResult object.
# Three ways of turning its bindings into a DataFrame are compared here. Each
# one is kept under its own name so that none is built and silently discarded
# (a notebook cell only displays its last expression).

# 1) Direct construction: renders properly in notebooks, but DataFrame values
#    are rdflib objects rather than builtin python types.
bindings_df = pd.DataFrame(results.bindings)

# 2) Convert everything to strings, including missing values. Series.map is
#    applied column by column instead of the deprecated DataFrame.applymap.
bindings_str_df = bindings_df.apply(lambda column: column.map(str)).rename(columns=str)

# 3) Serialize with json and then parse (clobbers types, converting values to
#    strings).
results_json = results.serialize(format="json")
raw_bindings = json.loads(results_json)["results"]["bindings"]
bindings = [{k: v["value"] for k, v in result.items()} for result in raw_bindings]
bindings_json_df = pd.DataFrame(bindings)

bindings_json_df

In [2]:
# Read the .nt file into memory: one list entry per line of the file, with the
# line terminators kept.
lines = []
with open("dataset/graph/ddidpi.nt") as nt_file:
    lines = list(nt_file)

In [11]:
path = 'dataset/graph/'

# Ordered (old, new) substitutions applied to every line read from the .nt
# file. The order is significant:
#   * '>.' is collapsed to '>' first, so the exact-match rule for
#     '<http://www.project-ddidpi.de/vocab/DPI>' can still fire;
#   * that exact-match rule must run before the generic '.../vocab/' prefix rule;
#   * the bare '>' rule runs last and strips whatever closing brackets remain.
TRIPLE_REPLACEMENTS = [
    ('>.', '>'),
    ('<http://www.project-ddidpi.de/DPI/', ''),
    ('<http://purl.uniprot.org/pubmed/', 'pubmed:'),
    ('<http://www.project-ddidpi.de/vocab/DPI>', 'DPI'),
    ('<http://www.project-ddidpi.de/vocab/', ''),
    ('<http://www.w3.org/1999/02/22-rdf-syntax-ns#', ''),
    ('<http://www.project-ddidpi.de/DDI/', ''),
    ('^^<http://www.w3.org/2001/XMLSchema#string', ''),
    ('<http://www.project-ddidpi.de/Drug/', 'Drug:'),
    ('<http://purl.uniprot.org/uniprot/', 'uniprot:'),
    ('<http://dbpedia.org/resource/', ''),
    ('<http://www.project-ddidpi.de/Annotation/', ''),
    ('<http://www.project-ddidpi.de/Protein/', 'Protein:'),
    ('<http://purl.uniprot.org/geneid/', 'gene:'),
    ('>', ''),
]


def shorten_triple(line):
    """Return `line` with every TRIPLE_REPLACEMENTS substitution applied in order."""
    for old, new in TRIPLE_REPLACEMENTS:
        line = line.replace(old, new)
    return line


def save_graph(path, line, mode='a'):
    """
    Write `line` to the file `path + 'plot_ddidpi.csv'`.

    Parameters
    ----------
    path : str
        Prefix prepended verbatim to the file name 'plot_ddidpi.csv'.
    line : str
        Text to write; no newline is added.
    mode : str, default 'a'
        Mode passed to `open`. The default appends, as before; pass 'w' to
        truncate the file first.
    """
    with open(path + 'plot_ddidpi.csv', mode) as file:
        file.write(line)


# Number of input lines processed (kept as a module-level name).
count = len(lines)

# Start the file from scratch so that re-running this cell does not append a
# second header and a duplicate copy of the data.
save_graph(path, 'Source Label Target\n', mode='w')

# Write all shortened lines with a single append instead of reopening the
# output file once per line.
save_graph(path, ''.join(shorten_triple(line) for line in lines))