# Loading the LiveDeploys dump

In [35]:
from rdflib import ConjunctiveGraph
import seaborn as sns
import pandas as pd

# Create an empty graph, fill it from the N-Quads dump on disk,
# then print the size that len() reports for the loaded graph.
kg = ConjunctiveGraph()
kg.parse(source="out.nq", format="nquads")
print(len(kg))

74245


# Most used classes

In [36]:
# Count the subjects typed with each class (`?s a ?class`), most frequent first.
query = """
    SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY desc(?count)
"""

results = kg.query(query)
data = []
for class_term, count_term in results:
    # The query yields rdflib term objects; convert them to native str / int so the
    # DataFrame gets ordinary string and integer columns instead of rdflib objects.
    class_uri, n_subjects = str(class_term), int(count_term)
    data.append({"class": class_uri, "count": n_subjects})
    print(class_uri, n_subjects)

# Naming the columns keeps the frame well-formed even when the query returns no rows.
df = pd.DataFrame(data, columns=["class", "count"])
df

http://schema.org/CreativeWork 14916
http://schema.org/BioChemEntity 1085
http://schema.org/DataDownload 910
http://schema.org/creativeWork 714
https://bioschemas.org/Taxon 714
http://schema.org/Dataset 524
http://schema.org/Organization 307
http://schema.org/Person 256
http://schema.org/DefinedTerm 159
http://schema.org/MolecularEntity 113
http://schema.org/DataSet 78
http://schema.org/TaxonName 70
http://schema.org/ChemicalSubstance 68
http://schema.org/SequenceAnnotation 62
http://schema.org/ScholarlyArticle 50
http://schema.org/PropertyValue 47
http://schema.org/SequenceRange 40
http://schema.org/DataCatalog 32
http://schema.org/PostalAddress 30
http://schema.org/WebPage 28
http://schema.org/LearningResource 28
http://schema.org/Audience 26
http://schema.org/WebSite 21
http://schema.org/Place 19
http://schema.org/Protein 16
http://schema.org/NGO 16
http://schema.org/Offer 16
http://schema.org/BusinessEntityType 16
http://schema.org/Event 14
http://schema.org/ImageObject 14
http://s

Unnamed: 0,class,count
0,http://schema.org/CreativeWork,14916
1,http://schema.org/BioChemEntity,1085
2,http://schema.org/DataDownload,910
3,http://schema.org/creativeWork,714
4,https://bioschemas.org/Taxon,714
...,...,...
82,http://xmlns.com/foaf/0.1/Document,1
83,http://rdfs.org/sioc/ns#Item,1
84,http://xmlns.com/foaf/0.1/Image,1
85,http://schema.org/WebPageElement,1


# Most used properties

In [39]:
# Count the statements that use each predicate (`?s ?p ?o`), most frequent first.
query = """
    SELECT ?p (COUNT(?s) AS ?count ) { ?s ?p ?o } GROUP BY ?p ORDER BY desc(?count)
"""

results = kg.query(query)
data = []
for property_term, count_term in results:
    # The query yields rdflib term objects; convert them to native str / int so the
    # DataFrame gets ordinary string and integer columns instead of rdflib objects.
    property_uri, n_uses = str(property_term), int(count_term)
    data.append({"property": property_uri, "count": n_uses})
    print(property_uri, n_uses)

# Naming the columns keeps the frame well-formed even when the query returns no rows.
df = pd.DataFrame(data, columns=["property", "count"])
df
    


http://www.w3.org/1999/02/22-rdf-syntax-ns#type 20595
http://schema.org/name 20537
http://schema.org/url 12281
http://schema.org/about 5716
http://schema.org/identifier 1616
http://schema.org/taxonRank 1158
http://schema.org/taxonomicRange 1083
http://schema.org/parentTaxon 1078
http://schema.org/studySubject 1074
http://schema.org/@Type 1074
http://schema.org/distribution 916
http://schema.org/description 607
http://schema.org/keywords 578
http://purl.org/dc/terms/conformsTo 562
http://schema.org/version 444
http://schema.org/author 311
http://schema.org/license 200
http://schema.org/contentURL 178
http://schema.org/creator 164
http://schema.org/sameAs 161
http://schema.org/alternateName 157
http://schema.org/associatedDisease 123
http://schema.org/isInvolvedInBiologicalProcess 99
http://schema.org/email 93
http://schema.org/givenName 88
http://schema.org/familyName 88
http://schema.org/image 86
http://schema.org/dataset 84
http://schema.org/logo 80
http://schema.org/encodingFormat 80

Unnamed: 0,property,count
0,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,20595
1,http://schema.org/name,20537
2,http://schema.org/url,12281
3,http://schema.org/about,5716
4,http://schema.org/identifier,1616
...,...,...
250,http://schema.org/breadcrumb,1
251,http://ogp.me/ns#updated_time,1
252,http://schema.org/primaryImageOfPage,1
253,http://schema.org/legalName,1


# DefinedTerms

In [41]:
# List every distinct subject that points, through any predicate, at
# schema.org DefinedTerm or DefinedTermSet.
query = """
SELECT Distinct ?s WHERE {
    VALUES ?o { <http://schema.org/DefinedTerm> <http://schema.org/DefinedTermSet> }
    ?s ?p ?o
} 

"""

results = kg.query(query)
# Each result row carries only the projected ?s variable, so unpack it directly.
for (subject,) in results:
    print(subject)

Neb84024e8ce148a09ba564ab315883ff
N2aee2730fd484ecfbe110d63a70998a3
N2bc495323805416f9aeea6e59d8ca561
Na1b2284bae6c41ecaf0c6267d85b6885
N7751f4816429488690db607f9153c1e8
Nacec4499ebe041e6b8da2c3752d974d6
https://disprot.org/IDPO/IDPO:00499
https://disprot.org/IDPO/IDPO:00076
Ncfdddc3d7fba4a3c9872d2b40b3b806e
http://edamontology.org/topic_4019
http://edamontology.org/topic_3474
http://edamontology.org/topic_0091
https://disprot.org/IDPO/GO:0140677
https://disprot.org/IDPO/IDPO:00025
https://disprot.org/IDPO/GO:0031625
Nf26c9a5dc03540ed9f8ec251ff98c34b
https://disprot.org/IDPO/IDPO:00506
https://disprot.org/IDPO/IDPO:00026
https://disprot.org/IDPO/IDPO:00027
https://disprot.org/IDPO/IDPO:00050
https://disprot.org/IDPO/GO:0005515
https://disprot.org/IDPO/GO:0071889
http://www.ebi.ac.uk/SAMEA104493311
http://www.ebi.ac.uk/efo/EFO_0004236
edam:http:/edamontology.org/operation_2421
edam:http:/edamontology.org/topic_3174
edam:http:/edamontology.org/topic_3837
N4627ad2844544795a6112ed90ce6b7cf