# Loading the LiveDeploys dump

In [27]:
from rdflib import ConjunctiveGraph
import seaborn as sns
import pandas as pd

# Build an empty conjunctive graph, fill it from the local N-Quads dump,
# then report the size of the loaded graph as a quick sanity check.
kg = ConjunctiveGraph()
kg.parse(source="out.nq", format="nquads")
print(len(kg))

18255


# Most used classes

In [28]:
# Count how many subjects are typed with each class, most frequent first.
query = """
    SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY desc(?count)
"""

results = kg.query(query)

# Store plain Python values instead of rdflib terms: the class IRI as a string
# and the count as an int, so that the "count" column is a genuine integer
# column (sortable, summable, plottable) rather than a column of Literal objects.
data = []
for r in results:
    data.append({"class": str(r["class"]), "count": int(r["count"])})
    print(r["class"], r["count"])

df = pd.DataFrame(data)
df

http://schema.org/BioChemEntity 1084
http://schema.org/Person 235
http://schema.org/Organization 201
http://schema.org/DefinedTerm 154
http://schema.org/Dataset 113
http://schema.org/DataDownload 105
http://schema.org/DataSet 80
http://schema.org/CreativeWork 74
http://schema.org/TaxonName 70
http://schema.org/SequenceAnnotation 62
http://schema.org/PropertyValue 49
http://schema.org/ScholarlyArticle 41
http://schema.org/SequenceRange 40
http://schema.org/Audience 32
http://schema.org/DataCatalog 31
http://schema.org/LearningResource 28
http://schema.org/PostalAddress 21
http://schema.org/Offer 18
http://schema.org/Place 17
http://schema.org/BusinessEntityType 16
http://schema.org/Event 13
http://schema.org/CourseInstance 12
http://schema.org/Taxon 11
http://schema.org/NGO 11
http://schema.org/Course 8
http://schema.org/MolecularEntity 8
http://schema.org/Text 6
http://schema.org/ListItem 6
http://schema.org/SoftwareApplication 6
http://schema.org/PublicationEvent 5
http://schema.org/D

Unnamed: 0,class,count
0,http://schema.org/BioChemEntity,1084
1,http://schema.org/Person,235
2,http://schema.org/Organization,201
3,http://schema.org/DefinedTerm,154
4,http://schema.org/Dataset,113
...,...,...
73,http://schema.org/WebApplication,1
74,http://schema.org/ComputerLanguage,1
75,http://schema.org/ComputationalWorkflow,1
76,http://schema.org/SoftwareSourceCode,1


# Most used properties

In [29]:
# Count how many triples use each predicate, most frequent first.
query = """
    SELECT ?p (COUNT(?s) AS ?count ) { ?s ?p ?o } GROUP BY ?p ORDER BY desc(?count)
"""

results = kg.query(query)

# Store plain Python values instead of rdflib terms: the predicate IRI as a
# string and the count as an int, so that the "count" column is a genuine
# integer column rather than a column of Literal objects.
data = []
for r in results:
    data.append({"property": str(r["p"]), "count": int(r["count"])})
    print(r["p"], r["count"])

df = pd.DataFrame(data)
df
    


http://schema.org/name 3285
http://www.w3.org/1999/02/22-rdf-syntax-ns#type 2669
http://schema.org/identifier 1300
http://schema.org/taxonRank 1158
http://schema.org/taxonomicRange 1083
http://schema.org/parentTaxon 1078
http://schema.org/@Type 1074
http://schema.org/studySubject 1074
http://schema.org/url 558
http://schema.org/keywords 407
http://schema.org/description 356
http://schema.org/author 286
http://purl.org/dc/terms/conformsTo 280
http://schema.org/alternateName 141
http://schema.org/sameAs 116
http://schema.org/distribution 104
http://schema.org/isInvolvedInBiologicalProcess 99
http://schema.org/license 96
http://schema.org/email 94
http://schema.org/givenName 90
http://schema.org/familyName 90
http://schema.org/dataset 81
http://schema.org/contentURL 80
http://schema.org/valueReference 70
http://schema.org/encodingFormat 70
http://schema.org/isBasedOn 69
http://schema.org/version 67
http://schema.org/image 65
http://schema.org/sequenceLocation 64
http://schema.org/alternat

Unnamed: 0,property,count
0,http://schema.org/name,3285
1,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,2669
2,http://schema.org/identifier,1300
3,http://schema.org/taxonRank,1158
4,http://schema.org/taxonomicRange,1083
...,...,...
230,http://schema.org/eligibility,1
231,http://schema.org/worksFor,1
232,http://ogp.me/ns/fb#app_id,1
233,http://schema.org/query-input,1


# DefinedTerms

In [30]:
# Select every subject explicitly typed (rdf:type, abbreviated `a` in SPARQL)
# as a schema.org DefinedTerm. The predicate is pinned to `a` rather than left
# as an unbound variable, so triples that merely point at the class IRI through
# some other property are not reported as DefinedTerms.
query = """

SELECT ?s WHERE {
    ?s a <http://schema.org/DefinedTerm>
}

"""

In [31]:
# Run the query prepared in the previous cell against the loaded graph.
results = kg.query(query)

# Print the value bound to ?s for each result row, one per line.
for row in results:
    print(row.s)

Nbacc192adb6048fb91ae0b5e14edcabc
https://disprot.org/IDPO/IDPO:00076
https://disprot.org/IDPO/IDPO:00499
N0269a97277d145f5b098310e7b7f97bd
Na15a6b69b9214a60b6216d903210710a
N01ac55c11aed4ebbb0b9bb2fd7c2e072
N9cdd0a1e8c734a988aae4bfbffd692a7
N98f1d8cabccf408cbb1ae89b2788a5fd
Ne14891a7070f4685bfeed9a82783ba55
http://edamontology.org/topic_3474
http://edamontology.org/topic_0091
http://edamontology.org/topic_4019
N705d925bee6f46c58a0e79e87831d193
http://www.ebi.ac.uk/SAMEA104493311
http://www.ebi.ac.uk/efo/EFO_0004236
http://edamontology.org/topic_3063
http://edamontology.org/topic_0089
https://disprot.org/IDPO/IDPO:00026
https://disprot.org/IDPO/GO:0140677
https://disprot.org/IDPO/IDPO:00506
https://disprot.org/IDPO/GO:0005515
https://disprot.org/IDPO/IDPO:00050
https://disprot.org/IDPO/IDPO:00025
https://disprot.org/IDPO/IDPO:00027
Ne841cf09d312445ca09655bb3f255fdf
https://disprot.org/IDPO/GO:0071889
https://disprot.org/IDPO/GO:0031625
http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:0034774
h