# DataNode Type Counts

In [2]:
from pip._internal import main as pip
try:
    import csv
except ImportError:
    pip(['install', 'csv'])
    import csv
try:
    from SPARQLWrapper import SPARQLWrapper, JSON
except  ImportError:
    pip(['install', 'sparqlwrapper'])
    from SPARQLWrapper import SPARQLWrapper, JSON
    
import pandas
    
# Count WP RDF datanodes per rdf:type for the human pathways tagged with the
# Curation:AnalysisCollection ontology tag.
sparql = SPARQLWrapper("http://sparql.wikipathways.org/sparql")

# substr(str(?o), 41) drops the first 40 characters of the type IRI, i.e. the
# "http://vocabularies.wikipathways.org/wp#" namespace, leaving the bare type name.
pathwayQuery = '''
      SELECT Distinct (substr(str(?o),41) as ?DNType) (COUNT(?o) as ?DNCount)
      WHERE {
        ?entity a ?o . 
        ?entity a wp:DataNode . 
        ?entity dcterms:isPartOf ?pathway .
        ?pathway    a wp:Pathway ;
            wp:ontologyTag <http://vocabularies.wikipathways.org/wp#Curation:AnalysisCollection> ;
            wp:organismName        "Homo sapiens"^^xsd:string .
         
      } GROUP BY ?o
        ORDER BY DESC (?DNCount)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Collect every row first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and re-copying the frame per row is quadratic.
# Passing `columns` keeps the two-column layout even when no rows come back.
DataNodeCount = pandas.DataFrame(
    [
        {
            'DataNode Type': result["DNType"]["value"],
            'DataNode Count': result["DNCount"]["value"],
        }
        for result in results["results"]["bindings"]
    ],
    columns=['DataNode Type', 'DataNode Count'],
)

# The JSON bindings carry the counts as strings; convert the whole column at
# once and fail loudly on a non-numeric value instead of silently keeping
# strings (which .sum() would concatenate rather than add).
DNCountInteger = pandas.to_numeric(DataNodeCount["DataNode Count"]).sum()

In [3]:
# Display the per-type DataNode counts gathered by the query above.
DataNodeCount

Unnamed: 0,DataNode Type,DataNode Count
0,DataNode,24220
1,GeneProduct,18832
2,Protein,6881
3,Metabolite,3547
4,Rna,1106
5,Complex,33


# GPMLRDF datanodes WITHOUT a WPRDF datanode equivalent 

In [4]:
# Count GPML RDF datanodes, grouped by gpml:type, that no WP RDF datanode is
# "about" (the MINUS clause), restricted to the human pathways tagged with
# Curation:AnalysisCollection.
sparql = SPARQLWrapper("http://sparql.wikipathways.org/sparql")

pathwayQuery = '''
      SELECT (STR(?typeIRI) AS ?type) (COUNT(?datanode) AS ?count) WHERE {
          ?datanode a gpml:DataNode ; 
            gpml:type ?typeIRI .
          ?datanode dcterms:isPartOf ?pathway .
          ?wpPathway    a wp:Pathway ;
            wp:isAbout ?pathway ;
            wp:ontologyTag <http://vocabularies.wikipathways.org/wp#Curation:AnalysisCollection> ;
            wp:organismName        "Homo sapiens"^^xsd:string .
          MINUS { ?wpDatanode wp:isAbout ?datanode } .
       } GROUP BY ?typeIRI
         ORDER BY DESC(?count)

    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Collect every row first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and re-copying the frame per row is quadratic.
# Passing `columns` keeps the two-column layout even when no rows come back.
GpmlWoWP = pandas.DataFrame(
    [
        {
            'DataNode Type': result["type"]["value"],
            'DataNode Count': result["count"]["value"],
        }
        for result in results["results"]["bindings"]
    ],
    columns=['DataNode Type', 'DataNode Count'],
)

# The JSON bindings carry the counts as strings; convert the whole column at
# once and fail loudly on a non-numeric value instead of silently keeping
# strings (which .sum() would concatenate rather than add).
GpmlWoWPInteger = pandas.to_numeric(GpmlWoWP["DataNode Count"]).sum()

In [5]:
# Display the per-type counts of GPML datanodes that have no WP datanode.
GpmlWoWP

Unnamed: 0,DataNode Type,DataNode Count
0,GeneProduct,417
1,Unknown,274
2,Pathway,224
3,Metabolite,223
4,Protein,165
5,Rna,48
6,Complex,18


# % GPML RDF datanodes WITHOUT a WP RDF datanode equivalent

In [6]:
# Share of datanodes without a WP counterpart, relative to the combined total
# of both counts, expressed as a percentage.
# NOTE(review): DNCountInteger sums every row of DataNodeCount, and that table
# contains a generic 'DataNode' row next to the specific types - confirm the
# denominator is not counting the same datanodes more than once.
percentGPWoWP = GpmlWoWPInteger / (GpmlWoWPInteger + DNCountInteger)
percentGPWoWP = percentGPWoWP * 100

# Human-readable summary, rounded to three decimal places.
formPercentGPWoWP = ''.join([
    str(round(percentGPWoWP, 3)),
    '% of GPML datanodes WITHOUT a WP datanode equivalent',
])

In [7]:
# Display the formatted percentage string built in the previous cell.
formPercentGPWoWP

'2.445% of GPML datanodes WITHOUT a WP datanode equivalent'

#  The Complex GPMLRDF datanodes without WPRDF equivalents

In [8]:
# List the GPML RDF datanodes of type "Complex" that no WP RDF datanode is
# "about", together with their text label when one exists.
sparql = SPARQLWrapper("http://sparql.wikipathways.org/sparql")

# substr(str(?datanode), 37) drops the first 36 characters of the datanode IRI,
# leaving identifiers such as "WP2806_r97328/DataNode/a680a".
pathwayQuery = '''
      SELECT  (substr(str(?datanode),37 ) as ?datanodes ) ?label
      WHERE {
          ?datanode a gpml:DataNode ; gpml:type "Complex"^^xsd:string .
          ?datanode dcterms:isPartOf ?pathway .
          ?wpPathway    a wp:Pathway ;
            wp:isAbout ?pathway ;
            wp:ontologyTag <http://vocabularies.wikipathways.org/wp#Curation:AnalysisCollection> ;
            wp:organismName        "Homo sapiens"^^xsd:string .
          MINUS { ?wpDatanode wp:isAbout ?datanode }
          OPTIONAL {?datanode gpml:textlabel ?label . }
          
        }

    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Collect every row first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and re-copying the frame per row is quadratic.
# Both columns are declared up front so the layout is the same with or
# without result rows.
CompGpmlWoWp = pandas.DataFrame(
    [
        {
            'Complex DataNodes': result["datanodes"]["value"],
            # ?label is bound inside OPTIONAL, so a row may lack it; fall back
            # to None instead of raising KeyError.
            'Complex Labels': result.get("label", {}).get("value"),
        }
        for result in results["results"]["bindings"]
    ],
    columns=['Complex DataNodes', 'Complex Labels'],
)

In [9]:
# Display the Complex datanodes (and labels) that have no WP datanode.
CompGpmlWoWp

Unnamed: 0,Complex DataNodes,Complex Labels
0,WP2806_r97328/DataNode/a680a,C1q
1,WP2806_r97328/DataNode/b45a6,C3bB3bP
2,WP2806_r97328/DataNode/e4901,C3bB3b
3,WP2806_r97328/DataNode/f51e9,C4b2b
4,WP2806_r97328/DataNode/f7d7c,C3bBbP
5,WP2806_r97328/DataNode/fd0cb,C3bBb
6,WP2806_r97328/DataNode/fd947,C4b2b3b
7,WP2509_r92315/DataNode/c040f,mTORC1
8,WP391_r71373/DataNode/fc7a3,PKA
9,WP4197_r95608/DataNode/d7ca2,DRIP150
