# Number of Non-Directed Interactions

In [133]:
from pip._internal import main as pip
try:
    import csv
except ImportError:
    pip(['install', 'csv'])
    import csv
try:
    from SPARQLWrapper import SPARQLWrapper, JSON
except  ImportError:
    pip(['install', 'sparqlwrapper'])
    from SPARQLWrapper import SPARQLWrapper, JSON
    
import pandas
    
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
NonDirected = pandas.DataFrame(columns=['NonDirected Interactions Count'])
pathwayQuery = '''
      SELECT DISTINCT (count(?entity) as ?NonDirectInt)
      WHERE {
        ?entity a wp:Interaction . 
        MINUS { ?entity a wp:DirectedInteraction .}
      }  
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	NonDirected = NonDirected.append({
		'NonDirected Interactions Count': result["NonDirectInt"]["value"],
	 }, ignore_index=True)

In [134]:
NonDirected

Unnamed: 0,NonDirected Interactions Count
0,32164


# Count of Interaction Types

In [135]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntTypesCount = pandas.DataFrame(columns=['Interaction Type', 'Interaction Count'])

pathwayQuery = '''
      SELECT (substr(str(?o),41) as ?IntType) (COUNT(?o) as ?IntCount)
      WHERE {
        ?entity a ?o . 
        ?entity a wp:Interaction . 
      } GROUP BY ?o
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	IntTypesCount = IntTypesCount.append({
		'Interaction Type': result["IntType"]["value"],
        'Interaction Count': result["IntCount"]["value"],
	 }, ignore_index=True)

In [136]:
IntTypesCount

Unnamed: 0,Interaction Type,Interaction Count
0,Conversion,1761
1,Binding,11034
2,Interaction,75046
3,DirectedInteraction,42882
4,Catalysis,7063
5,TranscriptionTranslation,497
6,ComplexBinding,11034
7,Inhibition,2722
8,Stimulation,1286


# Interaction Count with Unspecified Type

In [137]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
Nonspecific = pandas.DataFrame(columns=['NonSpecified Interaction Type Count'])

pathwayQuery = '''
      SELECT DISTINCT (count(?entity) as ?NonSpecific)
      WHERE {
        ?entity a wp:Interaction . 
        MINUS { 
        { ?entity a wp:DirectedInteraction .  }
          UNION
        { ?entity a wp:ComplexBinding .  }
        }
      }  
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	Nonspecific = Nonspecific.append({
		'NonSpecified Interaction Type Count': result["NonSpecific"]["value"],
	 }, ignore_index=True)

In [138]:
Nonspecific

Unnamed: 0,NonSpecified Interaction Type Count
0,21130


# Participants for Interactions: 

In [139]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
parts4Ints = pandas.DataFrame(columns=['Interaction', 'Interaction Type', 'Interaction Participants'])


pathwayQuery = '''

    SELECT DISTINCT (substr(str(?interaction),37 ) as ?interactionId ) (substr(str(?intType), 41 ) as ?types) (GROUP_CONCAT(DISTINCT substr(str(?partType),41), ", ") AS ?participants)  WHERE {
       ?interaction a wp:Interaction ;
           a ?intType .
       ?interaction wp:participants ?participant .
       ?participant a ?partType .

       FILTER (?partType != wp:DataNode)
       FILTER (?intType != wp:Interaction)
    } 
    LIMIT 20
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    parts4Ints = parts4Ints.append({
        'Interaction': result["interactionId"]["value"],
        'Interaction Type': result["types"]["value"],
        'Interaction Participants': result["participants"]["value"],
       
    }, ignore_index=True)

In [140]:
parts4Ints

Unnamed: 0,Interaction,Interaction Type,Interaction Participants
0,WP3354_r94015/ComplexBinding/a8a0b,ComplexBinding,"Complex, Metabolite, Protein"
1,WP2533_r98235/WP/Interaction/e1d3e,DirectedInteraction,Metabolite
2,WP1333_r88657/ComplexBinding/e6af2,ComplexBinding,"Complex, GeneProduct"
3,WP3305_r93778/ComplexBinding/eaca5,Binding,"Complex, GeneProduct, Protein"
4,WP1971_r98351/WP/Interaction/idfeabdadc,Stimulation,Protein
5,WP4124_r93627/WP/Interaction/ef2df,DirectedInteraction,"DirectedInteraction, Interaction, Protein"
6,WP2650_r93953/WP/Interaction/c2ec5,DirectedInteraction,Metabolite
7,WP3271_r89935/WP/Interaction/ed8f2,DirectedInteraction,"GeneProduct, Protein"
8,WP1817_r87151/WP/Interaction/ef7cc,DirectedInteraction,"Complex, DirectedInteraction, Interaction"
9,WP3338_r94045/WP/Interaction/e11b7,DirectedInteraction,"Complex, Metabolite"


# Interaction counts by participants

In [141]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntCounts4Parts = pandas.DataFrame(columns=['Interaction Participants', 'Interaction Count'])

pathwayQuery = '''

    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT ?participants (COUNT(?interaction) AS ?count) WHERE {
      SELECT ?interaction (GROUP_CONCAT(substr(str(?partType),41), ", ") AS ?participants) WHERE {
        ?interaction a wp:Interaction . 
        ?interaction wp:participants ?participant .
        ?participant a ?partType .
        FILTER (?partType != wp:DataNode)
      } GROUP BY ?interaction
    } GROUP BY ?participants
      ORDER BY DESC(?count)
      LIMIT 20
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    IntCounts4Parts = IntCounts4Parts.append({
        'Interaction Participants': result["participants"]["value"],
        'Interaction Count': result["count"]["value"],
       
    }, ignore_index=True)

In [142]:
IntCounts4Parts

Unnamed: 0,Interaction Participants,Interaction Count
0,"GeneProduct, GeneProduct",6071
1,"Metabolite, Metabolite",5692
2,"Complex, DirectedInteraction, Interaction",2973
3,"Protein, GeneProduct, Protein, GeneProduct",2680
4,"DirectedInteraction, Interaction, Complex",1940
5,Metabolite,1720
6,"Complex, Complex, Complex",1324
7,GeneProduct,1238
8,"Metabolite, Metabolite, Metabolite, Metabolite",1114
9,"GeneProduct, Protein, GeneProduct",1014


# Identifier IDs by data source

In [143]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntIDs = pandas.DataFrame(columns=['Database Source', 'Interactions Counted'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT Distinct (count(?Interaction) as ?IntCount) ?database 
       WHERE {
          ?Interaction a gpml:Interaction ;
             gpml:xrefDataSource ?database .
          
       } ORDER BY DESC(?IntCount)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	IntIDs = IntIDs.append({
		'Database Source': result["database"]["value"],
        'Interactions Counted': result["IntCount"]["value"],
	 }, ignore_index=True)

In [144]:
IntIDs

Unnamed: 0,Database Source,Interactions Counted
0,Reactome,62638
1,Uniprot-TrEMBL,213
2,KEGG Reaction,53
3,KEGG Pathway,29
4,WikiPathways,24
5,Rhea,11
6,pato,8
7,kegg.compound,8
8,WormBase,6
9,ChEBI,6


# GPML RDF Interactions WITHOUT a WP RDF equivalent

In [145]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
noWPINT = pandas.DataFrame(columns=['Interactions Counted'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT (COUNT(?Interaction) AS ?count) 
    WHERE {

          ?Interaction a gpml:Interaction .
          MINUS {?wpInteraction wp:isAbout ?Interaction}

        } 
          ORDER BY DESC(?count)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	noWPINT = noWPINT.append({
        'Interactions Counted': result["count"]["value"],
	 }, ignore_index=True)

In [146]:
noWPINT

Unnamed: 0,Interactions Counted
0,46869


# GPML RDF Interactions WITH a WP RDF equivalent and type

In [147]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
wpIntTypes = pandas.DataFrame(columns=['Interaction Type', 'Interactions Count'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT (substr(STR(?IntType),41) AS ?type) (COUNT(?InteractionWP) AS ?count) 
    WHERE {

          ?Interaction a gpml:Interaction .
          ?InteractionWP  wp:isAbout ?Interaction ;
            a ?IntType .

        } GROUP BY ?IntType
          ORDER BY DESC(?count)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	wpIntTypes = wpIntTypes.append({
		'Interaction Type': result["type"]["value"],
        'Interactions Count': result["count"]["value"],
	 }, ignore_index=True)

In [148]:
wpIntTypes

Unnamed: 0,Interaction Type,Interactions Count
0,Interaction,108746
1,DirectedInteraction,87372
2,Catalysis,7063
3,Inhibition,2733
4,Conversion,2167
5,Stimulation,1294
6,TranscriptionTranslation,531


# Coverage % for GPML Interaction with Equivalent WP RDF out of Total GPML Interactions

In [149]:
IntTable = wpIntTypes.loc[:,"Interactions Count"]
wpInteractionType = IntTable.apply(pandas.to_numeric, errors='ignore').sum() 
noWPIntTable = noWPINT.loc[:,'Interactions Counted']
noWPIntIneger = noWPIntTable.apply(pandas.to_numeric, errors = 'ignore').sum()
totalWPInt = noWPIntIneger + wpInteractionType
coverageInt = (wpInteractionType / totalWPInt) * 100
formPercentInt = str(round(coverageInt, 3)) + "% of GPML Interactions with an WP equivalent"

In [150]:
formPercentInt

'81.747% of GPML Interactions with an WP equivalent'

# % of WP RDF Interactions that are of unspecified type

In [151]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntTotalCount = pandas.DataFrame(columns=['Interaction Count'])

pathwayQuery = '''
      SELECT DISTINCT (COUNT(?genInteraction) as ?IntCount)
      WHERE {
        ?genInteraction a wp:Interaction . 
      }
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	IntTotalCount = IntTotalCount.append({
        'Interaction Count': result["IntCount"]["value"],
	 }, ignore_index=True)

NonSpecificTable = Nonspecific.loc[:,"NonSpecified Interaction Type Count"]
NonSpecificInteger = NonSpecificTable.apply(pandas.to_numeric, errors = 'ignore').sum()
TotalInteractions = IntTotalCount.loc[:, "Interaction Count"].apply(pandas.to_numeric, errors = 'ignore').sum() 
percentNonSpecific = (NonSpecificInteger / TotalInteractions) * 100
formPercentNonSpecific = str(round(percentNonSpecific,3)) + '% of WP RDF Interaction with Unspecified Interaction Type'

In [152]:
formPercentNonSpecific

'28.156% of WP RDF Interaction with Unspecified Interaction Type'

In [154]:
formPercentNonSpecific

'28.156% of WP RDF Interaction with Unspecified Interaction Type'

# % of NonDirected Interactions

In [163]:
NonDirectedInteger = NonDirected.loc[:, "NonDirected Interactions Count"].apply(pandas.to_numeric, errors = 'ignore').sum() 
percentNonDirected = (NonDirectedInteger / TotalInteractions) * 100
#check this number
formPercentNonDirected = str(round(percentNonDirected, 3)) + '% of Non-directed interaction in WP RDF'

In [164]:
formPercentNonDirected

'42.859% of Non-directed interaction in WP RDF'