# Number of Non-Directed Interactions

In [1]:
from pip._internal import main as pip
try:
    import csv
except ImportError:
    pip(['install', 'csv'])
    import csv
try:
    from SPARQLWrapper import SPARQLWrapper, JSON
except  ImportError:
    pip(['install', 'sparqlwrapper'])
    from SPARQLWrapper import SPARQLWrapper, JSON
    
import pandas
    
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
NonDirected = pandas.DataFrame(columns=['NonDirected Interactions Count'])
pathwayQuery = '''
      SELECT DISTINCT (count(?entity) as ?NonDirectInt)
      WHERE {
        ?entity a wp:Interaction . 
        MINUS { ?entity a wp:DirectedInteraction .}
      }  
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	NonDirected = NonDirected.append({
		'NonDirected Interactions Count': result["NonDirectInt"]["value"],
	 }, ignore_index=True)

In [2]:
NonDirected

Unnamed: 0,NonDirected Interactions Count
0,32654


# Total WP Interactions

In [3]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
totalWPInts = pandas.DataFrame(columns=['Total WP Interactions'])
pathwayQuery = '''
      SELECT DISTINCT (count(?entity) as ?InteractionCount)
      WHERE {
        ?entity a wp:Interaction . 
      }  
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	totalWPInts = totalWPInts.append({
		'Total WP Interactions': result["InteractionCount"]["value"],
	 }, ignore_index=True)

formTotalWPInts = totalWPInts.loc[:, "Total WP Interactions"].apply(pandas.to_numeric, errors = 'ignore').sum()

In [4]:
totalWPInts

Unnamed: 0,Total WP Interactions
0,76840


# Count of Interaction Types

In [5]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntTypesCount = pandas.DataFrame(columns=['Interaction Type', 'Interaction Count'])

pathwayQuery = '''
      SELECT (substr(str(?o),41) as ?IntType) (COUNT(?o) as ?IntCount)
      WHERE {
        ?entity a ?o . 
        ?entity a wp:Interaction . 
      } GROUP BY ?o
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	IntTypesCount = IntTypesCount.append({
		'Interaction Type': result["IntType"]["value"],
        'Interaction Count': result["IntCount"]["value"],
	 }, ignore_index=True)

In [6]:
IntTypesCount

Unnamed: 0,Interaction Type,Interaction Count
0,DirectedInteraction,44186
1,ComplexBinding,11438
2,Interaction,76840
3,TranscriptionTranslation,508
4,Stimulation,1312
5,Binding,11438
6,Inhibition,2810
7,Conversion,2052
8,Catalysis,7133


# Interaction Count with Unspecified Type

In [7]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
Nonspecific = pandas.DataFrame(columns=['NonSpecified Interaction Type Count'])

pathwayQuery = '''
      SELECT DISTINCT (count(?entity) as ?NonSpecific)
      WHERE {
        ?entity a wp:Interaction . 
        MINUS { 
        { ?entity a wp:DirectedInteraction .  }
          UNION
        { ?entity a wp:ComplexBinding .  }
          UNION
        { ?entity a wp:Binding .  }
        }
      }  
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	Nonspecific = Nonspecific.append({
		'NonSpecified Interaction Type Count': result["NonSpecific"]["value"],
	 }, ignore_index=True)

In [8]:
Nonspecific

Unnamed: 0,NonSpecified Interaction Type Count
0,21216


# Participants for Interactions: 

In [9]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
parts4Ints = pandas.DataFrame(columns=['Interaction', 'Interaction Type', 'Interaction Participants'])


pathwayQuery = '''

    SELECT DISTINCT (substr(str(?interaction),37 ) as ?interactionId ) (substr(str(?intType), 41 ) as ?types) (GROUP_CONCAT(DISTINCT substr(str(?partType),41), ", ") AS ?participants)  WHERE {
       ?interaction a wp:Interaction ;
           a ?intType .
       ?interaction wp:participants ?participant .
       ?participant a ?partType .

       FILTER (?partType != wp:DataNode)
       FILTER (?intType != wp:Interaction)
    } 
    LIMIT 20
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    parts4Ints = parts4Ints.append({
        'Interaction': result["interactionId"]["value"],
        'Interaction Type': result["types"]["value"],
        'Interaction Participants': result["participants"]["value"],
       
    }, ignore_index=True)

In [10]:
parts4Ints

Unnamed: 0,Interaction,Interaction Type,Interaction Participants
0,WP1918_r101319/WP/Interaction/fc1fb,DirectedInteraction,"GeneProduct, Protein"
1,WP2712_r101443/WP/Interaction/add44,DirectedInteraction,Metabolite
2,WP897_r78369/WP/Interaction/be448,DirectedInteraction,"GeneProduct, Protein"
3,WP3804_r101304/ComplexBinding/abb84,ComplexBinding,"Complex, Protein"
4,WP1852_r101581/ComplexBinding/b0d08,ComplexBinding,"Complex, Metabolite, Protein"
5,WP4190_r101730/WP/Interaction/idf0db6c0a,Conversion,Metabolite
6,WP1858_r101227/WP/Interaction/e65e4,DirectedInteraction,"Complex, Metabolite"
7,WP15_r98241/WP/Interaction/bfc6e,DirectedInteraction,"GeneProduct, Metabolite, Protein"
8,WP2032_r89823/WP/Interaction/id533b3dfe,DirectedInteraction,"GeneProduct, Protein"
9,WP2700_r101483/WP/Interaction/a5496,DirectedInteraction,Complex


# Interaction counts by participants

In [11]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntCounts4Parts = pandas.DataFrame(columns=['Interaction Participants', 'Interaction Count'])

pathwayQuery = '''

    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT ?participants (COUNT(?interaction) AS ?count) WHERE {
      SELECT ?interaction (GROUP_CONCAT(substr(str(?partType),41), ", ") AS ?participants) WHERE {
        ?interaction a wp:Interaction . 
        ?interaction wp:participants ?participant .
        ?participant a ?partType .
        FILTER (?partType != wp:DataNode)
      } GROUP BY ?interaction
    } GROUP BY ?participants
      ORDER BY DESC(?count)
      LIMIT 20
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    IntCounts4Parts = IntCounts4Parts.append({
        'Interaction Participants': result["participants"]["value"],
        'Interaction Count': result["count"]["value"],
       
    }, ignore_index=True)

In [12]:
IntCounts4Parts

Unnamed: 0,Interaction Participants,Interaction Count
0,"Metabolite, Metabolite",6231
1,"GeneProduct, GeneProduct",6006
2,"Complex, DirectedInteraction, Interaction",4547
3,"GeneProduct, Protein, GeneProduct, Protein",2748
4,Metabolite,1753
5,"Protein, Protein, Complex",1626
6,"Protein, DirectedInteraction, Interaction",1447
7,"Complex, Complex, Complex",1370
8,"GeneProduct, Protein",1316
9,GeneProduct,1286


# Identifier IDs by data source

In [13]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntIDs = pandas.DataFrame(columns=['Database Source', 'Interactions Counted'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT Distinct (count(?Interaction) as ?IntCount) ?database 
       WHERE {
          ?Interaction a gpml:Interaction ;
             gpml:xrefDataSource ?database .
          
       } ORDER BY DESC(?IntCount)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	IntIDs = IntIDs.append({
		'Database Source': result["database"]["value"],
        'Interactions Counted': result["IntCount"]["value"],
	 }, ignore_index=True)

In [14]:
IntIDs

Unnamed: 0,Database Source,Interactions Counted
0,Reactome,63127
1,Uniprot-TrEMBL,213
2,KEGG Reaction,53
3,KEGG Pathway,28
4,WikiPathways,24
5,Rhea,11
6,pato,8
7,KEGG Compound,7
8,WormBase,6
9,ChEBI,6


# GPML RDF Interactions WITHOUT a WP RDF equivalent

In [15]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
noWPINT = pandas.DataFrame(columns=['Interactions Counted'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT (COUNT(?Interaction) AS ?count) 
    WHERE {

          ?Interaction a gpml:Interaction .
          MINUS {?wpInteraction wp:isAbout ?Interaction}

        } 
          ORDER BY DESC(?count)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	noWPINT = noWPINT.append({
        'Interactions Counted': result["count"]["value"],
	 }, ignore_index=True)

In [16]:
noWPINT

Unnamed: 0,Interactions Counted
0,47536


# GPML RDF Interactions WITH a WP RDF equivalent and type

In [37]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
wpIntTypes = pandas.DataFrame(columns=['Interaction Type', 'Interactions Count'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT (substr(STR(?IntType),41) AS ?type)  (COUNT(?InteractionWP) AS ?count) 
    WHERE {

        ?Interaction a gpml:Interaction .
        #    gpml:xrefDataSource ?provenence .
        ?InteractionWP  wp:isAbout ?Interaction ;
            a ?IntType .

        } GROUP BY ?IntType
          ORDER BY DESC(?count)
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	wpIntTypes = wpIntTypes.append({
		'Interaction Type': result["type"]["value"],
        'Interactions Count': result["count"]["value"],
	 }, ignore_index=True)

In [38]:
wpIntTypes

Unnamed: 0,Interaction Type,Interactions Count
0,Interaction,110628
1,DirectedInteraction,89170
2,Catalysis,7133
3,Inhibition,2821
4,Conversion,2477
5,Stimulation,1320
6,TranscriptionTranslation,542


# Coverage % for GPML Interaction with Equivalent WP RDF out of Total GPML Interactions

In [19]:
IntTable = wpIntTypes.loc[:,"Interactions Count"]
wpInteractionType = IntTable.apply(pandas.to_numeric, errors='ignore').sum() 
noWPIntTable = noWPINT.loc[:,'Interactions Counted']
noWPIntIneger = noWPIntTable.apply(pandas.to_numeric, errors = 'ignore').sum()
totalWPInt = noWPIntIneger + wpInteractionType
coverageInt = (wpInteractionType / totalWPInt) * 100
formPercentInt = str(round(coverageInt, 3)) + "% of GPML Interactions with an WP equivalent"

In [20]:
formPercentInt

'0.0% of GPML Interactions with an WP equivalent'

In [21]:
percentWoInt = 100 - coverageInt
str(round(percentWoInt, 3)) + "% of GPML Interactions without a WP equivalent"

'100.0% of GPML Interactions without a WP equivalent'

# Total GPML Interactions

In [22]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
GPMLCountTotal = pandas.DataFrame(columns=['Interactions Count'])

pathwayQuery = '''
    PREFIX gpml:    <http://vocabularies.wikipathways.org/gpml#>
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX dc:      <http://purl.org/dc/elements/1.1/>
    PREFIX rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 

    SELECT DISTINCT (COUNT(?Interaction) AS ?count) 
    WHERE {

          ?Interaction a gpml:Interaction .
          
        } 
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	GPMLCountTotal = GPMLCountTotal.append({
        'Interactions Count': result["count"]["value"],
	 }, ignore_index=True)

GPMLCountTotalInt = GPMLCountTotal.loc[:,"Interactions Count"].apply(pandas.to_numeric, errors = 'ignore').sum() 

In [23]:
GPMLCountTotalInt

157446

# % of WP RDF Interactions that are of unspecified type

In [24]:
sparql = SPARQLWrapper("http://sparql.wikipathways.org")
IntTotalCount = pandas.DataFrame(columns=['Interaction Count'])

pathwayQuery = '''
      SELECT DISTINCT (COUNT(?genInteraction) as ?IntCount)
      WHERE {
        ?genInteraction a wp:Interaction . 
      }
    '''
sparql.setQuery(pathwayQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
	IntTotalCount = IntTotalCount.append({
        'Interaction Count': result["IntCount"]["value"],
	 }, ignore_index=True)

NonSpecificTable = Nonspecific.loc[:,"NonSpecified Interaction Type Count"]
NonSpecificInteger = NonSpecificTable.apply(pandas.to_numeric, errors = 'ignore').sum()
TotalInteractions = IntTotalCount.loc[:, "Interaction Count"].apply(pandas.to_numeric, errors = 'ignore').sum() 
percentNonSpecific = (NonSpecificInteger / TotalInteractions) * 100
formPercentNonSpecific = str(round(percentNonSpecific,3)) + '% of WP RDF Interaction with Unspecified Interaction Type'

In [25]:
formPercentNonSpecific

'27.611% of WP RDF Interaction with Unspecified Interaction Type'

In [26]:
percentWSpecificType = 100 - percentNonSpecific
str(round(percentWSpecificType, 3)) + "% of WP RDF Interactions with a specific WP RDF Interaction type"

'72.389% of WP RDF Interactions with a specific WP RDF Interaction type'

# % of NonDirected Interactions

In [27]:
NonDirectedInteger = NonDirected.loc[:, "NonDirected Interactions Count"].apply(pandas.to_numeric, errors = 'ignore').sum() 
percentNonDirected = (NonDirectedInteger / TotalInteractions) * 100
#check this number
formPercentNonDirected = str(round(percentNonDirected, 3)) + '% of Non-directed interaction in WP RDF'

In [28]:
formPercentNonDirected

'42.496% of Non-directed interaction in WP RDF'

In [29]:
ConversionCount = int(IntTypesCount.iloc[0]["Interaction Count"])
BindingCount = int(IntTypesCount.iloc[1]["Interaction Count"])
InteractionWPRDFCount = int(IntTypesCount.iloc[2]["Interaction Count"])
DirectedWPRDFCount = int(IntTypesCount.iloc[3]["Interaction Count"])
CatalysisCount = int(IntTypesCount.iloc[4]["Interaction Count"])
TranscrTranslaCount = int(IntTypesCount.iloc[5]["Interaction Count"])
complexCount = int(IntTypesCount.iloc[6]["Interaction Count"])
InhibitionCount = int(IntTypesCount.iloc[7]["Interaction Count"])
stimulationCount = int(IntTypesCount.iloc[8]["Interaction Count"])

ConversionCount, BindingCount, InteractionWPRDFCount, DirectedWPRDFCount, CatalysisCount, TranscrTranslaCount, complexCount, InhibitionCount, stimulationCount

(44186, 11438, 76840, 508, 1312, 11438, 2810, 2052, 7133)

In [30]:
IntTypesCount

Unnamed: 0,Interaction Type,Interaction Count
0,DirectedInteraction,44186
1,ComplexBinding,11438
2,Interaction,76840
3,TranscriptionTranslation,508
4,Stimulation,1312
5,Binding,11438
6,Inhibition,2810
7,Conversion,2052
8,Catalysis,7133


In [31]:
#noWPIntIneger without eq
#GPMLCountTotalInt GPML Total
percentComplex = (complexCount / GPMLCountTotalInt ) * 100
formpercentComplex = str(round(percentComplex, 3)) + '% of complex interactions out of all GPML Interactions'
percentComplexWPRDF = (complexCount / formTotalWPInts) * 100
formpercentComplexWPRDF = str(round(percentComplexWPRDF, 3)) + '% of complex interactions out of all WP Interactions'
formpercentComplex,  formpercentComplexWPRDF

('1.785% of complex interactions out of all GPML Interactions',
 '3.657% of complex interactions out of all WP Interactions')

In [32]:
#complexCount = int(IntTypesCount.iloc[6]["Interaction Count"])
PercentConversionCount = (ConversionCount / formTotalWPInts) * 100
formPercentConversionCount = str(round(PercentConversionCount, 3)) + '% of conversion interactions out of all WP Interactions'
PercentBindingCount = (BindingCount / formTotalWPInts) * 100
formPercentBindingCount = str(round(PercentBindingCount, 3)) + '% of binding interactions out of all WP Interactions'
formPercentConversionCount, formPercentBindingCount

('57.504% of conversion interactions out of all WP Interactions',
 '14.885% of binding interactions out of all WP Interactions')

In [39]:
IntTypesCount

Unnamed: 0,Interaction Type,Interaction Count
0,DirectedInteraction,44186
1,ComplexBinding,11438
2,Interaction,76840
3,TranscriptionTranslation,508
4,Stimulation,1312
5,Binding,11438
6,Inhibition,2810
7,Conversion,2052
8,Catalysis,7133


In [35]:
totalWPInts, GPMLCountTotalInt

(  Total WP Interactions
 0                 76840, 157446)