   # Hands-on session: SCAIView API
   

<img style="float: left;" src="./img/SCAIView_API.jpg" width =800>

   
   

# Imports

In [2]:
!pip install owlready2
import requests
import json
import csv
import os
import pandas as pd
import owlready2 as owl

You should consider upgrading via the '/opt/app-root/bin/python3 -m pip install --upgrade pip' command.[0m


# SCAIView APIs

* [SCAIViewNeuro](https://api.neuro.scaiview.com/swagger-ui.html)
* [SCAIViewAcademia](https://api.academia.scaiview.com/swagger-ui.html)

# SCAIView Web-GUI

* [SCAIViewNeuro](https://neuro.scaiview.com/)
* [SCAIViewAcademia](https://academia.scaiview.com/)

# Python library

With the following functions you can access the SCAIView API and send requests to get a response object back. The whole OpenAPI specification to add functions or build your own library can be found here: [SCAIViewNeuro](https://api.neuro.scaiview.com/swagger-ui.html) and [SCAIViewAcademia](https://api.academia.scaiview.com/swagger-ui.html)

## set base_url

In [17]:
base = 'neuro' #'academia' or 'neuro'
base_url = 'https://api.' + base + '.scaiview.com/'
def _url(path):
    return base_url + path

## search-controller

In [20]:
def search_documents(query_obj):
    '''Returns documents for a search query.

    query_obj: query object (nested dict) sent as the JSON body of the request
    Returns the response object of the POST request.
    '''
    # Send the argument itself; the previous version sent the notebook-global
    # `query`, which ignored the parameter and failed when no such global existed.
    doc_list = requests.post(_url('api/v6/search'), json=query_obj)
    return doc_list

def get_documents(query_obj):
    '''Returns documents for a search query in json format.

    query_obj: query object (nested dict) sent as the JSON body of the request
    Returns the response object of the POST request.
    '''
    # Send the argument itself rather than the notebook-global `query`.
    doc_list = requests.post(_url('api/v6/search/documents'), json=query_obj)
    return doc_list

# select data formats for document list: ['ID', 'CSV', 'BIBTEX', 'RIS', 'ENDNOTE']

def get_documents_list(query_obj, form='CSV', page_size='20'):
    '''Returns documents for a search query and exports to the given format.

    query_obj: query object (nested dict) sent as the JSON body of the request
    form: data format the document list is exported to, one of
          ['ID', 'CSV', 'BIBTEX', 'RIS', 'ENDNOTE']
    page_size: how many documents will be listed, as an integer or a string
               of an integer (max: '2000')
    Returns the response object of the POST request.
    '''
    # f-string formatting accepts both '200' and 200; plain concatenation
    # raised TypeError for an integer page_size.
    export_path = f'api/v6/search/export?format={form}&page=0&size={page_size}'
    doc_list = requests.post(_url(export_path), json=query_obj)
    return doc_list



## entity-statistics-controller

In [19]:
# terminology list (entities) SCAIView neuro
# Each constant holds the annotator identifier string of one terminology. These
# strings are what the notebook passes as the `entity` argument of
# get_stats / export_stats, e.g. export_stats(query, UBERON).

MESH = "JPMTAGGERANNOTATOR(MESH14.0-SNAPSHOT1585598140)" 
EPO = "JPMTAGGERANNOTATOR(EPO1.0-SNAPSHOT1606393218)"
UBERON = "JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)"
ALZHEIMER = "JPMTAGGERANNOTATOR(ALZHEIMER14.0-SNAPSHOT1585595494)"
BRCO = "JPMTAGGERANNOTATOR(BRCO14.0-SNAPSHOT1585594937)"
HOMO_SAPIENS = "JPMTAGGERANNOTATOR(HOMO_SAPIENS15.0-SNAPSHOT1586876562)"
PTS = "JPMTAGGERANNOTATOR(PTS14.0-SNAPSHOT1585595459)"
NEURONAMES = "JPMTAGGERANNOTATOR(NEURONAMES1.0-SNAPSHOT1606837450)"
FMA = "JPMTAGGERANNOTATOR(FMA1.0-SNAPSHOT1585594358)"

# terminology list (entities) SCAIView academia
# NOTE: UBERON and HOMO_SAPIENS are assigned a second time here with exactly the
# same string values as in the neuro list above, so the re-assignment does not
# change them.

UBERON = "JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)"
GO = "JPMTAGGERANNOTATOR(GO15.0-SNAPSHOT1585596145)"
HYPO_FINDER = "JPMTAGGERANNOTATOR(HYPOTHESISFINDER14.0-SNAPSHOT1585595494)"
DRUG_BANK = "JPMTAGGERANNOTATOR(DRUGBANK14.0-SNAPSHOT1591360597)"
HOMO_SAPIENS = "JPMTAGGERANNOTATOR(HOMO_SAPIENS15.0-SNAPSHOT1586876562)"


def get_stats(query_obj, entity):
    '''Retrieves statistics about matches to the given query.

    query_obj: query object (nested dict) sent as the JSON body of the request
    entity: annotator identifier string appended to the statistics endpoint path
    Returns the response object of the PUT request.
    '''
    # Send the argument itself rather than the notebook-global `query`.
    corpus = requests.put(_url('api/v6/statistics/entities/' + entity), json=query_obj)
    return corpus

def export_stats(query_obj, entity):
    '''Request the statistics about matches to the given query as a CSV export.

    query_obj: query object sent as the JSON body of the request
    entity: annotator identifier string appended to the statistics endpoint path
    Returns the response object of the PUT request.
    '''
    endpoint = ''.join(('api/v6/statistics/entities/', entity, '/export'))
    return requests.put(_url(endpoint), json=query_obj)

# SCAIView cheat sheet

The syntax to build the query_objects can be a bit confusing and is quite hard to find on the OpenAPI specification website [NEURO](https://api.neuro.scaiview.com/swagger-ui.html), [ACADEMIA](https://api.academia.scaiview.com/swagger-ui.html).
Therefore, we provide this little cheat sheet as an excerpt to help you out. 

## Different query forms:  

### FreetextQuery
Syntax: {'FREETEXT': {'searchTerm': '**freetext**'}}. Here you can choose any terms for **freetext** as you like. Much like in Pubmed or in a Google search. For example, you can look for 'Alzheimer' in this query.
### ConceptQuery
Syntax: {'CONCEPTID': {'conceptIdentifier': '**ID**'}}. The concept query lets you search for terms that have a unique identifier and are therefore very precise. For the concept query, you have to specify two things for '**ID**': first, the concept (i.e. the terminology) you want to use, and second, the specific term (i.e. the _unique-ID_ of the term) within it. For example, if you want to look for 'Alzheimer Disease' in the _MeSH_ terminology, you have to combine the concept _'mesh'_ and the _unique-ID_ _'D000544'_ into '**ID**' = _'mesh:D000544'_.     

Other concepts include:      

[UBERON](http://uberon.github.io/)(Uberon, an integrative multi-species anatomy ontology). Exemplary syntax: **ID** = 'uberon:UBERON:0016538' corresponds to [Temporal Cortex](https://www.ebi.ac.uk/ols/ontologies/uberon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUBERON_0016538)  
[MESH](https://www.ncbi.nlm.nih.gov/mesh/)(Medical Subject Headings). Exemplary syntax: **ID** = 'mesh:D016229', corresponds to [Amyloid beta-Peptides](https://www.ncbi.nlm.nih.gov/mesh/68016229)  
[HGNC](https://www.genenames.org/)(HUGO Gene Nomenclature Committee). Exemplary syntax: **ID** = 'hgnc:7881', corresponds to [NOTCH1](https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:7881)  
[GO](http://geneontology.org/)(Gene ontology). Exemplary syntax: **ID** = 'go:GO:0071875' corresponds to [adrenergic receptor signaling pathway](https://www.ebi.ac.uk/QuickGO/term/GO:0071875)  
[CHeBI](https://www.ebi.ac.uk/chebi/)(Chemical Entities of Biological Interest). Exemplary syntax: **ID** = 'chebi:CHEBI:15355' corresponds to [acetylcholine](https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:15355)

### ToolQuery
Syntax: {"TOOL":{"id":"**entity**"}}. With the tool query, you can limit your query to documents that have at least one annotation in the terminology specified by **entity**. For example, if you are only interested in documents that also have an annotation in _Uberon_: **entity** = 'JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)'. The **entity** values for the ToolQuery are the same as the ones used for the entity-statistics controller. Must be one of the following:  

_SCAIView_Neuro_: 

[MESH](https://www.ncbi.nlm.nih.gov/mesh/) : "JPMTAGGERANNOTATOR(MESH14.0-SNAPSHOT1585598140)"   
[EPO](http://www.ontobee.org/ontology/EPO)(Epidemiology ontology) : "JPMTAGGERANNOTATOR(EPO1.0-SNAPSHOT1606393218)"  
[UBERON](http://uberon.github.io/) : "JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)"  
[ALZHEIMER](https://bioportal.bioontology.org/ontologies/ADO) : "JPMTAGGERANNOTATOR(ALZHEIMER14.0-SNAPSHOT1585595494)"  
[BRCO](https://bioportal.bioontology.org/ontologies/BRCT)(Brain Region Cell Type ontology) : "JPMTAGGERANNOTATOR(BRCO14.0-SNAPSHOT1585594937)"  
HOMO_SAPIENS : "JPMTAGGERANNOTATOR(HOMO_SAPIENS15.0-SNAPSHOT1586876562)"  
[PTS](https://rohan.scai.fraunhofer.de/ols/ontologies/pts)(Pathway terminology system) : "JPMTAGGERANNOTATOR(PTS14.0-SNAPSHOT1585595459)"   
[NeuroNames](http://braininfo.rprc.washington.edu/nnont.aspx)(NeuroNames Standard Nomenclature and Ontology) : "JPMTAGGERANNOTATOR(NEURONAMES1.0-SNAPSHOT1606837450)"  
[FMA](https://bioportal.bioontology.org/ontologies/FMA)(Foundational Model of Anatomy Ontology) : "JPMTAGGERANNOTATOR(FMA1.0-SNAPSHOT1585594358)"  

_SCAIView_Academia_:     

[UBERON](http://uberon.github.io/): "JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)"  
[GO](http://geneontology.org/) : "JPMTAGGERANNOTATOR(GO15.0-SNAPSHOT1585596145)"  
[HYPO_FINDER](https://www.scaiview.com/en/scaiview-distributions/scaiview-academia.html) : "JPMTAGGERANNOTATOR(HYPOTHESISFINDER14.0-SNAPSHOT1585595494)"  
[DRUG_BANK](https://go.drugbank.com/) : "JPMTAGGERANNOTATOR(DRUGBANK14.0-SNAPSHOT1591360597)"  
HOMO_SAPIENS : "JPMTAGGERANNOTATOR(HOMO_SAPIENS15.0-SNAPSHOT1586876562)"  

### PublicationTypeQuery
Syntax: {"PUBTYPE":{"publicationTypeIdentifier":"**Type**"}}. Lets you search for a certain **Type** of document, e.g. Review: **Type** = 'D016454'. Must be one of the following:  
Journal_article = 'D016428'  
Review = 'D016454'  
Case_reports = 'D002363'  
Comparative_study = 'D003160'  
Systematic_review = 'D000078182'  
...

### DocumentTypeQuery
Syntax: {"DOCTYPE":{"documentType":"**string**"}}. Lets you differentiate between abstract (**string** = 'PUBMED_ABSTRACT') and fulltext (**string** = 'PMC_FULLTEXT') search.  

## How to build a query object

Abstract syntax tree with composite pattern

<img style="float: left;" src="./img/Abstract_syntax_tree.jpg" width =800>  


query_object = {"AND":{"lhs":{"OR":{"lhs":{"CONCEPTID":{"conceptIdentifier":"mesh:d004827"}},"rhs":{"CONCEPTID":{"conceptIdentifier":"hgnc:7881"}}}},"rhs":{"AND":{"lhs":{"AND":{"lhs":{"TOOL":{"id":"JPMTAGGERANNOTATOR(MESH14.0-SNAPSHOT1585598140)"}},"rhs":{"TOOL":{"id":"JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)"}}}},"rhs":{"PUBTYPE":{"publicationTypeIdentifier":"D016430"}}}}}}


# Example query 1

Documents that contain (freetext Alzheimer AND freetext Abeta) AND have at least one UBERON annotation

In [7]:
# (freetext "Alzheimer" AND freetext "Abeta") AND at least one UBERON annotation
query = {
    "AND": {
        "lhs": {
            "AND": {
                "lhs": {"FREETEXT": {"searchTerm": "Alzheimer"}},
                "rhs": {"FREETEXT": {"searchTerm": "Abeta"}},
            }
        },
        "rhs": {"TOOL": {"id": "JPMTAGGERANNOTATOR(UBERON15.0-SNAPSHOT1585594963)"}},
    }
}

# base name (without extension) of the csv files created for this query
query_name = 'Abeta'

<img style="float: left;" src="./img/Abeta_query.jpg" width =800> 

### Create and inspect document corpus

In [11]:
# Export up to 200 matching documents as CSV and keep a copy on disk.
doc_list = get_documents_list(query, 'CSV', '200')
doc_list.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

corpus_dir = './Document_corpus/'
os.makedirs(corpus_dir, exist_ok=True)  # the output folder may not exist yet
corpus_path = corpus_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(corpus_path, "w", encoding="utf-8") as f:
    f.write(doc_list.text)

pd.read_csv(corpus_path)


Unnamed: 0,id,title,document type,publication type,references
0,PMID:17716970,Presenilin 1 regulates epidermal growth factor...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]
1,PMCID:PMC5201440,Natural product HTP screening for evidence of ...,PMC_FULLTEXT,[NOT AVAILABLE],[NOT AVAILABLE]
2,PMID:27807401,Alzheimer Disease: Crosstalk between the Canon...,PUBMED_ABSTRACT,"D016454:Review, D016428:Journal Article",[NOT AVAILABLE]
3,PMCID:PMC6194433,Early Life Stress and Epigenetics in Late-onse...,PMC_FULLTEXT,[NOT AVAILABLE],[NOT AVAILABLE]
4,PMCID:PMC2781139,The effect of curcumin (turmeric) on Alzheimer...,PMC_FULLTEXT,[NOT AVAILABLE],[NOT AVAILABLE]
...,...,...,...,...,...
160,PMCID:PMC4150521,Alzheimer's Disease and HLA-A2: Linking Neurod...,PMC_FULLTEXT,[NOT AVAILABLE],"other:2-s2.0-34447284561, pmid:17342681, other..."
161,PMID:20713699,Stabilization of neurotoxic Alzheimer amyloid-...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]
162,PMID:15534188,A novel presenilin-1 mutation (Leu85Pro) in ea...,PUBMED_ABSTRACT,"D002363:Case Reports, D016428:Journal Article",[NOT AVAILABLE]
163,PMCID:PMC4228955,Adipokines: a link between obesity and dementia?,PMC_FULLTEXT,[NOT AVAILABLE],[NOT AVAILABLE]


### Create semantic associations

In [8]:
# Export the UBERON statistics for the current query and load them as a table.
corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

stats_dir = './SCAIView_csv/'
os.makedirs(stats_dir, exist_ok=True)  # the output folder may not exist yet
stats_path = stats_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

In [9]:
df

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments
0,UBERON:UBERON_2002175,130,0.531351,153910
1,UBERON:UBERON_0000955,116,0.724619,95963
2,UBERON:UBERON_0004529,111,0.466968,128832
3,UBERON:UBERON_0001851,102,0.756390,69499
4,UBERON:UBERON_0013702,96,0.512582,92327
...,...,...,...,...
1104,UBERON:UBERON_0035652,1,0.012063,315
1105,UBERON:UBERON_0001673,1,0.011159,366
1106,UBERON:UBERON_0000101,1,0.011894,324
1107,UBERON:UBERON_0002523,1,0.004257,1151


## Interpretation of the relevance score (kullbackLeiberDivergence)


\begin{equation*}
D_{KL}(P \| Q) = \sum_{x \in X} P(x) \ln\left( \frac{P(x)}{Q(x)} \right)
\end{equation*}
_where_ for each Uberon_term
\begin{equation*}
P = \frac{\text{numberOfDocumentsInQuery}}{\text{totalDocumentsInQuery}}, \quad
Q = \frac{\text{totalNumberOfDocuments}}{\text{totalDocumentsInSCAIView}}
\end{equation*}

Both denominators refer to the Uberon_term.

## Create semantic association and add label for Uberon-terms

In [10]:
# create csv with relevance scores for each uberon-term in the query

corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV
print(corpus)

# Directory to SCAIView CSVs
mdir = "./SCAIView_csv/"
stats_path = os.path.join(mdir, query_name + '.csv')

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

# label each uberon-term using the uberon-ontology data in .owl format

# path to the ontology data file
ub_dir = "./SCAIView_csv/labelled/composite-metazoan.owl"

# Get namespace
obo = owl.get_namespace("http://purl.obolibrary.org/obo/")

# load uberon
uberon = owl.get_ontology(ub_dir).load()

# create dictionary
ub_IDs = dict()

scai_csv = pd.DataFrame(columns=['entity'])

labs = []

for ID in df['entity']:
    # entities look like 'UBERON:UBERON_0000955'; dropping the first 7
    # characters ('UBERON:') leaves the suffix searched for in the ontology IRIs
    ub_ID = ID[7:]
    try:
        lab = uberon.search(iri="*" + ub_ID)[0].label[0]
    except IndexError:
        # no matching class, or a class without a label: keep the row unlabelled
        # instead of aborting the whole cell
        lab = None
    labs.append(lab)

df['label'] = labs
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
scai_csv = pd.concat([scai_csv, df])
df.to_csv(os.path.join(mdir, 'labelled', query_name + '.csv'))

In [24]:
df

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments,label
0,UBERON:UBERON_2002175,130,0.531351,153910,rostral octaval nerve motor nucleus
1,UBERON:UBERON_0000955,116,0.724619,95963,brain
2,UBERON:UBERON_0004529,111,0.466968,128832,anatomical projection
3,UBERON:UBERON_0001851,102,0.756390,69499,cortex
4,UBERON:UBERON_0013702,96,0.512582,92327,body proper
...,...,...,...,...,...
1104,UBERON:UBERON_0035652,1,0.012063,315,fibular nerve
1105,UBERON:UBERON_0001673,1,0.011159,366,central retinal vein
1106,UBERON:UBERON_0000101,1,0.011894,324,lobe of lung
1107,UBERON:UBERON_0002523,1,0.004257,1151,tunica intima


# Example query 2

Looking for semantic association between Alzheimer and Atrophy

In [169]:
# (freetext "Alzheimer" OR freetext "Alzheimer's") AND MeSH concept D001284
query = {
    "AND": {
        "lhs": {
            "OR": {
                "lhs": {"FREETEXT": {"searchTerm": "Alzheimer"}},
                "rhs": {"FREETEXT": {"searchTerm": "Alzheimer's"}},
            }
        },
        "rhs": {"CONCEPTID": {"conceptIdentifier": "mesh:D001284"}},
    }
}

# base name (without extension) of the csv files created for this query
query_name = 'Atrophy'

## Create and inspect document corpus

In [170]:
# Export up to 200 matching documents as CSV and keep a copy on disk.
doc_list = get_documents_list(query, 'CSV', '200')
doc_list.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

corpus_dir = './Document_corpus/'
os.makedirs(corpus_dir, exist_ok=True)  # the output folder may not exist yet
corpus_path = corpus_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(corpus_path, "w", encoding="utf-8") as f:
    f.write(doc_list.text)

pd.read_csv(corpus_path)


Unnamed: 0,id,title,document type,publication type,references
0,PMID:10528301,[Image characterization of Alzheimer's disease...,PUBMED_ABSTRACT,"D004740:English Abstract, D016428:Journal Arti...",[NOT AVAILABLE]
1,PMID:22994551,Neurosyphilis with dementia and bilateral hipp...,PUBMED_ABSTRACT,"D002363:Case Reports, D016428:Journal Article,...",[NOT AVAILABLE]
2,PMID:9403898,Temporal lobe magnetic resonance imaging can d...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]
3,PMID:8797529,In vivo mapping of cholinergic terminals in no...,PUBMED_ABSTRACT,"D016428:Journal Article, D013486:Research Supp...",[NOT AVAILABLE]
4,PMID:9053390,[Posterior cortical atrophy--a new dementia sy...,PUBMED_ABSTRACT,"D004740:English Abstract, D016428:Journal Arti...",[NOT AVAILABLE]
...,...,...,...,...,...
83,PMCID:PMC3921468,The Alzheimer Pandemic: Is Paracetamol to Blame?,PMC_FULLTEXT,[NOT AVAILABLE],[NOT AVAILABLE]
84,PMID:25531628,Interactions between traumatic brain injury an...,PUBMED_ABSTRACT,"D016428:Journal Article, D052061:Research Supp...",[NOT AVAILABLE]
85,PMID:15084793,Does the pattern of atrophy of the Corpus call...,PUBMED_ABSTRACT,"D016430:Clinical Trial, D003160:Comparative St...",[NOT AVAILABLE]
86,PMID:11930016,Mapping the evolution of regional atrophy in A...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]


## Create semantic association and add label for Uberon-terms

In [171]:
# create csv with relevance scores for each uberon-term in the query

corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV
print(corpus)

# Directory to SCAIView CSVs
mdir = "./SCAIView_csv/"
stats_path = os.path.join(mdir, query_name + '.csv')

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

# label each uberon-term using the uberon-ontology data in .owl format

# path to the ontology data file
ub_dir = "./SCAIView_csv/labelled/composite-metazoan.owl"

# Get namespace
obo = owl.get_namespace("http://purl.obolibrary.org/obo/")

# load uberon
uberon = owl.get_ontology(ub_dir).load()

# create dictionary
ub_IDs = dict()

scai_csv = pd.DataFrame(columns=['entity'])

labs = []

for ID in df['entity']:
    # entities look like 'UBERON:UBERON_0000955'; dropping the first 7
    # characters ('UBERON:') leaves the suffix searched for in the ontology IRIs
    ub_ID = ID[7:]
    try:
        lab = uberon.search(iri="*" + ub_ID)[0].label[0]
    except IndexError:
        # no matching class, or a class without a label: keep the row unlabelled
        # instead of aborting the whole cell
        lab = None
    labs.append(lab)

df['label'] = labs
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
scai_csv = pd.concat([scai_csv, df])
df.to_csv(os.path.join(mdir, 'labelled', query_name + '.csv'))


<Response [200]>


In [172]:
#show csv with labels
df.head(40)

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments,label
0,UBERON:UBERON_0001851,53,0.729327,69499,cortex
1,UBERON:UBERON_0000955,22,0.002265,95963,brain
2,UBERON:UBERON_0000956,18,0.226958,26122,cerebral cortex
3,UBERON:UBERON_0004529,18,-0.099442,128832,anatomical projection
4,UBERON:UBERON_0014899,18,0.006475,76761,anterolateral ligament of knee
5,UBERON:UBERON_0013702,17,-0.040596,92327,body proper
6,UBERON:UBERON_2002175,14,-0.145621,153910,rostral octaval nerve motor nucleus
7,UBERON:UBERON_0035944,13,-0.069832,91802,life-death temporal boundary
8,UBERON:UBERON_0007222,13,0.075183,34398,late adult stage
9,UBERON:UBERON_0001016,12,-0.058773,81279,nervous system


# Wordcloud

<img style="float: left;" src="./img/WordCloud.png" width =800> 

## Define query

In [25]:
# (freetext "Alzheimer" OR freetext "Alzheimer's") AND MeSH concept D001284
# NOTE(review): mesh:D001284 is the same concept used in "Example query 2"
# (Atrophy), while query_name below says 'Plasticity' - confirm the intended
# MeSH identifier for this query.
query = {"AND":{"lhs":{"OR":{"lhs":{"FREETEXT":{"searchTerm":"Alzheimer"}},"rhs":{"FREETEXT":{"searchTerm":"Alzheimer's"}}}},
                "rhs":{"CONCEPTID":{"conceptIdentifier":"mesh:D001284"}}}}

# base name (without extension) of the csv files created for this query
query_name = 'Plasticity'

In [26]:
# Export up to 200 matching documents as CSV and keep a copy on disk.
doc_list = get_documents_list(query, 'CSV', '200')
doc_list.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

corpus_dir = './Document_corpus/'
os.makedirs(corpus_dir, exist_ok=True)  # the output folder may not exist yet
corpus_path = corpus_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(corpus_path, "w", encoding="utf-8") as f:
    f.write(doc_list.text)

pd.read_csv(corpus_path)

Unnamed: 0,id,title,document type,publication type,references
0,PMID:10528301,[Image characterization of Alzheimer's disease...,PUBMED_ABSTRACT,"D004740:English Abstract, D016428:Journal Arti...",[NOT AVAILABLE]
1,PMID:22994551,Neurosyphilis with dementia and bilateral hipp...,PUBMED_ABSTRACT,"D002363:Case Reports, D016428:Journal Article,...",[NOT AVAILABLE]
2,PMID:9403898,Temporal lobe magnetic resonance imaging can d...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]
3,PMID:8797529,In vivo mapping of cholinergic terminals in no...,PUBMED_ABSTRACT,"D016428:Journal Article, D013486:Research Supp...",[NOT AVAILABLE]
4,PMID:9053390,[Posterior cortical atrophy--a new dementia sy...,PUBMED_ABSTRACT,"D004740:English Abstract, D016428:Journal Arti...",[NOT AVAILABLE]
...,...,...,...,...,...
83,PMCID:PMC3921468,The Alzheimer Pandemic: Is Paracetamol to Blame?,PMC_FULLTEXT,[NOT AVAILABLE],[NOT AVAILABLE]
84,PMID:25531628,Interactions between traumatic brain injury an...,PUBMED_ABSTRACT,"D016428:Journal Article, D052061:Research Supp...",[NOT AVAILABLE]
85,PMID:15084793,Does the pattern of atrophy of the Corpus call...,PUBMED_ABSTRACT,"D016430:Clinical Trial, D003160:Comparative St...",[NOT AVAILABLE]
86,PMID:11930016,Mapping the evolution of regional atrophy in A...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]


In [27]:
# Export the UBERON statistics for the current query and load them as a table.
corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

stats_dir = './SCAIView_csv/'
os.makedirs(stats_dir, exist_ok=True)  # the output folder may not exist yet
stats_path = stats_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

In [28]:
df

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments
0,UBERON:UBERON_0001851,53,0.729327,69499
1,UBERON:UBERON_0000955,22,0.002265,95963
2,UBERON:UBERON_0000956,18,0.226958,26122
3,UBERON:UBERON_0004529,18,-0.099442,128832
4,UBERON:UBERON_0014899,18,0.006475,76761
...,...,...,...,...
259,UBERON:UBERON_0009835,1,0.025814,454
260,UBERON:UBERON_0001556,1,0.017175,971
261,UBERON:UBERON_0000467,1,-0.012705,13464
262,UBERON:UBERON_0000345,1,-0.013405,14320


In [29]:
# create csv with relevance scores for each uberon-term in the query

corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV
print(corpus)

# Directory to SCAIView CSVs
mdir = "./SCAIView_csv/"
stats_path = os.path.join(mdir, query_name + '.csv')

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

# label each uberon-term using the uberon-ontology data in .owl format

# path to the ontology data file
ub_dir = "./SCAIView_csv/labelled/composite-metazoan.owl"

# Get namespace
obo = owl.get_namespace("http://purl.obolibrary.org/obo/")

# load uberon
uberon = owl.get_ontology(ub_dir).load()

# create dictionary
ub_IDs = dict()

scai_csv = pd.DataFrame(columns=['entity'])

labs = []

for ID in df['entity']:
    # entities look like 'UBERON:UBERON_0000955'; dropping the first 7
    # characters ('UBERON:') leaves the suffix searched for in the ontology IRIs
    ub_ID = ID[7:]
    try:
        lab = uberon.search(iri="*" + ub_ID)[0].label[0]
    except IndexError:
        # no matching class, or a class without a label: keep the row unlabelled
        # instead of aborting the whole cell
        lab = None
    labs.append(lab)

df['label'] = labs
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
scai_csv = pd.concat([scai_csv, df])
df.to_csv(os.path.join(mdir, 'labelled', query_name + '.csv'))

<Response [200]>


In [30]:
df

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments,label
0,UBERON:UBERON_0001851,53,0.729327,69499,cortex
1,UBERON:UBERON_0000955,22,0.002265,95963,brain
2,UBERON:UBERON_0000956,18,0.226958,26122,cerebral cortex
3,UBERON:UBERON_0004529,18,-0.099442,128832,anatomical projection
4,UBERON:UBERON_0014899,18,0.006475,76761,anterolateral ligament of knee
...,...,...,...,...,...
259,UBERON:UBERON_0009835,1,0.025814,454,anterior cingulate cortex
260,UBERON:UBERON_0001556,1,0.017175,971,lower urinary tract
261,UBERON:UBERON_0000467,1,-0.012705,13464,anatomical system
262,UBERON:UBERON_0000345,1,-0.013405,14320,myelin


In [31]:
# (freetext "Alzheimer" OR freetext "Alzheimer's") AND MeSH concept D007395
query = {"AND":{"lhs":{"OR":{"lhs":{"FREETEXT":{"searchTerm":"Alzheimer"}},"rhs":{"FREETEXT":{"searchTerm":"Alzheimer's"}}}},
                "rhs":{"CONCEPTID":{"conceptIdentifier":"mesh:D007395"}}}}

# base name (without extension) of the csv files created for this query
query_name = 'Interneurons'

In [32]:
# Export up to 200 matching documents as CSV and keep a copy on disk.
doc_list = get_documents_list(query, 'CSV', '200')
doc_list.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

corpus_dir = './Document_corpus/'
os.makedirs(corpus_dir, exist_ok=True)  # the output folder may not exist yet
corpus_path = corpus_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(corpus_path, "w", encoding="utf-8") as f:
    f.write(doc_list.text)

pd.read_csv(corpus_path)

Unnamed: 0,id,title,document type,publication type,references
0,PMID:17765724,Somatostatin in the dentate gyrus.,PUBMED_ABSTRACT,"D016428:Journal Article, D016454:Review",[NOT AVAILABLE]
1,PMID:2874591,Somatostatin in the central nervous system: ph...,PUBMED_ABSTRACT,"D016428:Journal Article, D016454:Review",[NOT AVAILABLE]
2,PMID:26586374,The basolateral amygdala γ-aminobutyric acider...,PUBMED_ABSTRACT,"D016428:Journal Article, D052061:Research Supp...",[NOT AVAILABLE]
3,PMID:16874665,Cellular and molecular mechanisms involved in ...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]
4,PMID:8232900,Chandelier cell axons identified by parvalbumi...,PUBMED_ABSTRACT,"D016428:Journal Article, D013485:Research Supp...",[NOT AVAILABLE]


In [33]:
# Export the UBERON statistics for the current query and load them as a table.
corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

stats_dir = './SCAIView_csv/'
os.makedirs(stats_dir, exist_ok=True)  # the output folder may not exist yet
stats_path = stats_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

In [34]:
df

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments
0,UBERON:UBERON_2002175,4,0.559843,153910
1,UBERON:UBERON_0000955,2,0.191625,95963
2,UBERON:UBERON_0004529,2,0.073806,128832
3,UBERON:UBERON_0001016,2,0.258055,81279
4,UBERON:UBERON_0016538,1,0.513937,5931
5,UBERON:UBERON_0035931,1,0.929657,742
6,UBERON:UBERON_0034931,1,0.86387,1031
7,UBERON:UBERON_0000956,1,0.21742,26122
8,UBERON:UBERON_0000007,1,0.436714,8726
9,UBERON:UBERON_0000914,1,0.236796,23710


In [44]:
# Single concept query: MeSH concept D003243, with no further restriction.
query = {"CONCEPTID":{"conceptIdentifier":"mesh:D003243"}}

# base name (without extension) of the csv files created for this query
query_name = 'Only_Consciousness'

In [46]:
# Export the UBERON statistics for the current query and load them as a table.
corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV

stats_dir = './SCAIView_csv/'
os.makedirs(stats_dir, exist_ok=True)  # the output folder may not exist yet
stats_path = stats_dir + query_name + '.csv'

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

In [47]:
# create csv with relevance scores for each uberon-term in the query

corpus = export_stats(query, UBERON)
corpus.raise_for_status()  # stop here rather than saving an HTTP error body as CSV
print(corpus)

# Directory to SCAIView CSVs
mdir = "./SCAIView_csv/"
stats_path = os.path.join(mdir, query_name + '.csv')

# `with` closes the file even if the write fails; explicit UTF-8 matches the
# default encoding pd.read_csv uses when reading the file back.
with open(stats_path, "w", encoding="utf-8") as f:
    f.write(corpus.text)

# column names are supplied explicitly for the exported table
df = pd.read_csv(stats_path, names=['entity',
       'numberOfDocumentsInQuery',
       'kullbackLeiberDivergence',
       'totalNumberOfDocuments'])

# label each uberon-term using the uberon-ontology data in .owl format

# path to the ontology data file
ub_dir = "./SCAIView_csv/labelled/composite-metazoan.owl"

# Get namespace
obo = owl.get_namespace("http://purl.obolibrary.org/obo/")

# load uberon
uberon = owl.get_ontology(ub_dir).load()

# create dictionary
ub_IDs = dict()

scai_csv = pd.DataFrame(columns=['entity'])

labs = []

for ID in df['entity']:
    # entities look like 'UBERON:UBERON_0000955'; dropping the first 7
    # characters ('UBERON:') leaves the suffix searched for in the ontology IRIs
    ub_ID = ID[7:]
    try:
        lab = uberon.search(iri="*" + ub_ID)[0].label[0]
    except IndexError:
        # no matching class, or a class without a label: keep the row unlabelled
        # instead of aborting the whole cell
        lab = None
    labs.append(lab)

df['label'] = labs
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
scai_csv = pd.concat([scai_csv, df])
df.to_csv(os.path.join(mdir, 'labelled', query_name + '.csv'))

<Response [200]>


In [48]:
df

Unnamed: 0,entity,numberOfDocumentsInQuery,kullbackLeiberDivergence,totalNumberOfDocuments,label
0,UBERON:UBERON_0002542,52,0.069642,94357,scale
1,UBERON:UBERON_0014899,40,0.040400,76761,anterolateral ligament of knee
2,UBERON:UBERON_0035971,26,0.063698,39061,postsubiculum
3,UBERON:UBERON_0000033,25,-0.003680,58406,head
4,UBERON:UBERON_0001851,20,-0.049654,69499,cortex
...,...,...,...,...,...
112,UBERON:UBERON_0002807,1,0.011848,304,right occipital lobe
113,UBERON:UBERON_0009835,1,0.009489,454,anterior cingulate cortex
114,UBERON:UBERON_0015708,1,0.002811,1413,splenium of the corpus callosum
115,UBERON:UBERON_0002240,1,-0.015984,34492,spinal cord
