In [1]:
import json
import requests

# Querying Signature Commons
Signature Commons includes two APIs that users can interact with: (1) the metadata API (https://maayanlab.cloud/sigcom/metadata-api), and (2) the data API (https://maayanlab.cloud/sigcom/data-api). The metadata API provides fast full-text search and field-comparison filtering of the metadata, as well as metadata aggregations for statistical summaries. Users can query the following models: (1) resources, (2) libraries, (3) signatures, and (4) entities. Meanwhile, the data API performs real-time set, ranked-set, and two-sided ranked-set enrichment analysis. Here we show some of the basic use cases for the two APIs.

## Metadata Search
Users can query for metadata using JSON formatted loopback queries (https://loopback.io/doc/en/lb3/Querying-data.html). 


**Method** 	`GET`<br>
**URL** 	`/<model>`


**PARAMETERS**<br>
**filter** stringified JSON query

**RETURNS** JSON list of all the matches

### Full Text Search

In [2]:
# Full-text search over signature metadata, capped at a single match.
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
term = "imatinib"
endpoint = "/signatures"

# Filter object: full-text match on `meta`, at most one result.
query = {"where": {"meta": {"fullTextSearch": term}}, "limit": 1}

# For GET requests the filter is sent as a JSON string in the `filter` query parameter.
res = requests.get(f"{METADATA_API}{endpoint}", params={"filter": json.dumps(query)})
print(json.dumps(res.json(), indent=4))

[
    {
        "$validator": "/dcic/signature-commons-schema/v5/core/signature.json",
        "id": "10cd4a9a-c1a1-11e8-8c55-0242ac170004",
        "library": "30b4aba6-d3e2-11e8-a3b8-787b8ad942f3",
        "meta": {
            "Assay": "microarray",
            "geo_id": "GSE28698",
            "Readout": "mRNA",
            "Organism": "Homo sapiens",
            "Direction": "down",
            "$validator": "/@dcic/signature-commons-schema/v4/meta/signature/sigcom.json",
            "Small_Molecule": [
                {
                    "Name": "imatinib",
                    "Accession": "CHEBI:45783",
                    "DrugBankID": "DB00619"
                }
            ],
            "Number_of_genes": 375,
            "Original_String": "imatinib DB00619 human GSE28698 sample 2524",
            "Perturbation_Type": "small molecule"
        }
    }
]


Alternatively, users can also perform a POST request using the `/find` endpoint

**Method** 	`POST`<br>
**URL** 	`/<model>/find`


**PARAMETERS**<br>
**filter** JSON serialized query

**RETURNS** JSON list of all the matches

### Filtering by fields

In [3]:
# Field filtering through the POST `/find` endpoint.
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
term = "L1000"
endpoint = "/libraries/find"

# Exact match on the `meta.Assay` field, limited to one library.
# For POST requests the filter object is nested under "filter" in the JSON body.
query = {"filter": {"where": {"meta.Assay": term}, "limit": 1}}

res = requests.post(f"{METADATA_API}{endpoint}", json=query)
print(json.dumps(res.json(), indent=4))

[
    {
        "$validator": "/dcic/signature-commons-schema/v5/core/library.json",
        "id": "30f1072b-d3e2-11e8-bf7f-787b8ad942f3",
        "resource": "5aa7c5f3-b48f-4f02-8737-3b54b13ef286",
        "dataset": "enrichr_geneset",
        "dataset_type": "geneset_library",
        "meta": {
            "DOI": "10.1039/c4mb00677a",
            "Icon": "static/images/LINCS_L1000_Ligand_Perturbations_down.ico",
            "PMID": "25609570",
            "Spec": "(?P<Original_String>((?P<UUID>.*):SC=)?(?P<Gene>.+?)-(?P<Cell_Line>.+?))\\t\\t(?P<Genes>.+)",
            "Assay": "L1000",
            "Program": "LINCS",
            "Readout": "mRNA",
            "Version": "2014",
            "Filename": "LINCS_L1000_Ligand_Perturbations_down",
            "Organism": "Homo sapiens",
            "Weighted": "False",
            "Direction": "up",
            "$validator": "/@dcic/signature-commons-schema/v4/meta/library/sigcom.json",
            "Access_URL": "http://amp.pharm.mssm.edu/

## Aggregating metadata

### Counting results

**Method** 	`GET`<br>
**URL** 	`/<model>/count`


**PARAMETERS**<br>
**filter** JSON serialized query

**RETURNS** The number of matching results

In [4]:
# Count the signatures whose metadata matches a full-text term.
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
term = "STAT3"
endpoint = "/signatures/count"

# Here `query` is the where-clause itself rather than a full filter object.
query = {"meta": {"fullTextSearch": term}}

# The clause is serialized to JSON and sent as the `where` query parameter.
res = requests.get(f"{METADATA_API}{endpoint}", params={"where": json.dumps(query)})
print(json.dumps(res.json(), indent=4))

{
    "count": 183
}


### Counting top terms

**Method** 	`GET`<br>
**URL** 	`/<model>/value_count`


**PARAMETERS**<br>
**filter** JSON serialized query

**RETURNS** JSON list of the value counts

In [5]:
# Tally the values of `meta.Assay` across signatures matching a full-text term.
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
term = "MCF10A"
endpoint = "/signatures/value_count"

# `fields` names the metadata field(s) to tally; `where` restricts the matches.
query = {"fields": ["meta.Assay"], "where": {"meta": {"fullTextSearch": term}}}

res = requests.get(f"{METADATA_API}{endpoint}", params={"filter": json.dumps(query)})
print(json.dumps(res.json(), indent=4))

{
    "meta.Assay": {
        "L1000": 3263,
        "microarray": 21,
        "RNA-seq": 2,
        "ChIP-seq": 2
    }
}


### Counting keys

**Method** 	`GET`<br>
**URL** 	`/<model>/key_count`


**PARAMETERS**<br>
**filter** JSON serialized query

**RETURNS** JSON object with the count for each metadata key

In [6]:
# Tally the metadata keys present on signatures matching a full-text term.
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
term = "RNA-seq"
endpoint = "/signatures/key_count"

# Only a where-clause is supplied in the filter for this request.
query = {"where": {"meta": {"fullTextSearch": term}}}

res = requests.get(f"{METADATA_API}{endpoint}", params={"filter": json.dumps(query)})
print(json.dumps(res.json(), indent=4))

{
    "meta.Assay": 9947,
    "meta.Organism": 9947,
    "meta.Readout": 9947,
    "meta.Number_of_genes": 9947,
    "id": 9947,
    "meta.$validator": 9947,
    "library": 9947,
    "meta.Original_String": 9947,
    "meta.Perturbation_Type": 9947,
    "meta.Direction": 7138,
    "meta.Tissue.Name": 6917,
    "meta.Tissue.Accession": 6901,
    "meta.Age": 5893,
    "meta.Sex": 5844,
    "meta.Program": 5836,
    "meta.Patient_ID": 5836,
    "meta.Gene.Name": 3362,
    "meta.Gene.Accession": 2950,
    "meta.Gene_Family": 2574,
    "meta.geo_id": 1302,
    "meta.Gene.Gene_Perturbation": 746,
    "meta.Cell_Line.Name": 462,
    "meta.Cell_Line.Accession": 452,
    "meta.Small_Molecule.Name": 396,
    "meta.Time_Point": 344,
    "meta.Small_Molecule.Accession": 316,
    "meta.Small_Molecule.Concentration": 208,
    "meta.Concentration": 200,
    "meta.Disease.Name": 150,
    "meta.Disease.Accession": 148,
    "meta.Mouse_Model": 36,
    "meta.Disease.Remarks": 16,
    "meta.Other.Name": 10

## Signature Search
Performing a signature search takes a few steps and requires users to query both the metadata API and the data API. The following demonstrates how to perform signature search using (1) unranked gene sets, and (2) up and down gene sets.

### Getting the list of datasets
Users must specify the dataset they want to perform enrichment analysis on. To figure out the list of available datasets, one can use the `/listdata` endpoint of the data API.

**Method** 	`GET`<br>
**URL** 	`/listdata`


**RETURNS** A JSON list of datasets stating the identifier for the dataset and the datatype

In [7]:
# List the datasets exposed by the data API, with their datatypes.
DATA_API = "https://maayanlab.cloud/sigcom/data-api/api/v1"
endpoint = "/listdata"

# Plain GET with no parameters; the JSON reply is pretty-printed as-is.
res = requests.get(f"{DATA_API}{endpoint}")
print(json.dumps(res.json(), indent=4))

{
    "repositories": [
        {
            "datatype": "geneset_library",
            "uuid": "enrichr_geneset"
        },
        {
            "datatype": "geneset_library",
            "uuid": "creeds_geneset"
        },
        {
            "datatype": "rank_matrix",
            "uuid": "lincs_clue"
        }
    ]
}


### Converting gene names to UUIDs
Genes should be converted to UUIDs before sending them to the data API for signature search. To do this, we need to perform a metadata search on the gene name field.

In [8]:
# Look up the entity UUIDs for a handful of gene names.
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
genes = ['UTP14A','S100A6','SCAND1']
endpoint = "/entities/find"

# `inq` matches any entity whose `meta.Name` is one of the listed genes.
query = {"filter": {"where": {"meta.Name": {"inq": genes}}}}

res = requests.post(f"{METADATA_API}{endpoint}", json=query)

# Keep only the `id` of each returned entity.
gene_ids = [entity["id"] for entity in res.json()]
print(gene_ids)

['c998bbe8-ed1c-11e8-a7d6-787b8ad942f3', 'c98d5e1a-ed1c-11e8-a48a-787b8ad942f3', 'c99367c6-ed1c-11e8-a4b9-787b8ad942f3']


The way a user performs enrichment analysis is determined by the type of input they are using to query sigcom and the datatype of the dataset.

### Unranked Gene Set
#### Datatype: geneset_library
**Method** 	`POST`<br>
**URL** 	`/enrich/overlap`


**Query**<br>
```
{
  "database": "string",
  "entities": [
    "string"
  ],
  "offset": 0,
  "limit": 0
}
```

**Returns** The analysis results

In [9]:
# Unranked gene set search against a `geneset_library` dataset, in three steps:
# (1) map gene names to entity UUIDs, (2) run overlap enrichment on the data
# API, (3) fetch the metadata of the returned signatures.

# Step 1: convert gene names to UUIDs
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
endpoint = "/entities/find"
genes = ['UTP14A','S100A6','SCAND1','RRP12','CIAPIN1','ADH5','MTERF3','SPR','CHMP4A','UFM1','VAT1','HACD3','RFC5','COTL1','NPRL2','TRIB3','PCCB','TLE1','CD58','BACE2','KDM3A','TARBP1','RNH1','CHAC1','MBNL2','VDAC1','TES','OXA1L','NOP56','HAT1','CPNE3','DNMT1','ARHGAP1','VPS28','EIF2S2','BAG3','CDCA4','NPDC1','RPS6KA1','FIS1','SYPL1','SARS','CDC45','CANT1','HERPUD1','SORBS3','MRPS2','TOR1A','TNIP1','SLC25A46','MAL','EPCAM','HDAC6','CAPN1','TNRC6B','PKD1','RRS1','HP','ANO10','CEP170B','IDE','DENND2D','CAMK2B','ZNF358','RPP38','MRPL19','NUCB2','GNAI1','LSR','ADGRE2','PKMYT1','CDK5R1','ABL1','PILRB','AXIN1','FBXL8','MCF2L','DBNDD1','IGHMBP2','WIPF2','WFS1','OGFOD2','MAPK1IP1L','COL11A1','REG3A','SERPINA1','MYCBP2','PIGK','TCAP','CRADD','ELK1','DNAJB2','ZBTB16','DAZAP1','MAPKAPK2','EDRF1','CRIP1','UCP3','AGR2','P4HA2']

# `inq` matches any entity whose `meta.Name` is one of the listed genes.
query = {
    "filter":{
        "where": {
            "meta.Name": {
                "inq": genes
            }
        }
    }
}

res = requests.post(METADATA_API + endpoint, json=query)
# Raise an HTTPError on a 4xx/5xx reply here, instead of letting the failure
# show up later as a JSON decoding or key error.
res.raise_for_status()
gene_ids = [i["id"] for i in res.json()]

# Step 2: signature search
DATA_API = "https://maayanlab.cloud/sigcom/data-api/api/v1"
endpoint = "/enrich/overlap"

query = {
    "database": "enrichr_geneset",
    "entities": gene_ids,
    "limit": 5
}

res = requests.post(DATA_API + endpoint, json=query)
res.raise_for_status()
# Keep only the UUIDs of the returned signatures.
signatures = [i["uuid"] for i in res.json()["results"]]

# Step 3: get metadata for the signatures
endpoint = "/signatures/find"
query = {
    "filter":{
        "where": {
            "id": {
                "inq": signatures
            }
        }
    }
}

res = requests.post(METADATA_API + endpoint, json=query)
res.raise_for_status()
print(json.dumps(res.json(), indent=4))

[
    {
        "$validator": "/dcic/signature-commons-schema/v5/core/signature.json",
        "id": "34bf2434-c1a1-11e8-aaf5-0242ac170004",
        "library": "30e1e0a8-d3e2-11e8-8014-787b8ad942f3",
        "meta": {
            "Assay": "text mining",
            "Tissue": [
                {
                    "Name": "islet of Langerhans",
                    "Accession": "UBERON:0000006"
                }
            ],
            "Readout": "mRNA",
            "Organism": "Homo sapiens",
            "$validator": "/@dcic/signature-commons-schema/v4/meta/signature/sigcom.json",
            "Number_of_genes": 4972,
            "Original_String": "Pancreatic islet",
            "Perturbation_Type": "shared location"
        }
    },
    {
        "$validator": "/dcic/signature-commons-schema/v5/core/signature.json",
        "id": "34b90780-c1a1-11e8-92dc-0242ac170004",
        "library": "30e1e0a8-d3e2-11e8-8014-787b8ad942f3",
        "meta": {
            "Assay": "text mining",


#### Datatype: rank_matrix
**Method** 	`POST`<br>
**URL** 	`/enrich/rank`


**Query**<br>
```
{
  "database": "string",
  "entities": [
    "string"
  ],
  "offset": 0,
  "limit": 0
}
```

**Returns** The analysis results

In [10]:
# Unranked gene set search against a `rank_matrix` dataset, in three steps:
# (1) map gene names to entity UUIDs, (2) run rank enrichment on the data API,
# (3) fetch the metadata of the returned signatures.

# Step 1: convert gene names to UUIDs
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
endpoint = "/entities/find"
genes = ['UTP14A','S100A6','SCAND1','RRP12','CIAPIN1','ADH5','MTERF3','SPR','CHMP4A','UFM1','VAT1','HACD3','RFC5','COTL1','NPRL2','TRIB3','PCCB','TLE1','CD58','BACE2','KDM3A','TARBP1','RNH1','CHAC1','MBNL2','VDAC1','TES','OXA1L','NOP56','HAT1','CPNE3','DNMT1','ARHGAP1','VPS28','EIF2S2','BAG3','CDCA4','NPDC1','RPS6KA1','FIS1','SYPL1','SARS','CDC45','CANT1','HERPUD1','SORBS3','MRPS2','TOR1A','TNIP1','SLC25A46','MAL','EPCAM','HDAC6','CAPN1','TNRC6B','PKD1','RRS1','HP','ANO10','CEP170B','IDE','DENND2D','CAMK2B','ZNF358','RPP38','MRPL19','NUCB2','GNAI1','LSR','ADGRE2','PKMYT1','CDK5R1','ABL1','PILRB','AXIN1','FBXL8','MCF2L','DBNDD1','IGHMBP2','WIPF2','WFS1','OGFOD2','MAPK1IP1L','COL11A1','REG3A','SERPINA1','MYCBP2','PIGK','TCAP','CRADD','ELK1','DNAJB2','ZBTB16','DAZAP1','MAPKAPK2','EDRF1','CRIP1','UCP3','AGR2','P4HA2']

# `inq` matches any entity whose `meta.Name` is one of the listed genes.
query = {
    "filter":{
        "where": {
            "meta.Name": {
                "inq": genes
            }
        }
    }
}

res = requests.post(METADATA_API + endpoint, json=query)
# Raise an HTTPError on a 4xx/5xx reply here, instead of letting the failure
# show up later as a JSON decoding or key error.
res.raise_for_status()
gene_ids = [i["id"] for i in res.json()]

# Step 2: signature search
DATA_API = "https://maayanlab.cloud/sigcom/data-api/api/v1"
endpoint = "/enrich/rank"

query = {
    "database": "lincs_clue",
    "entities": gene_ids,
    "limit": 2
}

res = requests.post(DATA_API + endpoint, json=query)
res.raise_for_status()
# Keep only the UUIDs of the returned signatures.
signatures = [i["uuid"] for i in res.json()["results"]]

# Step 3: get metadata for the signatures
endpoint = "/signatures/find"
query = {
    "filter":{
        "where": {
            "id": {
                "inq": signatures
            }
        }
    }
}

res = requests.post(METADATA_API + endpoint, json=query)
res.raise_for_status()
print(json.dumps(res.json(), indent=4))

[
    {
        "$validator": "/dcic/signature-commons-schema/v5/core/signature.json",
        "id": "856b137c-9838-454c-860d-7174ccd66b57",
        "library": "1c38c500-3968-474e-b32e-46dc2046087d",
        "meta": {
            "id": "5141e76cdfe7719b88302457",
            "Assay": "L1000",
            "sig_id": "DOS004_VCAP_24H:BRD-K60056908-001-01-5:5.02",
            "Readout": "gene",
            "created": "2018-11-27T18:17:23.863Z",
            "is_gold": true,
            "pert_id": "BRD-K60056908",
            "pool_id": "epsilon",
            "Organism": "Homo sapiens",
            "ds_index": 120377,
            "zmad_ref": "population",
            "Cell_Line": [
                {
                    "Name": "VCaP",
                    "Accession": "CVCL_2235"
                }
            ],
            "distil_id": [
                "DOS004_VCAP_24H_X1_B2_DUO52HI53LO:C04",
                "DOS004_VCAP_24H_X2_B2_DUO53HI52LO:C04",
                "DOS004_VCAP_24H_X3_B3_DUO

### Up and Down Gene Set
**Method** 	`POST`<br>
**URL** 	`/enrich/ranktwosided`


**Query**<br>
```
{
  "database": "string",
  "up_entities": [
    "string"
  ],
  "down_entities": [
    "string"
  ],
  "offset": 0,
  "limit": 0
}
```

**Returns** The analysis results

In [11]:
# Up/down gene set search, in three steps: (1) map the up and down gene names
# to entity UUIDs, (2) run two-sided rank enrichment on the data API,
# (3) fetch the metadata of the returned signatures.

# Step 1: convert gene names to UUIDs
METADATA_API = "https://maayanlab.cloud/sigcom/metadata-api"
endpoint = "/entities/find"
up_genes = ['UTP14A','S100A6','SCAND1','RRP12','CIAPIN1','ADH5','MTERF3','SPR','CHMP4A','UFM1','VAT1','HACD3','RFC5','COTL1','NPRL2','TRIB3','PCCB','TLE1','CD58','BACE2','KDM3A','TARBP1','RNH1','CHAC1','MBNL2','VDAC1','TES','OXA1L','NOP56','HAT1','CPNE3','DNMT1','ARHGAP1','VPS28','EIF2S2','BAG3','CDCA4','NPDC1','RPS6KA1','FIS1','SYPL1','SARS','CDC45','CANT1','HERPUD1','SORBS3','MRPS2','TOR1A','TNIP1','SLC25A46']
down_genes = ['MAL','EPCAM','HDAC6','CAPN1','TNRC6B','PKD1','RRS1','HP','ANO10','CEP170B','IDE','DENND2D','CAMK2B','ZNF358','RPP38','MRPL19','NUCB2','GNAI1','LSR','ADGRE2','PKMYT1','CDK5R1','ABL1','PILRB','AXIN1','FBXL8','MCF2L','DBNDD1','IGHMBP2','WIPF2','WFS1','OGFOD2','MAPK1IP1L','COL11A1','REG3A','SERPINA1','MYCBP2','PIGK','TCAP','CRADD','ELK1','DNAJB2','ZBTB16','DAZAP1','MAPKAPK2','EDRF1','CRIP1','UCP3','AGR2','P4HA2']

# A single metadata lookup covers both gene sets.
query = {
    "filter":{
        "where": {
            "meta.Name": {
                "inq": up_genes + down_genes
            }
        }
    }
}

res = requests.post(METADATA_API + endpoint, json=query)
# Raise an HTTPError on a 4xx/5xx reply here. Without this check, an error
# reply whose body is not JSON only surfaces as an opaque JSONDecodeError.
res.raise_for_status()
results = res.json()

# Split the returned entities by direction; sets avoid rescanning the gene
# lists for every returned entity.
up_gene_set = set(up_genes)
down_gene_set = set(down_genes)
up_gene_ids = [i["id"] for i in results if i["meta"]["Name"] in up_gene_set]
down_gene_ids = [i["id"] for i in results if i["meta"]["Name"] in down_gene_set]

# Step 2: signature search
DATA_API = "https://maayanlab.cloud/sigcom/data-api/api/v1"
endpoint = "/enrich/ranktwosided"

query = {
    "database": "lincs_clue",
    "up_entities": up_gene_ids,
    "down_entities": down_gene_ids,
    "limit": 2
}

res = requests.post(DATA_API + endpoint, json=query)
res.raise_for_status()
# Keep only the UUIDs of the returned signatures.
signatures = [i["uuid"] for i in res.json()["results"]]

# Step 3: get metadata for the signatures
endpoint = "/signatures/find"
query = {
    "filter":{
        "where": {
            "id": {
                "inq": signatures
            }
        }
    }
}

res = requests.post(METADATA_API + endpoint, json=query)
res.raise_for_status()
print(json.dumps(res.json(), indent=4))

JSONDecodeError: Expecting value: line 1 column 1 (char 0)