In [1]:
%load_ext dotenv
%dotenv

In [2]:
import requests
import base64
import json
import os
from requests.auth import HTTPBasicAuth

## Update from previous lab meeting

![old sigcom](img/old_sigcom.png)

UPDATED:

https://amp.pharm.mssm.edu/sigcom

https://amp.pharm.mssm.edu/btools

## Building sigcom via a docker compose

https://github.com/dcic/signature-commons/tree/monorepo
![https://github.com/dcic/signature-commons/tree/monorepo](img/monorepo.png)

## Setting up your Postgres database

1. Check if postgres is installed.
    - Installation instructions:
        - Windows: http://www.postgresqltutorial.com/install-postgresql/
        - Mac
            - You can use homebrew to install postgres in mac (https://brew.sh/)
            - ```brew install postgres```
        - Linux: https://www.postgresql.org/download/linux/
2. Log in to postgres shell: ```psql postgres```
3. Create database ```create database <database_name>;```
4. Create user and grant privileges 
```
    create user <user> with encrypted password '<password>';
    grant all privileges on database <database_name> to <user>;
```
5. In the .env file, change the following variable:
```
# for dev
TYPEORM_URL=postgres://<user>:<password>@localhost:5432/<database_name>
```

## Database Schema
![Database schema](img/Database.png)

## Post your data to the database


In [3]:
# URL template for the metadata API: the first slot is the collection (or
# action group), the second the record id (or action name).
API_url = "http://localhost:3000/signature-commons-metadata-api/%s/%s"
# Basic-auth credentials taken from the USERNAME / PASSWORD environment variables.
credentials = HTTPBasicAuth(
    username=os.getenv("USERNAME"),
    password=os.getenv("PASSWORD"),
)

In [4]:
def post_data(data,model):
    """POST ``data`` as a JSON body to the ``model`` collection of the metadata API.

    The request is authenticated with the module-level ``credentials``.

    Raises:
        Exception: carrying the response body when the response is not OK.
    """
    collection_url = API_url % (model, "")
    response = requests.post(collection_url, json=data, auth=credentials)
    if response.ok:
        return
    raise Exception(response.text)

In [46]:
def delete_data(data,model):
    """Delete the record identified by ``data["id"]`` from the ``model`` collection.

    Sends an authenticated DELETE request to the metadata API.

    Raises:
        Exception: carrying the response body when the response is not OK.
    """
    record_url = API_url % (model, data["id"])
    response = requests.delete(record_url, auth=credentials)
    if response.ok:
        return
    raise Exception(response.text)

## A note on validators

Signature Commons is able to ingest any kind of metadata. Validators give structure to our database by defining what metadata of a certain type should look like.

```/dcic/signature-commons-schema/v5/core/resource.json```
![validator](img/validator.png)

### Resources

In [5]:
# Read the example resource records and preview the first one.
with open("data/resources.json") as resources_file:
    resources = json.load(resources_file)
resources[0]

{'$validator': '/dcic/signature-commons-schema/v5/core/resource.json',
 'id': '40512bee-879f-4c4c-a850-890ce6fadcd4',
 'meta': {'URL': 'http://archs4.cloud',
  'PMID': '29636450',
  'icon': 'static/images/ARCHS4_Cell_Lines.ico',
  '$validator': '/@dcic/signature-commons-schema/v5/meta/resource/sigcom.json',
  'description': '(From the website) ARCHS4 provides access to gene counts from HiSeq 2000, HiSeq 2500 and NextSeq 500 platforms for human and mouse experiments from GEO and SRA.',
  'Resource_Name': 'ARCHS4'}}

In [6]:
# Post data
# Upload every resource record to the "resources" collection.
for resource in resources:
    post_data(resource, "resources")

### Libraries

In [7]:
# Read the example library records and preview the first one.
with open("data/libraries.json") as libraries_file:
    libraries = json.load(libraries_file)
libraries[0]

{'$validator': '/dcic/signature-commons-schema/v5/core/library.json',
 'id': '3092d22e-d3e2-11e8-9544-787b8ad942f3',
 'resource': '40512bee-879f-4c4c-a850-890ce6fadcd4',
 'dataset': 'enrichr_geneset',
 'dataset_type': 'geneset_library',
 'meta': {'DOI': '10.1038/s41467-018-03751-6',
  'Icon': 'static/images/ARCHS4_IDG_Targets_Coexp.ico',
  'PMID': '29636450',
  'Spec': '(?P<Original_String>((?P<UUID>.*):SC=)?(?P<Gene>.+?)_(?P<Program>.+?)_(?P<Gene_Family>.+?)_(?P<Processing_Type>.+?))\\t\\t(?P<Genes>.+)',
  'Year': '2018',
  'Assay': 'RNA-seq',
  'Readout': 'mRNA',
  'Version': '2018',
  'Filename': 'ARCHS4_IDG_Coexp',
  'Organism': 'Homo sapiens',
  'Weighted': 'False',
  '$validator': '/@dcic/signature-commons-schema/v4/meta/library/sigcom.json',
  'Access_URL': 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary?mode=text&libraryName=ARCHS4_IDG_Coexp',
  'Description': 'Top 300 genes from All RNA-seq and CHIP-seq Signature Search Space (ARCHS4) that are co-expressed with under-studied

In [8]:
# Post data
# Upload every library record to the "libraries" collection.
for library in libraries:
    post_data(library, "libraries")

### Signatures

In [9]:
# Read the example signature records and preview the first one.
with open("data/signatures.json") as signatures_file:
    signatures = json.load(signatures_file)
signatures[0]

{'$validator': '/dcic/signature-commons-schema/v5/core/signature.json',
 'id': '0eb8b9b4-c1a1-11e8-9565-0242ac170004',
 'library': '30b22907-d3e2-11e8-92a7-787b8ad942f3',
 'meta': {'Assay': 'microarray',
  'Tissue': [{'Name': 'kidney', 'Accession': 'UBERON:0002113'}],
  'Readout': 'mRNA',
  'Organism': 'Rattus norvegicus',
  'Direction': 'down',
  '$validator': '/@dcic/signature-commons-schema/v4/meta/signature/sigcom.json',
  'Time_Point': '5d',
  'Small_Molecule': [{'Name': 'ibuprofen',
    'Accession': 'CHEBI:5855',
    'Concentration': '263 mg/kg in Corn Oil'}],
  'Number_of_genes': 260,
  'Original_String': 'Ibuprofen-263 mg/kg in Corn Oil-Rat-Kidney-5d-dn',
  'Perturbation_Type': 'small molecule'}}

In [10]:
# Post data
# Upload every signature record to the "signatures" collection.
for signature in signatures:
    post_data(signature, "signatures")

#### Invalid metadata are checked by the metadata api

validator: https://github.com/dcic/signature-commons-schema/blob/master/meta/signature/sigcom.json

In [11]:
# A signature record that is deliberately invalid: its 'Direction' is 'dn',
# which is used below to show the metadata API rejecting bad metadata.
invalid_direction = {
    '$validator': '/dcic/signature-commons-schema/v5/core/signature.json',
    'id': '0eb8b9b4-c1a1-11e8-9565-0242ac170005',
    'library': '30b22907-d3e2-11e8-92a7-787b8ad942f3',
    'meta': {
        'Assay': 'microarray',
        'Tissue': [
            {'Name': 'kidney', 'Accession': 'UBERON:0002113'},
        ],
        'Readout': 'mRNA',
        'Organism': 'Rattus norvegicus',
        'Direction': 'dn',
        '$validator': '/@dcic/signature-commons-schema/v4/meta/signature/sigcom.json',
        'Time_Point': '5d',
        'Small_Molecule': [
            {
                'Name': 'ibuprofen',
                'Accession': 'CHEBI:5855',
                'Concentration': '263 mg/kg in Corn Oil',
            },
        ],
        'Number_of_genes': 260,
        'Original_String': 'Ibuprofen-263 mg/kg in Corn Oil-Rat-Kidney-5d-dn',
        'Perturbation_Type': 'small molecule',
    },
}

In [12]:
# This POST is expected to be rejected by the metadata API. The failure is the
# demonstration, so catch it and display the message instead of letting the
# exception stop a "Run All" of the notebook at this cell.
try:
    post_data(invalid_direction,"signatures")
except Exception as validation_error:
    print("Exception:", validation_error)
else:
    print("Unexpected: the invalid record was accepted by the API")

Exception: {"error":{"statusCode":406,"name":"NotAcceptableError","message":{"message":"validation failed","errors":[{"keyword":"enum","dataPath":"/Direction","schemaPath":"/dcic/signature-commons-schema/v5/core/meta.json/$validator","params":{"allowedValues":["up","down"]},"message":"should be equal to one of the allowed values","schema":"/@dcic/signature-commons-schema/v4/meta/signature/sigcom.json","parentSchema":{"description":"Tells whether the data are up regulated or down regulated","enum":["up","down"],"type":"string"},"data":{"Assay":"microarray","Tissue":[{"Name":"kidney","Accession":"UBERON:0002113"}],"Readout":"mRNA","Organism":"Rattus norvegicus","Direction":"dn","$validator":"/@dcic/signature-commons-schema/v4/meta/signature/sigcom.json","Time_Point":"5d","Small_Molecule":[{"Name":"ibuprofen","Accession":"CHEBI:5855","Concentration":"263 mg/kg in Corn Oil"}],"Number_of_genes":260,"Original_String":"Ibuprofen-263 mg/kg in Corn Oil-Rat-Kidney-5d-dn","Perturbation_Type":"small molecule"}}],"validation":true,"ajv":true,"name":"Error"}}}

## Refresh materialized view

In [18]:
# Show the fully formatted optimize/refresh URL.
API_url % ("optimize", "refresh")

'http://localhost:3000/signature-commons-metadata-api/optimize/refresh'

In [17]:
# Call the optimize/refresh endpoint and display whether the response was OK.
refresh_url = API_url % ("optimize", "refresh")
res = requests.get(refresh_url, auth=credentials)
res.ok

True

### Checkpoint
http://localhost:3001/

## Customizing the UI

There are three types of schemas available for use in Signature Commons. They are identified by their validators.
1. ui-schemas
    - Used to make searches and info cards more informative
    - "/dcic/signature-commons-schema/v5/meta/schema/ui-schema.json"
2. counting schemas
    - for counting
    - used to add charts, stats, and even filters
    - Tell the ui what type of count it is depending on the tag
    - "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
3. landing schemas
    - changes in the landing page, and routing
    - "/dcic/signature-commons-schema/v5/meta/schema/landing-ui.json"

### Modify Search Results
```
{
  "match": {
    "${$validator}": "/@?dcic/signature-commons-schema/(v[0-9]+/)?core/library.json",
    "${dataset}": "enrichr_geneset"
  },
  "properties": {
    "Image": {
      "visibility": 1,
      "type": "img",
      "src": "${PREFIX}/${meta['Icon']}",
      "alt": "${meta['Primary_Resource']}",
      "priority": 1,
      "icon": true
    },
    "Library": {
      "visibility": 1,
      "type": "text",
      "text": "${meta['Library_name']}",
      "priority": 2,
      "hyperlink": "${meta.Access_URL}",
      "name": true
    },
    "Description": {
      "visibility": 0,
      "type": "text",
      "text": "${meta['Description']}",
      "priority": 2,
      "description": true,
      "subtitle": true
    },
    "Assay": {
      "visibility": 1,
      "type": "text",
      "text": "${meta.Assay}",
      "priority": 3,
      "MDI_Icon": "mdi-flask-outline"
    },
    "Organism": {
      "visibility": 1,
      "type": "text",
      "text": "${meta.Organism}",
      "priority": 4,
      "MDI_Icon": "mdi-human-child"
    },
    "Year": {
      "visibility": 1,
      "type": "text",
      "text": "${meta.Year}",
      "priority": 4,
      "MDI_Icon": "mdi-calendar"
    }
  }
}
```

In [19]:
from uuid import uuid4

In [139]:
# Load the resource UI schema, tag it with the ui-schema validator, wrap it in
# a schema record with a fresh id, and upload it.
with open("data/schemas/resource.json") as schema_file:
    meta = json.load(schema_file)

meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/ui-schema.json"
resource_schema = {"id": str(uuid4()), "meta": meta}

schemas_url = API_url % ("schemas", "")
res = requests.post(schemas_url, json=resource_schema, auth=credentials)
res.ok

True

In [140]:
# Load the library UI schema, tag it with the ui-schema validator, wrap it in
# a schema record with a fresh id, and upload it.
with open("data/schemas/library.json") as schema_file:
    meta = json.load(schema_file)

meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/ui-schema.json"
library_schema = {"id": str(uuid4()), "meta": meta}

schemas_url = API_url % ("schemas", "")
res = requests.post(schemas_url, json=library_schema, auth=credentials)
res.ok

True

In [141]:
# Load the signature UI schema, tag it with the ui-schema validator, wrap it in
# a schema record with a fresh id, and upload it.
with open("data/schemas/signature.json") as schema_file:
    meta = json.load(schema_file)

meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/ui-schema.json"
signature_schema = {"id": str(uuid4()), "meta": meta}

schemas_url = API_url % ("schemas", "")
res = requests.post(schemas_url, json=signature_schema, auth=credentials)
res.ok

True

In [142]:
# Load the signature_creeds UI schema, tag it with the ui-schema validator,
# wrap it in a schema record with a fresh id, and upload it.
with open("data/schemas/signature_creeds.json") as schema_file:
    meta = json.load(schema_file)

meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/ui-schema.json"
signature_creeds_schema = {"id": str(uuid4()), "meta": meta}

schemas_url = API_url % ("schemas", "")
res = requests.post(schemas_url, json=signature_creeds_schema, auth=credentials)
res.ok

True

In [127]:
# res = requests.get(API_url%("schemas",""))
# for i in res.json():
#     delete_data(i,"schemas")

### Add Filters
1.
```
{
    "Field_Name": "library",
    "Preferred_Name": "Libraries",
    "Preferred_Name_Singular": "Library",
    "Type": "string",
    "MDI_Icon": "mdi-library-books",
    "Table": "signatures",
    "Filter": true
}
 ```
 2.
 ```
{
    "Field_Name": "meta.Assay",
    "Type": "string",
    "Preferred_Name": "Assays",
    "Preferred_Name_Singular": "Assay",
    "Table": "signatures",
    "Filter": true,
    "MDI_Icon": "mdi-microscope"
}
 ```

In [143]:
# Read the filter definitions from disk.
with open("data/schemas/filters.json") as filters_file:
    filters_meta = json.load(filters_file)

In [144]:
# Tag every filter definition with the counting validator, then wrap each one
# in a schema record carrying a freshly generated id.
for filter_meta in filters_meta:
    filter_meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"

filters = [
    {"id": str(uuid4()), "meta": filter_meta}
    for filter_meta in filters_meta
]

In [145]:
# Upload every filter schema record to the "schemas" collection.
for filter_schema in filters:
    post_data(filter_schema, "schemas")

### Add Stats and Charts
Tags:
1. Table_Count
2. Meta_Count
3. Pie_Count
4. Bar_Count
5. Bar_Score
6. Histogram
7. Word_Count

#### Table Count
```
[
    {
        "Field_Name": "signatures",
        "Type": "table",
        "Preferred_Name": "Signatures",
        "MDI_Icon": "mdi-fingerprint",
        "Table_Count": true,
        "Visible_On_Landing": true
    },
    {
        "Field_Name": "resources",
        "Type": "table",
        "Preferred_Name": "Web Resources",
        "MDI_Icon": "mdi-web",
        "Table_Count": true,
        "Visible_On_Landing": true
    }
]
```

In [146]:
# Load the table-count definitions, tag each with the counting validator and
# upload it as a schema record with a fresh id.
with open("data/schemas/table_count.json") as table_count_file:
    table_count_meta = json.load(table_count_file)

table_count = []
for count_meta in table_count_meta:
    count_meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
    schema_record = {"id": str(uuid4()), "meta": count_meta}
    post_data(schema_record, "schemas")

### Meta Count
```
{
    "Field_Name": "meta.Small_Molecule.Name",
    "Preferred_Name": "Small Molecules",
    "Type": "object",
    "MDI_Icon": "mdi-hexagon-multiple",
    "Table": "signatures",
    "Meta_Count": true
}
```

In [147]:
# Load the meta-count definitions, tag each with the counting validator and
# upload it as a schema record with a fresh id.
with open("data/schemas/meta_count.json") as meta_count_file:
    meta_count_meta = json.load(meta_count_file)

meta_count = []
for count_meta in meta_count_meta:
    count_meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
    schema_record = {"id": str(uuid4()), "meta": count_meta}
    post_data(schema_record, "schemas")

### Pie Count
```
{
    "Field_Name": "meta.Assay",
    "Preferred_Name": "Assays",
    "Preferred_Name_Singular": "Assay",
    "Type": "object",
    "MDI_Icon": "mdi-flask",
    "Table": "signatures",
    "Pie_Count": true
}
```

In [148]:
# Load the pie-count definitions, tag each with the counting validator and
# upload it as a schema record with a fresh id.
with open("data/schemas/pie_count.json") as pie_count_file:
    pie_count_meta = json.load(pie_count_file)

pie_count = []
for count_meta in pie_count_meta:
    count_meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
    schema_record = {"id": str(uuid4()), "meta": count_meta}
    post_data(schema_record, "schemas")

### Word Count
```
{
    "Field_Name": "meta.Small_Molecule.Name",
    "Preferred_Name": "Small Molecules",
    "Preferred_Name_Singular": "Small Molecule",
    "Type": "object",
    "MDI_Icon": "mdi-hexagon-multiple",
    "Table": "signatures",
    "Word_Count": true
}
```

In [None]:
# Load the word-count definitions, tag each with the counting validator and
# upload it as a schema record with a fresh id.
with open("data/schemas/word_count.json") as o:
    word_count_meta = json.loads(o.read())

word_count = []
# Iterate over the word-count definitions loaded above. Looping over
# pie_count_meta here would re-post the pie-count schemas and never upload
# the word-count ones.
for meta in word_count_meta:
    meta["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
    data = {
        "id": str(uuid4()),
        "meta": meta
        }
    post_data(data,"schemas")

### Bar Count
```
{
  "Field_Name": "meta.Assay",
  "Type": "string",
  "Preferred_Name": "Assays",
  "Table": "signatures",
  "Bar_Count": true
}
```

In [149]:
# Load the assay bar-count definition, tag it with the counting validator and
# upload it as a schema record with a fresh id.
with open("data/schemas/bar_count_assay.json") as bar_count_file:
    bar_count_assay = json.load(bar_count_file)

bar_count_assay["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
data = {"id": str(uuid4()), "meta": bar_count_assay}
post_data(data, "schemas")

In [150]:
# Load the readout bar-count definition, tag it with the counting validator and
# upload it as a schema record with a fresh id.
with open("data/schemas/bar_count_readout.json") as bar_count_file:
    bar_count_readout = json.load(bar_count_file)

bar_count_readout["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/counting.json"
data = {"id": str(uuid4()), "meta": bar_count_readout}
post_data(data, "schemas")

### Note
You can combine tags to reduce redundancy
```
{
    "Field_Name": "meta.Assay",
    "Preferred_Name": "Assays",
    "Preferred_Name_Singular": "Assay",
    "Type": "object",
    "MDI_Icon": "mdi-flask",
    "Table": "signatures",
    "Pie_Count": true,
    "Bar_Count": true
}
```

## Customize the landing page
| Tag | type | default |
|--|--|--|
| header | string | Signature Commons |
|Text1| string | Explore an extensive collection of well-annotated gene-sets and signatures |
|Text2| string | Search across a broad gathering of perturbations |
|Text3| string | By |
|Text4| string | Start using Signature Commons in your project |
|metadata_placeholder | string | Search over half a million signatures|
|geneset_placeholder | string | Genes that are regulated in signature or overlap with gene set|
|up_genes_placeholder | string | Genes that are up-regulated in signature or overlap with gene set|
|down_genes_placeholder | string | Genes that are down-regulated in signature or overlap with gene set|
|geneset_terms| string | * see sigcom website for defaults * |
|weighted_geneset_terms| string | * see sigcom website for defaults * |
|up_set_terms| string | * see sigcom website for defaults * |
|down_set_terms| string | * see sigcom website for defaults * |
|search_terms | array | ['MCF10A', 'Imatinib', 'ZNF830', 'STAT3', 'Neuropathy'] |
|github | string | https://github.com/dcic/signature-commons-ui|
|github_issues | string | https://github.com/dcic/signature-commons-ui/issues|
|deactivate_download | boolean | true|
| maxResourcesBeforeCollapse | int | 60 |
| maxResourcesToShow | int | 40 |
| counting_validator | string | /dcic/signature-commons-schema/v5/meta/schema/counting.json |
| ui_schema | string | /dcic/signature-commons-schema/v5/meta/schema/ui-schema.json|
| footer_type | string | powered |
| nav | object | * See Below * |
| downloads | object | * See Below * |
| bar_chart_style | object | |
| pie_chart_style| object | |

nav:
```
{
    MetadataSearch: {
        active: true,
        endpoint: '/MetadataSearch',
    },
    SignatureSearch: {
        active: false,
        endpoint: '/SignatureSearch',
    },
    Resources: {
        active: true,
        endpoint: '/Resources',
    },
    API: {
        active: true,
        endpoint: '/API',
    },
}
``` 

downloads:
```
{
    gmt: 'Download gmt file',
    tsv: 'Download tsv file',
    geneset: 'Download gene set',
    ranked: 'Download ranked signature',
    signature_json: 'Download signature as json',
    library_json: 'Download library as json',
    resource_json: 'Download resource as json',
    sigcom: true,
    enrichr: true,
}
```

### Change default values
```
{
    "landing": true,
    "content": {
        "counting_validator": "/dcic/signature-commons-schema/v5/meta/schema/counting.json",
        "ui_schema": "/dcic/signature-commons-schema/v5/meta/schema/ui-schema.json",
        "header" : "Signature Commons Demo",
        "footer_type": "dcic",
        "search_terms" : ["HELA", "archs4", "tamoxifen"],
        "nav": {
        "MetadataSearch": {
          "active": true,
          "endpoint": "/MetadataSearch"
        },
        "SignatureSearch": {
          "active": false,
          "endpoint": "/GenesetEnrichment"
        },
        "Resources": {
          "active": true,
          "endpoint": "/WebResources"
        },
        "API": {
          "active": true
            }
        }
    }
}

 ```

In [151]:
# Load the landing-page settings, tag them with the landing-ui validator and
# upload them as a schema record with a fresh id.
with open("data/schemas/landing.json") as landing_file:
    landing = json.load(landing_file)

landing["$validator"] = "/dcic/signature-commons-schema/v5/meta/schema/landing-ui.json"
data = {"id": str(uuid4()), "meta": landing}
post_data(data, "schemas")

## delete schemas

In [138]:
# Fetch every schema record, then delete them one by one.
# The GET is authenticated like the other API calls in this notebook.
res = requests.get(API_url%("schemas",""), auth=credentials)
if not res.ok:
    # Fail loudly instead of iterating over an error payload as if it were a
    # list of schema records.
    raise Exception(res.text)
for schema in res.json():
    delete_data(schema,"schemas")