In [16]:
import re
import os
import sys
import requests
import numpy as np
import pandas as pd
from tqdm import tqdm
import time

In [7]:
# Load the saved HTML table and collect the value of every href="..." attribute in it.
with open('data/table.html') as table_file:
    table = table_file.read()
href_pattern = re.compile(r'href="([^"]*)"')
hrefs = href_pattern.findall(table)

In [8]:
# Derive one download URL per link by substituting 'gene_list/0/' for 'results'.
downloads = list(map(lambda ref: ref.replace('results', 'gene_list/0/'), hrefs))

In [10]:
# Download every gene list into data/dexsigs/, pausing one second between requests.
os.makedirs('data/dexsigs', exist_ok=True)
for download in downloads:
    try:
        # A timeout keeps one stalled connection from hanging the whole loop.
        r = requests.get(download, timeout=60)
    except requests.RequestException as e:
        # A network error on one URL should not abort the remaining downloads.
        print('Failed', download, e)
    else:
        if r.status_code == 200:
            # The last path segment of the URL becomes the file name.
            with open(f"data/dexsigs/{download.split('/')[-1]}.txt", 'wb') as f:
                f.write(r.content)
        else:
            # Report which URL failed and with what status so it can be retried.
            print('Failed', download, r.status_code)
    time.sleep(1)

In [11]:
import json
def query_consensus_l2s2(genes, sortby='pvalue', timeout=120):
    """Run an L2S2 enrichment query and return its consensus table.

    Posts the ``EnrichmentQuery`` GraphQL operation to the L2S2 endpoint and
    turns the ``consensus`` list of the response into a DataFrame.

    Args:
        genes: List of gene symbols sent as the ``genes`` query variable.
        sortby: Value sent as the ``sortBy`` query variable.
        timeout: Seconds to wait for the server before the request is abandoned.

    Returns:
        pandas.DataFrame built from the ``consensus`` entries of the response.

    Raises:
        Exception: If the HTTP status code is not 200.
        RuntimeError: If the response carries GraphQL ``errors`` or lacks the
            ``data.currentBackground.enrich.consensus`` path.
    """
    query = {
        "operationName": "EnrichmentQuery",
        "variables": {
            "filterTerm": "",
            "filterFda": False,
            "sortBy": sortby,
            "filterKo": False,
            "genes": genes,
            "pvalueLe": 1.1
        },
        "query": 'query EnrichmentQuery($genes: [String]!, $filterTerm: String = "", $offset: Int, $first: Int, $filterFda: Boolean = false, $sortBy: String = "", $filterKo: Boolean = false, $pvalueLe: Float = 1.1) {\n  currentBackground {\n    enrich(\n      genes: $genes\n      filterTerm: $filterTerm\n      offset: $offset\n      first: $first\n      filterFda: $filterFda\n      sortby: $sortBy\n      filterKo: $filterKo\n      pvalueLe: $pvalueLe\n    ) {\n     consensus {\n        drug\n        oddsRatio\n        pvalue\n        adjPvalue\n        approved\n        countSignificant\n        countInsignificant\n        countUpSignificant\n        pvalueUp\n        adjPvalueUp\n        oddsRatioUp\n        pvalueDown\n        adjPvalueDown\n        oddsRatioDown\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n',
    }
    # ``json=`` serializes the payload and sets the JSON Content-Type header;
    # the timeout keeps a stalled server from blocking the caller forever.
    response = requests.post(
        "https://l2s2.maayanlab.cloud/graphql",
        json=query,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception("Query failed to run by returning code of {}. {}".format(response.status_code, query))

    payload = response.json()
    # GraphQL reports query problems inside a 200 response, so check explicitly
    # instead of failing later with an opaque KeyError/TypeError.
    if payload.get('errors'):
        raise RuntimeError("L2S2 query returned GraphQL errors: {}".format(payload['errors']))
    try:
        consensus = payload['data']['currentBackground']['enrich']['consensus']
    except (KeyError, TypeError) as exc:
        raise RuntimeError("L2S2 response did not contain enrichment results: {}".format(payload)) from exc
    return pd.DataFrame(consensus)

In [24]:
# Query L2S2 separately with the positive- and negative-valued genes of every
# signature file and save each consensus table under data/dex_out/.
os.makedirs('data/dex_out', exist_ok=True)
for signature in tqdm(os.listdir('data/dexsigs')):
    df = pd.read_csv(f"data/dexsigs/{signature}", sep='\t', index_col=0, header=None)
    # Rows whose label starts with '!' are metadata; every other row is a gene.
    # Copy the slice so renaming and converting the column acts on an independent frame.
    genes_cd = df[~df.index.str.startswith('!')].copy()
    genes_cd.columns = ['CD']
    # Convert once and keep the result: ``astype`` returns a new object, so a
    # bare call whose result is discarded changes nothing.
    genes_cd['CD'] = genes_cd['CD'].astype(float)
    up_genes = list(genes_cd[genes_cd['CD'] > 0].index.values)
    down_genes = list(genes_cd[genes_cd['CD'] < 0].index.values)
    for direction, genes in (('up', up_genes), ('down', down_genes)):
        try:
            res_df = query_consensus_l2s2(genes, sortby='pvalue')
            res_df.to_csv(f'data/dex_out/{signature.replace(".txt", "")}_{direction}.tsv', sep='\t')
            time.sleep(1)
        except Exception as e:
            # Best effort: report the failing signature and move on to the next query.
            print(signature, e)

100%|██████████| 86/86 [20:08<00:00, 14.05s/it]


In [44]:
def clustergrammer_link(df: pd.DataFrame, filename: str):
    """Upload a matrix to Clustergrammer and return the server's reply.

    The frame is written to ``data/<filename>.tsv`` and that file is then
    posted to the Clustergrammer matrix-upload endpoint.

    Args:
        df: Matrix to upload.
        filename: Base name (without extension) of the TSV written under ``data/``.

    Returns:
        The response body as text (the notebook prints it as the visualization
        link), or ``None`` when writing, uploading, or the HTTP status failed.
    """
    upload_url = 'http://amp.pharm.mssm.edu/clustergrammer/matrix_upload/'
    tsv_path = f'data/{filename}.tsv'
    try:
        df.to_csv(tsv_path, sep='\t')
        # The context manager closes the handle even if the upload raises.
        with open(tsv_path, 'rb') as tsv_file:
            r = requests.post(upload_url, files={'file': tsv_file}, timeout=120)
        # An HTTP error page is not a link; send it to the handler below instead.
        r.raise_for_status()
        return r.text
    except Exception as e:
        print('Error uploading to Clustergrammer:', e)
        return None

In [95]:
# Build one column of -log10(p-value) per signature from the saved "up" consensus tables.
dfs = []
for signature in tqdm(os.listdir('data/dex_out')):
    # Match the full suffix so a file name that merely contains '_up' is not picked up.
    if not signature.endswith('_up.tsv'):
        continue
    df = pd.read_csv(f"data/dex_out/{signature}", sep='\t', index_col=0)
    if df.empty:
        continue
    # Read without a header row, as the cell that queries these files does;
    # otherwise the first line is consumed as column names and its field is lost.
    meta = pd.read_csv(f"data/dexsigs/{signature.replace('_up.tsv', '.txt')}", sep='\t', index_col=0, header=None)
    meta = meta[meta.index.str.startswith('!')].drop_duplicates()
    # Missing metadata fields fall back to an empty string.
    pert = meta.loc['!perturbation'].values[0] if '!perturbation' in meta.index else ''
    organism = meta.loc['!organism'].values[0] if '!organism' in meta.index else ''
    cell = meta.loc['!cell'].values[0] if '!cell' in meta.index else ''

    # One Series per signature, indexed by drug and named after the signature's metadata.
    term = f"{cell}_{organism}_{pert}"
    df = (-np.log10(df.set_index('drug')['pvalue'])).rename(term)
    dfs.append(df)

100%|██████████| 172/172 [00:01<00:00, 113.45it/s]


In [99]:
# Place the per-signature columns side by side, then keep only the drugs that
# have a value in at least 98% of the columns.
df_up = pd.concat(dfs, axis='columns')
df_up_sub = df_up.dropna(axis='index', thresh=0.98 * df_up.shape[1])
df_up_sub.shape

(178, 82)

In [100]:
# Upload the natural log of the filtered "up" matrix and print what the server returns.
# NOTE(review): entries equal to 0 (a p-value of 1) become -inf under np.log — confirm this is intended.
dex_up_link = clustergrammer_link(np.log(df_up_sub), 'dex_up')
print(dex_up_link)

http://maayanlab.cloud/clustergrammer/viz/6740a3e9357ed90010449d28/dex_up.tsv


In [101]:
# Build one column of -log10(p-value) per signature from the saved "down" consensus tables.
dfs = []
for signature in tqdm(os.listdir('data/dex_out')):
    # Match the full suffix so a file name that merely contains '_down' is not picked up.
    if not signature.endswith('_down.tsv'):
        continue
    df = pd.read_csv(f"data/dex_out/{signature}", sep='\t', index_col=0)
    if df.empty:
        continue
    # Read without a header row, as the cell that queries these files does;
    # otherwise the first line is consumed as column names and its field is lost.
    meta = pd.read_csv(f"data/dexsigs/{signature.replace('_down.tsv', '.txt')}", sep='\t', index_col=0, header=None)
    meta = meta[meta.index.str.startswith('!')].drop_duplicates()
    # Missing metadata fields fall back to an empty string.
    pert = meta.loc['!perturbation'].values[0] if '!perturbation' in meta.index else ''
    organism = meta.loc['!organism'].values[0] if '!organism' in meta.index else ''
    cell = meta.loc['!cell'].values[0] if '!cell' in meta.index else ''

    # One Series per signature, indexed by drug and named after the signature's metadata.
    term = f"{cell}_{organism}_{pert}"
    df = (-np.log10(df.set_index('drug')['pvalue'])).rename(term)
    dfs.append(df)

100%|██████████| 172/172 [00:01<00:00, 108.85it/s]


In [104]:
# Place the per-signature columns side by side, then keep only the drugs that
# have a value in at least 95% of the columns.
df_dn = pd.concat(dfs, axis='columns')
df_dn_sub = df_dn.dropna(axis='index', thresh=0.95 * df_dn.shape[1])
df_dn_sub.shape

(149, 83)

In [122]:
# Report mean and standard deviation of the -log10(p-value) rows for the two
# drugs, first across the down signatures and then across the up signatures.
for label, frame in (('Down', df_dn), ('Up', df_up)):
    dex_row = frame.loc['dexamethasone']
    beta_row = frame.loc['betamethasone']
    print(
        f'{label} Dex Signatures -- Dexamethasone -log10(p-value): ',
        dex_row.mean(), '+/-', dex_row.std(),
        ', betamethasone:',
        beta_row.mean(), '+/-', beta_row.std(),
    )

Down Dex Signatures -- Dexamethasone -log10(p-value):  2.7715713432883198 +/- 3.7606015935501516 , betamethasone: 0.572629832651498 +/- 0.5506887995223533
Up Dex Signatures -- Dexamethasone -log10(p-value):  10.409360513129398 +/- 10.661841389140571 , betamethasone: 0.8404351308214124 +/- 0.7136063329065105


In [106]:
# Upload the natural log of the filtered "down" matrix and print what the server returns.
# NOTE(review): entries equal to 0 (a p-value of 1) become -inf under np.log — confirm this is intended.
dex_dn_link = clustergrammer_link(np.log(df_dn_sub), 'dex_dn')
print(dex_dn_link)

http://maayanlab.cloud/clustergrammer/viz/6740a433357ed90010449d42/dex_dn.tsv


In [116]:

# Print every row label of the filtered down matrix that contains either drug name.
for drug_name in df_dn_sub.index.values:
    if any(needle in drug_name for needle in ('dexamethasone', 'betamethasone')):
        print(drug_name)

In [None]:
# Example gene set, plus the full EnrichmentQuery payload that requests both the
# per-gene-set nodes and the consensus table for it.
geneset = ["AKT1", "CDK9", "STAT3", "STAT1", "AKT5"]

query = dict(
    operationName="EnrichmentQuery",
    variables=dict(
        filterTerm=" ",
        offset=0,
        first=12,
        filterFda=False,
        sortBy="pvalue",
        filterKo=False,
        genes=geneset,
    ),
    query="""query EnrichmentQuery(
                    $genes: [String]!
                    $filterTerm: String = ""
                    $offset: Int = 0
                    $first: Int = 10
                    $filterFda: Boolean = false
                    $sortBy: String = ""
                    $filterKo: Boolean = false
                    ) {
                    currentBackground {
                        enrich(
                        genes: $genes
                        filterTerm: $filterTerm
                        offset: $offset
                        first: $first
                        filterFda: $filterFda
                        sortby: $sortBy
                        filterKo: $filterKo
                        ) {
                        nodes {
                            geneSetHash
                            pvalue
                            adjPvalue
                            oddsRatio
                            nOverlap
                            geneSets {
                            nodes {
                                id
                                term
                                description
                                nGeneIds
                                geneSetFdaCountsById {
                                nodes {
                                    approved
                                    count
                                    __typename
                                }
                                __typename
                                }
                                __typename
                            }
                            totalCount
                            __typename
                            }
                            __typename
                        }
                        totalCount
                        consensusCount
                        consensus {
                            drug
                            oddsRatio
                            pvalue
                            adjPvalue
                            approved
                            countSignificant
                            countInsignificant
                            countUpSignificant
                            pvalueUp
                            adjPvalueUp
                            oddsRatioUp
                            pvalueDown
                            adjPvalueDown
                            oddsRatioDown
                            __typename
                        }
                        __typename
                        }
                        __typename
                    }
                    }
                    """,
)



'CD58\n\nSLC25A46\n\n\nVDAC1\nSPR\n\nKDM3A\nPCCB\nBAG3\nRNH1\nDNAJB2\nFIS1\nHACD3\nSORBS3\nTOR1A\nTNIP1\nOXA1L\n\nNPDC1\nCRIP1\nADH5\nVPS28\nCHAC1'

In [11]:
import requests
import json

url = "https://l2s2.maayanlab.cloud/graphql"


def enrich_l2s2(geneset: list, first: int = 12, offset: int = 0, timeout: float = 60):
    """Post the full EnrichmentQuery for a gene set and return the decoded JSON.

    The query asks for the per-gene-set ``nodes`` as well as the ``consensus``
    table. The request goes to the module-level ``url``.

    Args:
        geneset: List of gene symbols sent as the ``genes`` query variable.
        first: Value sent as the ``first`` query variable (12, as before, by default).
        offset: Value sent as the ``offset`` query variable (0, as before, by default).
        timeout: Seconds to wait for the server before the request is abandoned.

    Returns:
        The parsed JSON response as a dict when the HTTP status is 200,
        otherwise ``None`` (after printing the status code).
    """
    query = {
        "operationName": "EnrichmentQuery",
        "variables": {
            "filterTerm": " ",
            "offset": offset,
            "first": first,
            "filterFda": False,
            "sortBy": "pvalue",
            "filterKo": False,
            "genes": geneset,
        },
        "query": """query EnrichmentQuery(
                    $genes: [String]!
                    $filterTerm: String = ""
                    $offset: Int = 0
                    $first: Int = 10
                    $filterFda: Boolean = false
                    $sortBy: String = ""
                    $filterKo: Boolean = false
                    ) {
                    currentBackground {
                        enrich(
                        genes: $genes
                        filterTerm: $filterTerm
                        offset: $offset
                        first: $first
                        filterFda: $filterFda
                        sortby: $sortBy
                        filterKo: $filterKo
                        ) {
                        nodes {
                            geneSetHash
                            pvalue
                            adjPvalue
                            oddsRatio
                            nOverlap
                            geneSets {
                            nodes {
                                id
                                term
                                description
                                nGeneIds
                                geneSetFdaCountsById {
                                nodes {
                                    approved
                                    count
                                    __typename
                                }
                                __typename
                                }
                                __typename
                            }
                            totalCount
                            __typename
                            }
                            __typename
                        }
                        totalCount
                        consensusCount
                        consensus {
                            drug
                            oddsRatio
                            pvalue
                            adjPvalue
                            approved
                            countSignificant
                            countInsignificant
                            countUpSignificant
                            pvalueUp
                            adjPvalueUp
                            oddsRatioUp
                            pvalueDown
                            adjPvalueDown
                            oddsRatioDown
                            __typename
                        }
                        __typename
                        }
                        __typename
                    }
                    }
                    """,
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    # ``json=`` serializes the payload; the timeout keeps a stalled server from
    # blocking the notebook indefinitely.
    response = requests.post(url, json=query, headers=headers, timeout=timeout)

    if response.status_code != 200:
        # Keep the "None on failure" contract, but make the failure visible.
        print(f"L2S2 enrichment request failed with status {response.status_code}")
        return None
    return response.json()


In [12]:
# Run the enrichment for the example gene set and print the raw JSON response.
l2s2_response = enrich_l2s2(geneset)
print(l2s2_response)

{'data': {'currentBackground': {'enrich': {'nodes': [{'geneSetHash': '8ebb41be-6155-5241-a067-c64ac8a7d3b1', 'pvalue': 1.0787151011460342e-10, 'adjPvalue': 0.00018098757477085243, 'oddsRatio': 45.336, 'nOverlap': 6, 'geneSets': {'nodes': [{'id': 'a1f21e72-9830-45fd-836a-18ba22492081', 'term': 'MOAR008_NCIH838_24H_M15_INCB-3284_1.11uM down', 'description': None, 'nGeneIds': 250, 'geneSetFdaCountsById': {'nodes': [{'approved': False, 'count': 142, '__typename': 'GeneSetFdaCount'}], '__typename': 'GeneSetFdaCountsConnection'}, '__typename': 'GeneSet'}], 'totalCount': 1, '__typename': 'GeneSetsConnection'}, '__typename': 'EnrichResult'}, {'geneSetHash': 'a6660c23-2375-5396-9cfa-2820c6eaf872', 'pvalue': 2.9446597158692734e-08, 'adjPvalue': 0.0054895229821149754, 'oddsRatio': 37.78, 'nOverlap': 5, 'geneSets': {'nodes': [{'id': 'a1e773c1-0f75-454a-8b82-53de52e248bd', 'term': 'REP.A011_HUVEC.A_24H_I24_isbufylline_0.04uM up', 'description': None, 'nGeneIds': 250, 'geneSetFdaCountsById': {'nodes