# Drug screening dataset

In [1]:
import gilda
import pickle
import synapseclient
import pandas as pd
import numpy as np
import pandas as pd
import seaborn as sns
from rpy2 import robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()

INFO: [2020-11-13 21:48:07] /Users/johnbachman/Dropbox/1johndata/Knowledge File/Biology/Research/Big Mechanism/adeft/adeft/recognize.py - OneShotRecognizer not available. AdeftLongformScorer has not been built successfully.
INFO: [2020-11-13 21:48:08] keyring.backend - Loading SecretService
INFO: [2020-11-13 21:48:08] keyring.backend - Loading kwallet
INFO: [2020-11-13 21:48:08] keyring.backend - Loading macOS
INFO: [2020-11-13 21:48:08] keyring.backend - Loading windows


In [2]:
# Create a Synapse client and log in. No credentials are passed explicitly,
# so login() presumably relies on locally configured/cached credentials
# (TODO confirm for the environment this runs in).
syn = synapseclient.Synapse()
syn.login()

Welcome, johnbachman!



INFO: [2020-11-13 21:48:10] synapseclient_default - Welcome, johnbachman!



## Process drug response data

### Download data as pandas DataFrame

In [3]:
# Download the drug response table from Synapse (entity syn20684161),
# read the CSV into a DataFrame, and preview its first rows.
drug_data_path = syn.get("syn20684161").path
drug_data = pd.read_csv(drug_data_path, low_memory=False)
drug_data.head()

Unnamed: 0,model_name,model_type,cellosaurus_id,organism_name,disease_name,disease_efo_id,symptom_name,symptom_efo_id,experiment_synapse_id,study_synapse_id,funder,drug_name,DT_explorer_internal_id,dosage_unit,drug_screen_id,dosage,response_type,response,response_unit
0,N10,cell line,,human,no disease,,no symptom,,syn11373153.1,syn5610425,CTF,GPHR-00000018,159129,uM,1,"[0.0457246089549396,100]",AUC_Simpson,35.508784,
1,N5,cell line,,human,NF1,,no symptom,,syn11373157.1,syn5610425,CTF,GPHR-00000291,194936,uM,10,"[0.0457246089549396,100]",AUC_Simpson,4.01805,
2,N5,cell line,,human,NF1,,no symptom,,syn11373207.1,syn5610425,CTF,GPHR-00101658,313081,uM,100,"[0.0457246089549396,100]",AUC_Simpson,23.566992,
3,N5,cell line,,human,NF1,,no symptom,,syn11373765.1,syn5610425,CTF,GPHR-00224355,313317,uM,1000,"[0.0457246089549396,100]",AUC_Simpson,22.773713,
4,Syn5,cell line,,human,NF2,,no symptom,,syn12293953.1,syn2343195,CTF,NCGC00241342-02,150042,uM,10000,"[0.00078041474654,46.082949308]",AUC_Simpson,45.188508,


### Filter data
Here we filter the data to drugs that have an absolute IC50 (`IC50_abs`) strictly below 10 in the selected cell lines.

In [4]:
# Responses strictly below this value count as positive hits
IC50_CUTOFF = 10
# Model names of the cell lines retained for this analysis
pnf = [
    "ipNF05.5",
    "ipNF06.2A",
    "ipNF95.11b C/T",
    "ipnNF95.11C",
    "ipNF95.6",
    "ipNF05.5 (mixed clone)",
    "ipNF95.11b C",
]

# Keep IC50_abs rows for the selected models, drop every drug_screen_id that
# occurs more than once, and cap the response values at 50.
drug_data_pnf = (
    drug_data
    .query('response_type == "IC50_abs"')
    .query('model_name == @pnf')
    .groupby('drug_screen_id')
    .filter(lambda screen: len(screen) == 1)
    .assign(response=lambda frame: frame['response'].clip(upper=50))
)
drug_data_pnf_positive = drug_data_pnf[drug_data_pnf['response'] < IC50_CUTOFF]
# Sorted, de-duplicated internal drug IDs of the positive hits
positive_drug_ids = sorted(
    drug_data_pnf_positive['DT_explorer_internal_id'].unique())
len(positive_drug_ids)

INFO: [2020-11-13 21:48:23] numexpr.utils - Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO: [2020-11-13 21:48:23] numexpr.utils - NumExpr defaulting to 8 threads.


1072

## Process drug-target data

In [5]:
# Fetch the drug-target table (Synapse entity syn17091507) and read it with
# R's readRDS through rpy2. Note that `targetspath` holds the object returned
# by syn.get(); the local file path is its `.path` attribute.
targetspath = syn.get('syn17091507')
readRDS = robjects.r['readRDS']
# Used as a pandas DataFrame below (targets.head(), .isin, ...); presumably
# the conversion comes from pandas2ri.activate() -- TODO confirm.
targets = readRDS(targetspath.path)

In [6]:
# Preview the first rows of the drug-target table
targets.head()

Unnamed: 0,internal_id,hugo_gene,n_quantitative,mean_pchembl,cv,sd,IC50_nM,AC50_nM,EC50_nM,Potency_nM,Ki_nM,Kd_nM,n_qualitative,std_name,total_n,confidence,pchembl_d,pchembl_t,known_selectivity_index
1,3,HTR7,1,8.01,,,,,,,9.7,,-2147483648,CHEMBL2413451,1,-0.229,8.01,10322.9,1.0
2,4,CHRNA4,1,7.54,,,,,,,29.0,,-2147483648,CHEMBL204871,1,-0.229,15.08,4501.97,0.5
3,4,CHRNB2,1,7.54,,,,,,,29.0,,-2147483648,CHEMBL204871,1,-0.229,15.08,4194.75,0.5
4,5,GSK3A,2,7.76,5.285076,0.410122,34.0,,,,9.0,,-2147483648,CHEMBL3582401,2,0.393,7.76,2034.63,1.0
5,6,FAAH,1,9.3,,,0.5,,,,,,-2147483648,CHEMBL2386554,1,-0.229,9.3,6375.76,1.0


Here we get the names of the drugs that passed the IC50 filter above

In [7]:
# Standard names of all drugs whose internal ID passed the IC50 filter,
# de-duplicated and sorted.
drug_names = sorted(
    targets.loc[
        targets['internal_id'].isin(set(positive_drug_ids)), 'std_name'
    ].unique()
)

In [8]:
# Number of distinct drug names that passed the filter
len(drug_names)

661

Now we need to find groundings for these drug names

In [9]:
from indra.ontology.bio import bio_ontology


def _lookup_drug_grounding(name):
    """Find a grounding for a single drug name.

    Strategies are tried in this order, and the first hit wins:

    1. A name starting with 'CHEMBL' is treated as a CHEMBL ID itself.
    2. The INDRA bio ontology is queried by name in the CHEMBL and then the
       CHEBI namespace, each time with the name as given and then
       upper-cased. This covers CHEMBL standard names that INDRA has but
       Gilda does not, e.g., TOZASERTIB.
    3. Gilda is asked to ground the name and its first match is used.

    Parameters
    ----------
    name : str
        The drug name to ground.

    Returns
    -------
    The grounding, or None if no strategy produced one. Steps 1 and 3 give a
    (namespace, id) tuple; step 2 passes through whatever
    bio_ontology.get_id_from_name returns (assumed to be a (namespace, id)
    tuple as well -- it is indexed that way downstream; TODO confirm).
    """
    if name.startswith('CHEMBL'):
        return ('CHEMBL', name)
    for namespace in ('CHEMBL', 'CHEBI'):
        for candidate in (name, name.upper()):
            ontology_hit = bio_ontology.get_id_from_name(namespace, candidate)
            if ontology_hit:
                return ontology_hit
    # Only reached when neither ontology namespace knows the name
    gilda_matches = gilda.ground(name)
    if gilda_matches:
        top_term = gilda_matches[0].term
        return (top_term.db, top_term.id)
    return None


# Map every drug name to its grounding; names without one map to None
drug_groundings = {
    name: (_lookup_drug_grounding(name) or None) for name in drug_names
}

INFO: [2020-11-13 21:48:38] indra.ontology.bio.ontology - Loading INDRA bio ontology from cache at /Users/johnbachman/.indra/bio_ontology/1.4/bio_ontology.pkl
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: (+) jq1, (+)jq1
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing (+)-JQ1 with 1 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: (3 {3 [[2 chloro 3 (trifluoromethyl)benzyl](2,2 diphenylethyl)amino]propoxy}phenyl)acetic acid, (3{3[[2chloro3(trifluoromethyl)benzyl](2,2diphenylethyl)amino]propoxy}phenyl)acetic acid
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing (3-{3-[[2-Chloro-3-(Trifluoromethyl)Benzyl](2,2-Diphenylethyl)Amino]Propoxy}Phenyl)Acetic Acid with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: (3e)6'bromo2,3'biindole2',3(1h,1'h)dione 3oxime, (3e) 6' bromo 2,3' biindole 2',3(1h,1'h) dione 3 oxime, (3e)6'bromo2,3'biindole2',3(1h,1'h)dione 3oξme
INFO: [2

INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing AM-630 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: amg 458, amg458
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing AMG-458 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: as1949490
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing AS1949490 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: av412, av 412
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing AV-412 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: az628
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing AZ628 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: azd8931
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing AZD8931 with 2 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: apatin

INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing MK-3207 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: ml315
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing ML315 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: nsc 123538, nsc123538
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing NSC-123538 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: nsc 2801, nsc2801
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing NSC-2801 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: nsc663284, nsc 663284
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing NSC-663284 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Looking up the following strings: nu 7432, nu7432
INFO: [2020-11-13 21:48:56] gilda.grounder - Comparing NU-7432 with 0 entries
INFO: [2020-11-13 21:48:56] gilda.grounder - Loo

In [10]:
# Count the drug names for which no grounding was found
'Ungrounded drugs: %s' % sum(
    1 for grounding in drug_groundings.values() if not grounding)

'Ungrounded drugs: 80'

Now we turn the drug names and groundings into standardized INDRA Agents.

In [11]:
from indra.statements import Agent, Inhibition
from indra.ontology.standardize import standardize_name_db_refs

# Build one INDRA Agent per drug. Grounded drugs get standardized db_refs and,
# when available, the standardized name; ungrounded drugs keep their raw name
# and carry no db_refs.
drug_agents = []
for drug_name, grounding in drug_groundings.items():
    if grounding:
        standard_name, db_refs = standardize_name_db_refs(
            {grounding[0]: grounding[1]})
        # Fall back to the original name if standardization yields no name
        agent = Agent(standard_name or drug_name, db_refs=db_refs)
    else:
        agent = Agent(drug_name)
    drug_agents.append(agent)

We also have to make a standardized INDRA Agent representing cell proliferation

In [12]:
# Ground the phrase 'cell proliferation' with Gilda and take the first match.
# NOTE(review): indexing [0] raises IndexError if Gilda returns no matches.
match = gilda.ground('cell proliferation')[0]
# Standardize the match's (db, id) pair into a name and db_refs for the Agent
standard_name, db_refs = standardize_name_db_refs({match.term.db: match.term.id})
cell_proliferation = Agent(standard_name, db_refs=db_refs)

INFO: [2020-11-13 20:47:34] gilda.grounder - Looking up the following strings: cell proliferation
INFO: [2020-11-13 20:47:34] gilda.grounder - Comparing cell proliferation with 2 entries


We can now create a set of Inhibition statements between each drug and cell proliferation, and represent these as EMMAA StatementCheckingTests.

In [13]:
from emmaa.model_tests import StatementCheckingTest
# One test per drug: wrap "drug inhibits cell proliferation" in a
# StatementCheckingTest. Despite its name, `test_statements` holds the test
# objects, not the bare Inhibition statements.
test_statements = [StatementCheckingTest(Inhibition(drug_agent, cell_proliferation))
                   for drug_agent in drug_agents]

INFO: [2020-11-13 20:47:41] covid_19.preprocess - Latest data release is 2020-11-13
INFO: [2020-11-13 20:47:41] pybel.config - no configuration found, using default sqlite connection sqlite:////Users/johnbachman/.pybel/pybel_0.14.0_cache.db
INFO: [2020-11-13 20:47:46] indra.preassembler.grounding_mapper.disambiguate - INDRA DB is not available for text content retrieval for grounding disambiguation.
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs:

INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: 

INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: 

INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: 

INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: 

INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: None
INFO: [2020-11-13 20:47:48] emmaa.model_tests - Test configs: 

In [14]:
# Persist the tests to disk (pickle is already imported in the first cell)
with open('drug_screening_test_stmts.pkl', 'wb') as pkl_file:
    pickle.dump(test_statements, pkl_file)

Given the list of drugs we identified, we can also get all INDRA Statements that report one of these drugs as inhibiting or binding to a protein target.

In [15]:
# Collect INDRA Statements for the identified drugs via helpers from the
# covid_19 package.
# NOTE(review): the saved output of this cell is an ImportError --
# `get_drug_groundings` could not be imported from
# covid_19.get_drug_statements -- so this cell did not run in the recorded
# session and `drug_stmts` was never defined. Confirm the current helper
# names in that module before relying on this cell.
from covid_19.get_drug_statements import get_drug_groundings, get_drug_statements
# NOTE(review): this rebinds `drug_groundings`, which earlier in the notebook
# is the name -> grounding dict; re-running earlier cells afterwards would see
# the new value.
drug_groundings = get_drug_groundings(drug_agents)
drug_stmts = get_drug_statements(drug_groundings)

ImportError: cannot import name 'get_drug_groundings' from 'covid_19.get_drug_statements' (/Users/johnbachman/Dropbox/1johndata/Knowledge File/Biology/Research/Big Mechanism/covid-19/covid_19/get_drug_statements.py)

In [None]:
# Persist the drug statements to disk. Depends on `drug_stmts` from the
# previous cell, which failed with an ImportError in the recorded session.
with open('drug_statements.pkl', 'wb') as fh:
    pickle.dump(drug_stmts, fh)

## Notebook for old dataset

In [None]:
# rpy2 setup for the old-dataset section; `pandas2ri.activate()` is also
# called in the first cell of the notebook, so this repeats that setup.
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()


In [None]:
# Reload the drug-target table (same Synapse entity, syn17091507, as in the
# "Process drug-target data" section) by reading the RDS file through R.
targetspath = syn.get('syn17091507')
readRDS = robjects.r['readRDS']

targets = readRDS(targetspath.path)

In [None]:

# Keep drug-target rows with mean_pchembl above 6, reduce them to the
# identifying columns, and drop duplicate rows.
targets_filt = (
    targets
    .loc[lambda frame: frame['mean_pchembl'] > 6]
    .filter(items=["internal_id", "hugo_gene", "std_name"])
    .drop_duplicates()
)

targets_filt.head()

In [None]:
# Download the old drug response table (Synapse entity syn17462699), read it,
# and preview its first rows. This rebinds `drug_data_path` and `drug_data`
# from the section above.
drug_data_path = syn.get("syn17462699").path
drug_data = pd.read_csv(drug_data_path, low_memory=False)

drug_data.head()