In [37]:
## Full backend analysis for the CREEDS and L1000 dataset 
import os
import pandas as pd
from pandas.compat import StringIO
import numpy as np
from numpy import loadtxt
import sys
import json
from pprint import pprint
import objectpath
import csv
import re
import matplotlib.pyplot as plt
import json, requests
from pprint import pprint
import itertools
import scipy
from scipy.spatial import distance
from sklearn.metrics.pairwise import pairwise_distances
from clustergrammer_widget import *
def get_geneset(df, indexer):
    df_ = df.loc[indexer, :]
    return list(df_[df_ == 1].index)

In [174]:
### L1000 PRELOAD DATA DRUG CARD
with open ("L1000_up_lookup.json", 'r') as f:
    L1000_up_lookup = json.load(f)

with open ("L1000_down_lookup.json", 'r') as f:
    L1000_down_lookup = json.load(f)
    
metadata = pd.read_csv("L1000_metadata.csv")

### L1000 DRUG CARD DRUG INPUT
DrOI = "tretinoin" ## this will be the user input 

DrOI_df = metadata[metadata["pert_desc"] == DrOI]
DrOI_pert_ids = list(DrOI_df["pert_id"])

DrOI_up_signatures = {k: L1000_up_lookup[k] for k in (DrOI_pert_ids)}
DrOI_up_no_perts = {k: v for d in DrOI_up_signatures.values() for k, v in d.items()}
DrOI_up_drug_sigs = list(DrOI_up_no_perts.keys())

DrOI_down_signatures = {k: L1000_down_lookup[k] for k in (DrOI_pert_ids)}
DrOI_down_no_perts = {k: v for d in DrOI_down_signatures.values() for k, v in d.items()}
DrOI_down_drug_sigs = list(DrOI_down_no_perts.keys())

DrOI_all_sigs = set(DrOI_up_drug_sigs) & set (DrOI_down_drug_sigs)

for a in DrOI_all_sigs:
    L1000_up_json_file = DrOI_up_no_perts[a]
    L1000_down_json_file = DrOI_down_no_perts[a]
    with open(a + "_L1000_up_sig.json", "w") as f:
        #json.dump(L1000_up_json_file, f)
    with open(a + "_L1000_down_sig.json", "w") as f:
        #json.dump(L1000_down_json_file, f)

In [149]:
DrOI_df

Unnamed: 0,rid,SCS_centered_by_batch,batch,cell_id,mean_cosine_dist_centered_by_batch,pert_desc,pert_dose,pert_id,pert_time
86,ASG001_MCF7_24H:BRD-K71879491-001-17-6:0.08,0.0,ASG001_MCF7_24H,MCF7,,tretinoin,0.08,BRD-K71879491,24.0
87,ASG001_MCF7_24H:BRD-K71879491-001-17-6:0.4,0.0,ASG001_MCF7_24H,MCF7,,tretinoin,0.4,BRD-K71879491,24.0
88,ASG001_MCF7_24H:BRD-K71879491-001-17-6:10,0.0,ASG001_MCF7_24H,MCF7,,tretinoin,10.0,BRD-K71879491,24.0
89,ASG001_MCF7_24H:BRD-K71879491-001-17-6:2,0.0,ASG001_MCF7_24H,MCF7,,tretinoin,2.0,BRD-K71879491,24.0
162,ASG001_MCF7_6H:BRD-K71879491-001-17-6:0.08,0.0,ASG001_MCF7_6H,MCF7,,tretinoin,0.08,BRD-K71879491,6.0
163,ASG001_MCF7_6H:BRD-K71879491-001-17-6:0.4,0.0,ASG001_MCF7_6H,MCF7,,tretinoin,0.4,BRD-K71879491,6.0
164,ASG001_MCF7_6H:BRD-K71879491-001-17-6:10,0.0,ASG001_MCF7_6H,MCF7,,tretinoin,10.0,BRD-K71879491,6.0
165,ASG001_MCF7_6H:BRD-K71879491-001-17-6:2,0.0,ASG001_MCF7_6H,MCF7,,tretinoin,2.0,BRD-K71879491,6.0
220,ASG001_PC3_24H:BRD-K71879491-001-17-6:0.4,0.0049,ASG001_PC3_24H,PC3,0.911756,tretinoin,0.4,BRD-K71879491,24.0
221,ASG001_PC3_24H:BRD-K71879491-001-17-6:10,0.0,ASG001_PC3_24H,PC3,0.778611,tretinoin,10.0,BRD-K71879491,24.0


In [None]:
######################################################################################################

In [110]:

# load in the EMR Data (filtered > 200 in R code [Drug_diagnosis_test_code.R])
EMR_data = pd.read_csv("EMR_greater_200.csv")
## subset EMR data by the DOI and/or DrOI
EMR_data_df = pd.DataFrame(EMR_data)
#EMR_data
EMR_data_df.drop(EMR_data_df.columns[[0]], axis = 1, inplace = True) # remove the unecessary columns
#EMR_data_df

# implement the search from ICD9-do_id from the manual conversion
icd9_to_doid = pd.read_csv("ICD9_CREEDS_conversion.csv")
icd9_to_doid = pd.DataFrame(icd9_to_doid) # convert it to a data fram to drop unecessary rows
#icd9_to_doid # sanity check
icd9_to_doid_final = icd9_to_doid.drop(icd9_to_doid.columns[[0, 6, 7, 8, 9, 10, 11, 12, 13, 14]], axis = 1)  

## L1000 ANALYSIS -- FEEDS BACK INTO THE API to get additional signatures
metadata = pd.read_csv("L1000_metadata.csv")
#metadata ## same as LINC1000h5.row_metadata_df

EMR_Drug_Names = EMR_data_df['Drug_Name'] # this will be the selection for the dropdown menu
## get the subselection of drug names

In [262]:
###### GENERATE THE FULL ANALYSIS BETWEEN CREEDS AND L1000 GIVEN A DISEASE OF INTEREST

# USER INPUT
####
DOI = "colon adenoma" # disease of interest. CAN TAKE FROM posssible_disease_list FOR NOW
####

### DISEASE --> ICD9 --> DOI
# get the ICD9 from the DOI
DOI_ICD9 = icd9_to_doid_final[icd9_to_doid_final.Disease.apply(lambda s: bool(re.compile(DOI, re.IGNORECASE).search(s)))]
DOI_ICD9_codes = DOI_ICD9.ICD9

# get the do_id from the DOI
DOI_DOID_codes = DOI_ICD9.DOID

# get the do_id from the DOI
DOI_CREEDS_codes = DOI_ICD9.CREEDS_drug_id
    
##filter by DOI. Need to convert DOI to ICD9 first.
icd9_to_doid_final_search = icd9_to_doid_final[icd9_to_doid_final["Disease"].apply(lambda s: bool(re.compile(DOI, re.IGNORECASE).search(s)))]
icd9_to_doid_final_search1 = icd9_to_doid_final_search["ICD9"]

## rebuild the wildcard dataframe
icd9_wildcard = pd.DataFrame(icd9_to_doid_final_search1, columns=['ICD9'])
icd9_wildcard['ICD9_wildcard'] = icd9_wildcard['ICD9'].apply(lambda code: str(code).split('.')[0])
icd9_wildcard.head()

icd9_to_doid_final['ICD9_wildcard'] = icd9_to_doid_final['ICD9'].apply(lambda code: str(code).split('.')[0])
ICD9_df_joined = pd.merge(
    left=icd9_wildcard, left_on='ICD9_wildcard',
    right=icd9_to_doid_final, right_on='ICD9_wildcard',
    how='inner',
    suffixes=(
        '_Manual',
        '_right',
    )
)

emr_sub = EMR_data_df[EMR_data_df["Description"].apply(lambda s: bool(re.compile(str(DOI), re.IGNORECASE).search(str(s))))]
emr_sub.reset_index(drop = True, inplace = True)

emr_sub = []
for a in ICD9_df_joined.ICD9_wildcard.unique():
    emr_sub1 = EMR_data_df[EMR_data_df["ICD9"].apply(lambda s: bool(re.compile(str(a), re.IGNORECASE).search(str(s))))]
    emr_sub.append(emr_sub1)
emr_sub_df = pd.concat(emr_sub)
top_drugs_from_disease = list(emr_sub_df.Drug_Name[0:5]) #take the top 5 drugs
top_drugs_from_disease


single_word_drugs = []
for i in top_drugs_from_disease:
    j = str(i)
    splitted = j.split()
    first_word = splitted[0]
    single_word_drugs.append(first_word) 
single_word_drugs = list(set(single_word_drugs))


# Generate a blacklist process
def process_blacklist(s):
    import re
    blacklist = [
        # remove the classifications of the drugs
        re.compile(r'INJ', re.IGNORECASE),
        re.compile(r'CAP', re.IGNORECASE),
        re.compile(r'\d+', re.IGNORECASE),
        
        # remove drugs that aren't in the L1000
        re.compile(r'SODIUM', re.IGNORECASE),
        re.compile(r'HEPATITIS', re.IGNORECASE),
        re.compile(r'HEPARIN', re.IGNORECASE),
        re.compile(r'CALCIUM', re.IGNORECASE),
        
    ]
    for b in blacklist:
        s = re.sub(b, '', s)
    return s.strip()
single_word_drugs_list = list(pd.Series(single_word_drugs).map(process_blacklist))
single_word_drugs_list


## L1000 API Integration
L1000FWD_URL = 'http://amp.pharm.mssm.edu/L1000FWD/'


L1000_reverse_drugs_store = []
L1000_reverse_pertids_store = []

drug_and_perts = pd.DataFrame()
for a in single_word_drugs_list:
    query_string = a
    L1000_reverse_drug_response = requests.get(L1000FWD_URL + 'synonyms/' + query_string)
    if L1000_reverse_drug_response.status_code == 200:
        #pprint(L1000_reverse_drug_response.json())
        L1000_reverse_significant_query = L1000_reverse_drug_response.json()
        if len(L1000_reverse_significant_query) > 0:
            #json.dump(L1000_reverse_drug_response.json(), open(query_string + '_L1000_reverse_drug_query.json', 'w'), indent=4)
            L1000_reverse_significant_query = L1000_reverse_drug_response.json()
            L1000_reverse_significant_query_df = pd.DataFrame(L1000_reverse_significant_query)
            L1000_reverse_pertids_store.append(list(L1000_reverse_significant_query_df["pert_id"]))
            
            # store the drug name with the pert ID
            drug_and_perts = drug_and_perts.append(L1000_reverse_significant_query_df)
            
            L1000_reverse_drugs_store.append(a)            
            print("Found significant L1000 drug signatures for " + query_string)            
        else:
            print("No significant L1000 drug signatures for " + query_string)     
  
    
L1000_reverse_pertids_flat = []
for sublist in L1000_reverse_pertids_store:
    for item in sublist:
        L1000_reverse_pertids_flat.append(item) 

L1000_reverse_pertids_flat = set(list(L1000_reverse_pertids_flat)) & set(list(metadata["pert_id"])) & set(list(L1000_up_lookup.keys()))

DrOI_disease_up_signatures = {k: L1000_up_lookup[k] for k in (L1000_reverse_pertids_flat)}
DrOI_disease_up_no_perts = {k: v for d in DrOI_disease_up_signatures.values() for k, v in d.items()}
DrOI_disease_up_drug_sigs = list(DrOI_disease_up_no_perts.keys())

DrOI_disease_down_signatures = {k: L1000_down_lookup[k] for k in (L1000_reverse_pertids_flat)}
DrOI_disease_down_no_perts = {k: v for d in DrOI_disease_down_signatures.values() for k, v in d.items()}
DrOI_disease_down_drug_sigs = list(DrOI_disease_down_no_perts.keys())

DrOI_disease_all_sigs = set(DrOI_disease_up_drug_sigs) & set(DrOI_disease_down_drug_sigs)

for a in DrOI_disease_all_sigs:
    L1000_up_json_file = DrOI_disease_up_no_perts[a]
    L1000_down_json_file = DrOI_disease_down_no_perts[a]
    #with open(a + "_L1000_disease_up_sig.json", "w") as f:
        #json.dump(L1000_up_json_file, f)
   # with open(a + "_L1000_disease_down_sig.json", "w") as f:
        #json.dump(L1000_down_json_file, f)


Found significant L1000 drug signatures for Midazolam
No significant L1000 drug signatures for Fentanyl
Found significant L1000 drug signatures for Propofol


In [278]:
drug_and_perts = pd.DataFrame()
for a in single_word_drugs_list:
    query_string = a
    L1000_reverse_drug_response = requests.get(L1000FWD_URL + 'synonyms/' + query_string)
    if L1000_reverse_drug_response.status_code == 200:
        #pprint(L1000_reverse_drug_response.json())
        L1000_reverse_significant_query = L1000_reverse_drug_response.json()
        if len(L1000_reverse_significant_query) > 0:
            #json.dump(L1000_reverse_drug_response.json(), open(query_string + '_L1000_reverse_drug_query.json', 'w'), indent=4)
            L1000_reverse_significant_query = L1000_reverse_drug_response.json()
            L1000_reverse_significant_query_df = pd.DataFrame(L1000_reverse_significant_query)
            L1000_reverse_pertids_store.append(list(L1000_reverse_significant_query_df["pert_id"]))
            
            drug_and_perts = drug_and_perts.append(L1000_reverse_significant_query_df)
            
            L1000_reverse_drugs_store.append(a)            
            print("Found significant L1000 drug signatures for " + query_string)            
        else:
            print("No significant L1000 drug signatures for " + query_string)     
            
            
    
L1000_reverse_pertids_flat = []
for sublist in L1000_reverse_pertids_store:
    for item in sublist:
        L1000_reverse_pertids_flat.append(item) 


L1000_reverse_pertids_flat = set(list(L1000_reverse_pertids_flat)) & set(list(metadata["pert_id"])) & set(list(L1000_up_lookup.keys()))
drug_and_perts_final = pd.DataFrame()
for q in (L1000_reverse_pertids_flat):
    drug_and_perts_final = drug_and_perts_final.append(drug_and_perts[drug_and_perts["pert_id"] == q])
    

DrOI_disease_up_signatures = {k: L1000_up_lookup[k] for k in (L1000_reverse_pertids_flat)}
DrOI_disease_up_no_perts = {k: v for d in DrOI_up_signatures.values() for k, v in d.items()}
DrOI_disease_up_drug_sigs = list(DrOI_disease_up_no_perts.keys())
            

Found significant L1000 drug signatures for Midazolam
No significant L1000 drug signatures for Fentanyl
Found significant L1000 drug signatures for Propofol


In [285]:
list(drug_and_perts_final[drug_and_perts_final["pert_id"] == list(DrOI_disease_up_signatures.keys())].Name)

['PROPOFOL']

In [289]:
drug_and_perts_final

Unnamed: 0,Name,pert_id
0,PROPOFOL,BRD-K82255054


In [287]:
DrOI_disease_up_no_perts

{'CPC014_HT29_6H:BRD-K71879491-001-22-6:10': ['TAP1',
  'FXYD2',
  'TSPAN12',
  'CPD',
  'HBB',
  'PODXL',
  'SCO2',
  'APOC1',
  'FBLN1',
  'S100A12',
  'SLC30A1',
  'DDX39A',
  'STUB1',
  'BHLHE41',
  'GPR183',
  'CIAPIN1',
  'DHRS3',
  'LYZ',
  'PCF11',
  'MARCKS',
  'C13ORF15',
  'ARHGEF10',
  'CDKN2C',
  'SOD2',
  'PLBD1',
  'TSC22D1',
  'ABP1',
  'TSC22D3',
  'IFI30',
  'FCGBP',
  'TSPAN1',
  'SSH3',
  'FZD10',
  'TMEM176B',
  'LY96',
  'PATZ1',
  'CTSS',
  'GATA2',
  'KRAS',
  'VAT1',
  'CX3CR1',
  'KIAA0930',
  'CCND1',
  'ACSL1',
  'RBBP6',
  'C1QA',
  'PTEN',
  'CEP70',
  'TYROBP',
  'LXN',
  'CTBP2',
  'APOBEC3G',
  'FBP1',
  'IL7R',
  'CCL5',
  'HP',
  'GPM6B',
  'COPS6',
  'FCN1',
  'CCDC86',
  'PDZK1IP1',
  'EDNRA',
  'EDNRB',
  'FAM13A',
  'PHLDA1',
  'IGKC',
  'NKG7',
  'OLFM4',
  'PARP8',
  'TCTN1',
  'GUCY1A3',
  'PPP6C',
  'MSMB',
  'TACSTD2',
  'SULT1C2',
  'G0S2',
  'VNN1',
  'C10ORF116',
  'CDK6',
  'SLC14A1',
  'FOS',
  'GBP1',
  'PSMB8',
  'TP53I3',
  'SAMM50',


In [288]:
DrOI_disease_up_signatures

{'BRD-K82255054': {'CPD003_PC3_6H:BRD-K82255054-001-11-8:10': ['TAP1',
   'PTGS2',
   'CD52',
   'PCK1',
   'TIMP2',
   'GHR',
   'TUSC3',
   'GNPTAB',
   'HSPA8',
   'FADS1',
   'HIPK1',
   'FBP1',
   'LEPROT',
   'LYPD1',
   'FRY',
   'CPM',
   'SFTPB',
   'SPG20',
   'SFTPC',
   'EML3',
   'ACAT2',
   'CCL5',
   'FGR',
   'MICAL2',
   'CDH1',
   'IRAK3',
   'WEE1',
   'CDH2',
   'MSMB',
   'CBLB',
   'SLC38A6',
   'AQP3',
   'TRHDE',
   'RNASE6',
   'IDH1',
   'TPSB2',
   'CGRRF1',
   'BAZ1A',
   'PHLDA1',
   'STK3',
   'DHX29',
   'GATA6',
   'RFNG',
   'TMEM100',
   'RNF19B',
   'TNS1',
   'PDE8B',
   'KRAS',
   'BRP44',
   'ADCY1',
   'CNN3',
   'PLD3',
   'CXCL9',
   'TRAPPC3',
   'SLC2A3',
   'TCF7L2',
   'AHR',
   'ENC1',
   'CCL19',
   'PID1',
   'PAK1',
   'TNFSF13',
   'ATP6V1D',
   'CD1E',
   'DMD',
   'PLCB1',
   'CRELD2',
   'DES',
   'SPON1',
   'NDNF',
   'WIF1',
   'PTGDS',
   'EDNRB',
   'KLHDC2',
   'IL1R1',
   'NEK7',
   'SMURF2',
   'TUBA1A',
   'NT5E',
   'SLCO4C

In [290]:
def drug_name_lookup(pert_id):
    drug_name = list(drug_and_perts_final[drug_and_perts_final["pert_id"] == list(DrOI_disease_up_signatures.keys())].Name)
    return drug_name

In [291]:
[
    { 'drug_name': drug_name_lookup(pert_id), 'sig_id': sig_id, "pert_id": pert_id, "up_genes": up_genes }
    for pert_id, obj in DrOI_disease_up_signatures.items()
    for sig_id, up_genes in obj.items()
]

[{'drug_name': ['PROPOFOL'],
  'sig_id': 'CPD003_PC3_6H:BRD-K82255054-001-11-8:10',
  'pert_id': 'BRD-K82255054',
  'up_genes': ['TAP1',
   'PTGS2',
   'CD52',
   'PCK1',
   'TIMP2',
   'GHR',
   'TUSC3',
   'GNPTAB',
   'HSPA8',
   'FADS1',
   'HIPK1',
   'FBP1',
   'LEPROT',
   'LYPD1',
   'FRY',
   'CPM',
   'SFTPB',
   'SPG20',
   'SFTPC',
   'EML3',
   'ACAT2',
   'CCL5',
   'FGR',
   'MICAL2',
   'CDH1',
   'IRAK3',
   'WEE1',
   'CDH2',
   'MSMB',
   'CBLB',
   'SLC38A6',
   'AQP3',
   'TRHDE',
   'RNASE6',
   'IDH1',
   'TPSB2',
   'CGRRF1',
   'BAZ1A',
   'PHLDA1',
   'STK3',
   'DHX29',
   'GATA6',
   'RFNG',
   'TMEM100',
   'RNF19B',
   'TNS1',
   'PDE8B',
   'KRAS',
   'BRP44',
   'ADCY1',
   'CNN3',
   'PLD3',
   'CXCL9',
   'TRAPPC3',
   'SLC2A3',
   'TCF7L2',
   'AHR',
   'ENC1',
   'CCL19',
   'PID1',
   'PAK1',
   'TNFSF13',
   'ATP6V1D',
   'CD1E',
   'DMD',
   'PLCB1',
   'CRELD2',
   'DES',
   'SPON1',
   'NDNF',
   'WIF1',
   'PTGDS',
   'EDNRB',
   'KLHDC2',
   '

In [265]:
drug_and_perts_final = pd.DataFrame()
for q in (L1000_reverse_pertids_flat):
    drug_and_perts_final = drug_and_perts_final.append(drug_and_perts[drug_and_perts["pert_id"] == q])

In [266]:
drug_and_perts[drug_and_perts["pert_id"] == q]

Unnamed: 0,Name,pert_id
0,PROPOFOL,BRD-K82255054


In [267]:
drug_and_perts_final

Unnamed: 0,Name,pert_id
0,MIDAZOLAM,BRD-K00532621
0,PROPOFOL,BRD-K82255054
0,MIDAZOLAM,BRD-K00532621
0,PROPOFOL,BRD-K82255054


In [193]:
## X2K API integration 

# Import modules
import http.client
import json

##### Function to run X2K
### Input: a Python list of gene symbols
### Output: a dictionary containing the results of X2K, ChEA, G2N, KEA.


def run_X2K(input_genes, options={}):
    # Open HTTP connection
    conn = http.client.HTTPConnection("amp.pharm.mssm.edu")

    # Set default options
    default_options = {'text-genes': '\n'.join(input_genes),
                       'included_organisms': 'both',
                       'TF-target gene background database used for enrichment': 'ChEA & ENCODE Consensus',
                       'sort transcription factors by': 'p-value',
                       'min_network_size': 10,
                       'number of top TFs': 10,
                       'path_length': 2,
                       'min_number_of_articles_supporting_interaction': 0,
                       'max_number_of_interactions_per_protein': 200,
                       'max_number_of_interactions_per_article': 100,
                       'enable_BioGRID': True,
                       'enable_IntAct': True,
                       'enable_MINT': True,
                       'enable_ppid': True,
                       'enable_Stelzl': True,
                       'kinase interactions to include': 'kea 2018',
                       'sort kinases by': 'p-value'}

    # Update options
    for key, value in options.items():
        if key in default_options.keys() and key != 'text-genes':
            default_options.update({key: value})

    # Get payload
    boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"
    payload = ''.join(
        ['--' + boundary + '\r\nContent-Disposition: form-data; name=\"{key}\"\r\n\r\n{value}\r\n'.format(**locals())
         for key, value in default_options.items()]) + '--' + boundary + '--'

    # Get Headers
    headers = {
        'content-type': "multipart/form-data; boundary=" + boundary,
        'cache-control': "no-cache",
    }

    # Initialize connection
    conn.request("POST", "/X2K/api", payload, headers)

    # Get response
    res = conn.getresponse()

    # Read response
    data = res.read().decode('utf-8')

    # Convert to dictionary
    x2k_results = {key: json.loads(value) if key != 'input' else value for key, value in json.loads(data).items()}

    # Clean results
    x2k_results['ChEA'] = x2k_results['ChEA']['tfs']
    x2k_results['G2N'] = x2k_results['G2N']['network']
    x2k_results['KEA'] = x2k_results['KEA']['kinases']
    x2k_results['X2K'] = x2k_results['X2K']['network']

    # Return results
    return x2k_results

############################################################################################################################################################
#### L1000 DRUG X2K


### L1000 PRELOAD DATA DRUG CARD
with open ("L1000_up_lookup.json", 'r') as f:
    L1000_up_lookup = json.load(f)

with open ("L1000_down_lookup.json", 'r') as f:
    L1000_down_lookup = json.load(f)
    
metadata = pd.read_csv("L1000_metadata.csv")

### L1000 DRUG CARD DRUG INPUT
DrOI = "tretinoin" ## this will be the user input 

DrOI_df = metadata[metadata["pert_desc"] == DrOI]
DrOI_pert_ids = list(DrOI_df["pert_id"])

DrOI_up_signatures = {k: L1000_up_lookup[k] for k in (DrOI_pert_ids)}
DrOI_up_no_perts = {k: v for d in DrOI_up_signatures.values() for k, v in d.items()}
DrOI_up_drug_sigs = list(DrOI_up_no_perts.keys())

DrOI_down_signatures = {k: L1000_down_lookup[k] for k in (DrOI_pert_ids)}
DrOI_down_no_perts = {k: v for d in DrOI_down_signatures.values() for k, v in d.items()}
DrOI_down_drug_sigs = list(DrOI_down_no_perts.keys())

DrOI_all_sigs = set(DrOI_up_drug_sigs) & set (DrOI_down_drug_sigs)

for a in DrOI_all_sigs:
    L1000_up_json_file = DrOI_up_no_perts[a]
    L1000_down_json_file = DrOI_down_no_perts[a]
    
    ### X2k CODE
    L1000_X2K_up_genes = run_X2K(L1000_up_json_file)
    L1000_X2K_up_genes = L1000_X2K_up_genes["X2K"]
    L1000_X2K_up_genes_df = pd.DataFrame(L1000_X2K_up_genes['nodes'])
    filename_up = (a + "_X2K_up_genes.csv")
    #L1000_X2K_up_genes_df.to_csv(filename_up) # THIS IS THE FILE THEY SHOULD BE ABLE TO DOWNLOAD
    print(filename_up)

        #L1000 down genes
    L1000_X2K_down_genes = run_X2K(L1000_down_json_file)
    L1000_X2K_down_genes = L1000_X2K_down_genes["X2K"]
    L1000_X2K_down_genes_df = pd.DataFrame(L1000_X2K_down_genes['nodes'])
    filename_down = (a + "_X2K_down_genes.csv")
    #L1000_X2K_down_genes_df.to_csv(filename_down) # THIS IS THE FILE THEY SHOULD BE ABLE TO DOWNLOAD

    with open(a + "_L1000_up_sig.json", "w") as f:
        json.dump(L1000_up_json_file, f) # this is the json file
        #print("ups")
    with open(a + "_L1000_down_sig.json", "w") as f:
        json.dump(L1000_down_json_file, f) # this is the json file
        #print("downs")
    
    
    