In [1]:
## Full backend analysis for the CREEDS and L1000 dataset 
import os
import pandas as pd
from pandas.compat import StringIO
import numpy as np
from numpy import loadtxt
import sys
import json
from pprint import pprint
import objectpath
import csv
import re
import matplotlib.pyplot as plt
import json, requests
from pprint import pprint
import itertools
import scipy
from scipy.spatial import distance
from sklearn.metrics.pairwise import pairwise_distances
from clustergrammer_widget import *
def get_geneset(df, indexer):
    df_ = df.loc[indexer, :]
    return list(df_[df_ == 1].index)

In [2]:
## load in the pre-formed datasets from the L1000_Analysis and CREEDS_Analysis files.
## THIS TAKES A WHILE TO LOAD, SO ONLY LOAD THIS ONCE AND EARLY

# L1000 up and down gene loads for drug signatures
L1000_up_genes = pd.read_csv("L1000_up_genes.csv")
L1000_down_genes = pd.read_csv("L1000_down_genes.csv")

# CREEDS up and down genes for disease signatures
with open("disease_signatures-v1.0.json") as f:
    CREEDS_data = json.load(f)

# generate the up and down gene signatures
CREEDS_up_genes = {
    row['do_id']: row['up_genes']
    for row in CREEDS_data
}
CREEDS_down_genes = {
    row['do_id']: row['down_genes']
    for row in CREEDS_data
}

# load in the EMR Data (filtered > 200 in R code [Drug_diagnosis_test_code.R])
EMR_data = pd.read_csv("EMR_greater_200.csv")
## subset EMR data by the DOI and/or DrOI
EMR_data_df = pd.DataFrame(EMR_data)
#EMR_data
EMR_data_df.drop(EMR_data_df.columns[[0]], axis = 1, inplace = True) # remove the unecessary columns
#EMR_data_df

# implement the search from ICD9-do_id from the manual conversion
icd9_to_doid = pd.read_csv("ICD9_CREEDS_conversion.csv")
icd9_to_doid = pd.DataFrame(icd9_to_doid) # convert it to a data fram to drop unecessary rows
#icd9_to_doid # sanity check
icd9_to_doid_final = icd9_to_doid.drop(icd9_to_doid.columns[[0, 6, 7, 8, 9, 10, 11, 12, 13, 14]], axis = 1)
#icd9_to_doid_final # sanity check

## PRELOADED DATA
## L1000 conversion and analysis DRUG
L1000_down_extract = pd.DataFrame(L1000_down_genes)
L1000_up_extract = pd.DataFrame(L1000_up_genes)

## L1000 ANALYSIS -- FEEDS BACK INTO THE API to get additional signatures
metadata = pd.read_csv("L1000_metadata.csv")
#metadata ## same as LINC1000h5.row_metadata_df

EMR_Drug_Names = EMR_data_df['Drug_Name'] # this will be the selection for the dropdown menu
## get the subselection of drug names

unique_pert_ids = metadata['pert_id'].unique()
unique_drug_names = metadata['pert_desc'].unique()

possible_drug_inputs = set(unique_drug_names) & set(EMR_Drug_Names) # 28 possible drug inputs


In [12]:
possible_drug_inputs

{'BISACODYL',
 'CARVEDILOL',
 'DEXAMETHASONE',
 'DIAZEPAM',
 'DIGOXIN',
 'Dexamethasone',
 'ENALAPRIL MALEATE',
 'EZETIMIBE',
 'FLUCONAZOLE',
 'GLIPIZIDE',
 'GUANFACINE',
 'Glycopyrrolate',
 'LOVASTATIN',
 'METOLAZONE',
 'METOPROLOL TARTRATE',
 'MINOXIDIL',
 'Milrinone',
 'NADOLOL',
 'OMEPRAZOLE',
 'PREDNISOLONE',
 'PREDNISONE',
 'Propofol',
 'QUETIAPINE',
 'RANITIDINE',
 'SIMVASTATIN',
 'TACROLIMUS',
 'VALSARTAN',
 'WARFARIN'}

In [28]:
### try the input
DrOI = "dexamethosone" # drug of interest. MAKE SURE TO SELECT THIS FROM "possible_drug_inputs"
####

DrOI_up_extract = L1000_up_extract[L1000_up_extract['Unnamed: 0'].apply(lambda s: bool(re.compile(DrOI, re.IGNORECASE).search(s)))]
DrOI_up_final = DrOI_up_extract.loc[:, (DrOI_up_extract != 0).any(axis=0)] # remove any genes without any expression in any of the results

DrOI_down_extract = L1000_down_extract[L1000_down_extract['Unnamed: 0'].apply(lambda s: bool(re.compile(DrOI, re.IGNORECASE).search(s)))]
DrOI_down_final = DrOI_down_extract.loc[:, (DrOI_down_extract != 0).any(axis=0)] # remove any genes without any expression in any of the results


L1000FWD_URL = 'http://amp.pharm.mssm.edu/L1000FWD/'

query_string = DrOI
response = requests.get(L1000FWD_URL + 'synonyms/' + query_string)
if response.status_code == 200:
	pprint(response.json())
	#json.dump(response.json(), open('api1_result.json', 'wb'), indent=4)
L1000_significant_query = response.json()
L1000_significant_query_df = pd.DataFrame(L1000_significant_query)
L1000_significant_pert_ids = L1000_significant_query_df["pert_id"]
L1000_summary_table = metadata[metadata["pert_id"].isin(L1000_significant_pert_ids)] # display this table!
L1000_drug_signatures = L1000_summary_table.rid


if len(L1000_significant_pert_ids) > 0:
    
    test=[]
    for q in L1000_significant_pert_ids:
        meta_doi = metadata[metadata["pert_id"].apply(lambda s: bool(re.compile(str(q), re.IGNORECASE).search(str(s))))]
        #print(q)
        meta_doi_ids = meta_doi.rid
        query = list(meta_doi_ids)
       # print(query)
        test.append(query)
    #test
    test1 = [x for x in test if x]
   #test1
    L1000_drug_signatures = list(itertools.chain.from_iterable(test1))
    
  
    if len(L1000_drug_signatures) > 0:
    ## L1000 iterations API   
        L1000FWD_URL = 'http://amp.pharm.mssm.edu/L1000FWD/'
        json_name_store = []
        drug_sig_store = []
        for x in L1000_drug_signatures:
            sig_id = x
            response = requests.get(L1000FWD_URL + 'sig/' + str(sig_id))
            if response.status_code == 200:
                #pprint(response.json())
                json.dump(response.json(), open(sig_id + '_api2_result.json', 'w'), indent=4)
                json_name = (sig_id + '_api2_result.json')
                print(json_name)
                json_name_store.append(json_name)
                
                L1000_query = response.json() 
                L1000_query_up_genes = L1000_query["up_genes"]
                L1000_query_down_genes = L1000_query["down_genes"]


        L1000_up_df = pd.DataFrame(L1000_query_up_genes)
        L1000_down_df = pd.DataFrame(L1000_query_down_genes)
        L1000_down_df = L1000_down_df.T
        L1000_up_df = L1000_up_df.T
        #L1000_up_df

        q1 = pd.DataFrame(L1000_query_down_genes, 
                     columns= ["L1000 Down Genes"])[0:10]
        q = pd.DataFrame(L1000_query_up_genes, 
                     columns= ["L1000 Up Genes"])[0:10]
        L1000_up_down10 = pd.concat([q1, q], axis = 1, ignore_index= False)
        L1000_up_down10
        print("SIGNIFICANT L1000 SIGNATURES")
    else:
        print ("NO Significant signatures found in metadata")

    
else:
    print("NO SIGNIFICANT L1000 SIGNATURES FOR" + DrOI)

[]


KeyError: 'pert_id'

In [29]:
L1000_significant_pert_ids

0    BRD-A87606379
1    BRD-A97454584
Name: pert_id, dtype: object

In [116]:
for x in L1000_drug_signatures:
    sig_id = x
    response = requests.get(L1000FWD_URL + 'sig/' + str(sig_id))
    if response.status_code == 200:
        #pprint(response.json())
        #json.dump(response.json(), open(sig_id + '_api2_result.json', 'w'), indent=4)
        #print(response)
        json_name = (sig_id + '_api2_result.json')
       # print(json_name)
        json_name_store.append(json_name)
#json_name_store

In [None]:
## this can be preloaded
CREEDS_Disease_total = {
    row['geo_id']: row["disease_name"]
    for row in CREEDS_data
}
CREEDS_Disease_total

In [195]:
### CREEDS DRUG CARD 



#for a in loop_iteration:
CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
CREEEDS_Drug_response = requests.get(CREEDS_URL + 'search', params={'q':DrOI})
if CREEEDS_Drug_response.status_code == 200:
    #pprint(CREEEDS_Drug_response.json())
    #json.dump(CREEEDS_Drug_response.json(), open(DrOI + '_api1_result.json', 'w'), indent=4)
    CREEDS_drug_output_df = pd.DataFrame(CREEEDS_Drug_response.json())
    CREEDS_drug_output_ids = list(CREEDS_drug_output_df["id"])
    
    for a in CREEDS_drug_output_ids:
        CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
        CREEDS_drug_sigs_response = requests.get(CREEDS_URL + 'api', params={'id':'drug:DM609'})
        if CREEDS_drug_sigs_response.status_code == 200:
            CREEDS_drug_sigs_response_json = CREEDS_drug_sigs_response.json()
            
            ## up genes
            CREEDS_drug_sigs_up_genes = CREEDS_drug_sigs_response_json['up_genes']
            CREEDS_drug_sigs_up_genes_df = pd.DataFrame(CREEDS_drug_sigs_up_genes) # this is the up genes dataframe
            filename1 = (a + "_CREEDS_drug_sig_up_genes.csv")
            CREEDS_drug_sigs_up_genes_df.to_csv(filename1) # this saves the df as a csv
            
            ## down genes
            CREEDS_drug_sigs_down_genes = CREEDS_drug_sigs_response_json['down_genes']
            CREEDS_drug_sigs_down_genes_df = pd.DataFrame(CREEDS_drug_sigs_down_genes)# this is the down genes dataframe
            filename2 = (a + "_CREEDS_drug_sig_down_genes.csv")
            CREEDS_drug_sigs_down_genes_df.to_csv(filename2)
            CREEDS_drug_sigs_down_genes = CREEDS_drug_sigs_response_json['down_genes'] # this saves the df as a csv
            
            ## json propagation
            #pprint(response.json())
            json.dump(response.json(), open(a + '_CREEDS_Drug_sig.json', 'w'), indent=4) # if the user wants the entire json, they can download this
            

In [189]:
list(CREEDS_drug_output_df["id"])

['drug:DM609',
 'drug:DM610',
 'drug:DM611',
 'drug:DM2490',
 'drug:DM2491',
 'drug:DM2492',
 'drug:DM2493',
 'drug:DM2494',
 'drug:DM2495',
 'drug:DM2496']

In [188]:

CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
response = requests.get(CREEDS_URL + 'api', params={'id':'drug:DM609'})
if response.status_code == 200:
	pprint(response.json())
	json.dump(response.json(), open('api3_result.json', 'wb'), indent=4)

{'cas_num': '20830-75-5',
 'ctrl_ids': ['GSM1450564',
              'GSM1450658',
              'GSM1451013',
              'GSM1451038',
              'GSM1451583',
              'GSM1451642',
              'GSM1451670',
              'GSM1451804',
              'GSM1451978',
              'GSM1452012',
              'GSM1452133',
              'GSM1452214'],
 'dose': '11 mg/kg',
 'down_genes': [['Brp44l', -0.23358292877674103],
                ['Adora3', -0.12489742785692215],
                ['Ywhah', -0.1182096004486084],
                ['Slc25a4', -0.09553932398557663],
                ['Acta1', -0.092685766518116],
                ['Lrp16', -0.09190116822719574],
                ['Wsb2', -0.08691895753145218],
                ['Immt', -0.08546142280101776],
                ['Tnni3', -0.0850004330277443],
                ['LOC690109', -0.08233951777219772],
                ['Tubb2c', -0.08166361600160599],
                ['Rps6', -0.077518530189991],
                ['Cycs', -0.

TypeError: a bytes-like object is required, not 'str'

In [194]:
CREEDS_drug_query = response.json()
pd.DataFrame(CREEDS_drug_query["up_genes"])

Unnamed: 0,0,1
0,Fgf2,0.354092
1,Smad5,0.291227
2,Ibsp,0.232229
3,Penk1,0.189057
4,Sert1,0.168047
5,Fth1,0.128081
6,Pnpla2_predicted,0.114262
7,Hmga2,0.110594
8,Fos,0.103193
9,Ccng1,0.092278


In [21]:
CREEDS_drug_ids_list

['dz:68', 'dz:1012', 'dz:477', 'dz:230', 'dz:185', 'dz:372']

In [27]:
### CREEDS DISEASE CARD (DRUG INPUT)

# RETURNS THE do_id, geo_id, and disease name in a dictionary
CREEDS_GSE = {
    row['id']: [row['geo_id'], row["disease_name"]]
    for row in CREEDS_data
}

## filter by DrOI need icd9 codes for proper conversion and query through CREEDS
droi_search =EMR_data_df[EMR_data_df['Drug_Name'].apply(lambda s: bool(re.compile(DrOI, re.IGNORECASE).search(s)))]
droi_search_top5 = droi_search[0:5]
EMR_top_disease_from_drug = droi_search_top5["ICD9"]
#top_disease_from_drug = EMR_top_disease_from_drug[0:5]

## build a datatable of all the ICD-9 CM diagnosis codes families (i.e no decimal points)
EMR_top_disease_from_drug_df = pd.DataFrame(EMR_top_disease_from_drug, columns=['ICD9'])
EMR_top_disease_from_drug_df['ICD9_wildcard'] = EMR_top_disease_from_drug_df['ICD9'].apply(lambda code: code.split('.')[0])
#EMR_top_disease_from_drug_df.head()
icd9_to_doid_final['ICD9_wildcard'] = icd9_to_doid_final['ICD9'].apply(lambda code: str(code).split('.')[0])
#icd9_to_doid_final.head()
df_joined = pd.merge(
    left=EMR_top_disease_from_drug_df, left_on='ICD9_wildcard',
    right=icd9_to_doid_final, right_on='ICD9_wildcard',
    how='inner',
    suffixes=(
        '_left',
        '_right',
    )
)

CREEDS_drug_ids = pd.DataFrame(set(df_joined.CREEDS_drug_id))
CREEDS_drug_ids_list = list(set(df_joined.CREEDS_drug_id))
#CREEDS_GSE.keys()
#CREEDS_drug_ids_list
CREEDS_Drug_Final = dict((k, CREEDS_GSE[k]) for k in CREEDS_drug_ids_list)
CREEDS_drug_final_df = pd.DataFrame(CREEDS_Drug_Final).T
CREEDS_drug_final_df.columns = ["GSE_ID", "DISEASE"]
#CREEDS_drug_final_df # DISPLAY THIS DATAFRAME



### CREEDS DISEASE CARD FROM DRUG INPUT API
CREEDS_drug_final_diseases = CREEDS_drug_final_df.DISEASE
CREEDS_drug_final_GSE_ID = CREEDS_drug_final_df.GSE_ID
## CREEDS DISEASE CARD FROM DISEASE QUERY 

loop_iteration = np.arange(0, len(CREEDS_drug_final_diseases))
loop_iteration = list(loop_iteration)

CREEDS_total_api_df = []

for a in loop_iteration:
    CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
    CREEEDS_Disease_response = requests.get(CREEDS_URL + 'api', params={'id':CREEDS_drug_ids_list[a]})
    if CREEEDS_Disease_response.status_code == 200:
        #pprint(CREEEDS_Disease_response.json())
        #json.dump(CREEEDS_Drug_response.json(), open(CREEDS_drug_final_GSE_ID[a] + '_api1_result.json', 'w'), indent=4)
        CREEEDS_Disease_response_json = CREEEDS_Disease_response.json()
        
        ## up genes
        CREEDS_disease_sigs_up_genes = CREEEDS_Disease_response_json['up_genes']
        CREEDS_disease_sigs_up_genes_df = pd.DataFrame(CREEDS_disease_sigs_up_genes) # this is the up genes dataframe
        filename1 = (str(CREEDS_drug_ids_list[a]) + "_CREEDS_disease_sig_up_genes.csv")
        CREEDS_disease_sigs_up_genes_df.to_csv(filename1) # this saves the df as a csv
        
        
        ## down genes
        CREEDS_disease_sigs_down_genes = CREEEDS_Disease_response_json['down_genes']
        CREEDS_disease_sigs_down_genes_df = pd.DataFrame(CREEDS_disease_sigs_down_genes) # this is the up genes dataframe
        filename2 = (str(CREEDS_drug_ids_list[a]) + "_CREEDS_disease_sig_down_genes.csv")
        CREEDS_disease_sigs_down_genes_df.to_csv(filename2) # this saves the df as a csv
        
        # entire json
        json.dump(response.json(), open(a + '_CREEDS_Disease_sig.json', 'w'), indent=4) # if the user wants the entire json, they can download this
            
        
        #CREEEDS_Drug_response_df = pd.DataFrame(CREEEDS_Drug_response_json)
        #CREEEDS_Drug_response_df # This will be the dataframe to return
        #CREEDS_total_api_df.append(CREEEDS_Drug_response_df)
#CREEDS_total_api_df = pd.concat(CREEDS_total_api_df, axis =1)
#CREEDS_total_api_df.T ## display this datatable



ConnectionError: HTTPConnectionPool(host='amp.pharm.mssm.edu', port=80): Max retries exceeded with url: /CREEDS/api?id=dz%3A68 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fc7e94cc3c8>: Failed to establish a new connection: [Errno 60] Operation timed out'))

In [24]:
CREEEDS_Drug_response_json

{'do_id': 'DOID:5082',
 'cell_type': 'Hepatic Tissue',
 'pert_ids': ['GSM32443', 'GSM32444', 'GSM32445'],
 'umls_cui': 'C0023890',
 'geo_id': 'GSE1843',
 'platform': 'GPL341',
 'version': '1.0',
 'ctrl_ids': ['GSM32440', 'GSM32441', 'GSM32442'],
 'down_genes': [['Dnase1l3', -0.11954674869775772],
  ['Lrg1', -0.10601773858070374],
  ['Il1a', -0.09904612600803375],
  ['Mt2A', -0.09888061881065369],
  ['Btg2', -0.09641542285680771],
  ['Egr1', -0.09293881803750992],
  ['Stom', -0.08701872080564499],
  ['Cyp4b1', -0.08677317947149277],
  ['Pde2a', -0.08359409123659134],
  ['Ctsl', -0.07969368249177933],
  ['Txnip', -0.07940316200256348],
  ['Bmp2', -0.07727105170488358],
  ['Hspb1', -0.0771307498216629],
  ['Eng', -0.07660254836082458],
  ['Gstp1', -0.07371693849563599],
  ['Kdr', -0.07116224616765976],
  ['Lyve1', -0.07061560451984406],
  ['Gadd45g', -0.06928515434265137],
  ['Fcgr2b', -0.06840667128562927],
  ['Adipor1', -0.06663866341114044],
  ['Junb', -0.06634946912527084],
  ['Ppap2b

In [161]:
list(CREEEDS_Drug_response_json)

[{'do_id': 'DOID:6000',
  'cell_type': 'Myocardial tissue',
  'pert_ids': ['GSM40957', 'GSM40958'],
  'umls_cui': 'C0018802',
  'geo_id': 'GSE2236',
  'platform': 'GPL1261',
  'version': '1.0',
  'ctrl_ids': ['GSM40959', 'GSM40960'],
  'disease_name': 'congestive heart failure',
  'organism': 'mouse',
  'id': 'dz:258',
  'curator': 'Joel.Dudley'},
 {'do_id': 'DOID:6000',
  'cell_type': 'Myocardial tissue',
  'pert_ids': ['GSM35542', 'GSM35543', 'GSM35561'],
  'umls_cui': 'C0018801',
  'geo_id': 'GSE1988',
  'platform': 'GPL339',
  'version': '1.0',
  'ctrl_ids': ['GSM35562', 'GSM35563', 'GSM35564', 'GSM35566'],
  'disease_name': 'congestive heart failure',
  'organism': 'mouse',
  'id': 'dz:273',
  'curator': 'Joel.Dudley'}]

In [145]:
loop_iteration = np.arange(0, len(CREEDS_drug_final_diseases))
list(loop_iteration)

[0, 1, 2]

In [128]:
icd9_to_doid_final[0:5]

Unnamed: 0,CREEDS_drug_id,Disease,DOID,Concept_ID,ICD9,ICD9_wildcard
0,dz:578,arthritis,DOID:848,C0003864,714.0,714
1,dz:713,asthma,DOID:2841,C0004096,493.0,493
2,dz:116,oligodendroglioma,DOID:3181,C0028945,192.9,192
3,dz:98,appendicitis,DOID:8337,C0003615,540.0,540
4,dz:39,breast cancer,DOID:1612,C0006142,174.0,174


In [129]:
EMR_top_disease_from_drug_df[0:5]

Unnamed: 0,ICD9,ICD9_wildcard
369,428.0,428
799,427.31,427
1181,428.0,428
1709,427.31,427
1886,V14,V14


In [117]:
### CREEDS DISEASE CARD (DRUG INPUT)

# RETURNS THE do_id, geo_id, and disease name in a dictionary
CREEDS_GSE = {
    row['id']: [row['geo_id'], row["disease_name"]]
    for row in CREEDS_data
}

## filter by DrOI need icd9 codes for proper conversion and query through CREEDS
droi_search =EMR_data_df[EMR_data_df['Drug_Name'].apply(lambda s: bool(re.compile(DrOI, re.IGNORECASE).search(s)))]
droi_search_top5 = droi_search[0:5]
EMR_top_disease_from_drug = droi_search_top5["ICD9"]
#top_disease_from_drug = EMR_top_disease_from_drug[0:5]

## build a datatable of all the ICD-9 CM diagnosis codes families (i.e no decimal points)
EMR_top_disease_from_drug_df = pd.DataFrame(EMR_top_disease_from_drug, columns=['ICD9'])
EMR_top_disease_from_drug_df['ICD9_wildcard'] = EMR_top_disease_from_drug_df['ICD9'].apply(lambda code: code.split('.')[0])
#EMR_top_disease_from_drug_df.head()
icd9_to_doid_final['ICD9_wildcard'] = icd9_to_doid_final['ICD9'].apply(lambda code: str(code).split('.')[0])
#icd9_to_doid_final.head()
df_joined = pd.merge(
    left=EMR_top_disease_from_drug_df, left_on='ICD9_wildcard',
    right=icd9_to_doid_final, right_on='ICD9_wildcard',
    how='inner',
    suffixes=(
        '_left',
        '_right',
    )
)

CREEDS_drug_ids = pd.DataFrame(set(df_joined.CREEDS_drug_id))
CREEDS_drug_ids_list = list(set(df_joined.CREEDS_drug_id))
#CREEDS_GSE.keys()
#CREEDS_drug_ids_list
CREEDS_Drug_Final = dict((k, CREEDS_GSE[k]) for k in CREEDS_drug_ids_list)
CREEDS_drug_final_df = pd.DataFrame(CREEDS_Drug_Final).T
CREEDS_drug_final_df.columns = ["GSE_ID", "DISEASE"]
CREEDS_drug_final_df # this will be the dataframe to display



Unnamed: 0,GSE_ID,DISEASE
dz:258,GSE2236,congestive heart failure
dz:273,GSE1988,congestive heart failure
dz:179,GSE3675,Decompensated cardiac failure


In [118]:
# CREEDS DISEASE CARD CONT

CREEDS_GSE_IDS = list(CREEDS_drug_final_df.GSE_ID)

## this can be preloaded
CREEDS_Disease_up_genes = {
    row['geo_id']: row["up_genes"]
    for row in CREEDS_data
}

CREEDS_Disease_down_genes = {
    row['geo_id']: row["down_genes"]
    for row in CREEDS_data
}
##


CREEDS_Disease_up_genes_subset = {  # This can be downloaded and saved for all geo_ID (it's not a json, but a dictionary)
    geo_id: geneset
    for geo_id, geneset in CREEDS_Disease_up_genes.items()
    if geo_id in CREEDS_GSE_IDS
}
#CREEDS_Disease_up_genes_subset


CREEDS_Disease_down_genes_subset = { # This can be downloaded and saved for all geo_ID  (it's not a json, but a dictionary)
    geo_id: geneset
    for geo_id, geneset in CREEDS_Disease_down_genes.items()
    if geo_id in CREEDS_GSE_IDS
}

In [119]:
CREEDS_Disease_up_genes_subset

{'GSE1988': [['Myh7', 0.31668737530708313],
  ['Ankrd1', 0.28965798020362854],
  ['Nppa', 0.2658657729625702],
  ['Nppb', 0.21812331676483154],
  ['Ctgf', 0.19904661178588867],
  ['Acta1', 0.1512189358472824],
  ['Ptgds', 0.13377198576927185],
  ['Postn', 0.12912796437740326],
  ['Neat1', 0.11931392550468445],
  ['Gpx3', 0.11522702872753143],
  ['Rpl3', 0.11367413401603699],
  ['Eef1a1', 0.09848426282405853],
  ['Mgp', 0.09793787449598312],
  ['Ehd4', 0.09600234776735306],
  ['Mybpc3', 0.08380696177482605],
  ['Ly6a', 0.08174382895231247],
  ['Hba-a2', 0.08120974898338318],
  ['Gapdh', 0.07149189710617065],
  ['Naca', 0.0679885521531105],
  ['Actn2', 0.06788095086812973],
  ['BC094435', 0.06748389452695847],
  ['Bgn', 0.06529015302658081],
  ['Bnip3', 0.059536803513765335],
  ['C3', 0.05861631780862808],
  ['Fhl1', 0.05655547231435776],
  ['Ctsl', 0.055736128240823746],
  ['Clu', 0.05277891084551811],
  ['Rpl10', 0.04907064139842987],
  ['Rsph3b', 0.04732457548379898],
  ['Samd4', 0.04

In [30]:
pd.DataFrame(CREEDS_Disease_up_genes_subset)

NameError: name 'CREEDS_Disease_up_genes_subset' is not defined

In [None]:
###### OLD CODE: DO NOT RUN

### CREEDS DISEASE CARD (DRUG INPUT)

# RETURNS THE do_id, geo_id, and disease name in a dictionary
CREEDS_GSE = {
    row['id']: [row['geo_id'], row["disease_name"]]
    for row in CREEDS_data
}

## filter by DrOI need icd9 codes for proper conversion and query through CREEDS
droi_search =EMR_data_df[EMR_data_df['Drug_Name'].apply(lambda s: bool(re.compile(DrOI, re.IGNORECASE).search(s)))]
droi_search_top5 = droi_search[0:5]
EMR_top_disease_from_drug = droi_search_top5["ICD9"]
#top_disease_from_drug = EMR_top_disease_from_drug[0:5]

## build a datatable of all the ICD-9 CM diagnosis codes families (i.e no decimal points)
EMR_top_disease_from_drug_df = pd.DataFrame(EMR_top_disease_from_drug, columns=['ICD9'])
EMR_top_disease_from_drug_df['ICD9_wildcard'] = EMR_top_disease_from_drug_df['ICD9'].apply(lambda code: code.split('.')[0])
#EMR_top_disease_from_drug_df.head()
icd9_to_doid_final['ICD9_wildcard'] = icd9_to_doid_final['ICD9'].apply(lambda code: str(code).split('.')[0])
#icd9_to_doid_final.head()
df_joined = pd.merge(
    left=EMR_top_disease_from_drug_df, left_on='ICD9_wildcard',
    right=icd9_to_doid_final, right_on='ICD9_wildcard',
    how='inner',
    suffixes=(
        '_left',
        '_right',
    )
)

CREEDS_drug_ids = pd.DataFrame(set(df_joined.CREEDS_drug_id))
CREEDS_drug_ids_list = list(set(df_joined.CREEDS_drug_id))
#CREEDS_GSE.keys()
#CREEDS_drug_ids_list
CREEDS_Drug_Final = dict((k, CREEDS_GSE[k]) for k in CREEDS_drug_ids_list)
CREEDS_drug_final_df = pd.DataFrame(CREEDS_Drug_Final).T
CREEDS_drug_final_df.columns = ["GSE_ID", "DISEASE"]
#CREEDS_drug_final_df



### CREEDS DISEASE CARD FROM DRUG INPUT API
CREEDS_drug_final_diseases = CREEDS_drug_final_df.DISEASE
CREEDS_drug_final_GSE_ID = CREEDS_drug_final_df.GSE_ID
## CREEDS DISEASE CARD FROM DISEASE QUERY 

loop_iteration = np.arange(0, len(CREEDS_drug_final_diseases))
loop_iteration = list(loop_iteration)

CREEDS_total_api_df = []

for a in loop_iteration:
    CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
    CREEEDS_Drug_response = requests.get(CREEDS_URL + 'search', params={'q':CREEDS_drug_final_diseases[a]})
    if CREEEDS_Drug_response.status_code == 200:
        #pprint(CREEEDS_Disease_response.json())
        #json.dump(CREEEDS_Drug_response.json(), open(CREEDS_drug_final_GSE_ID[a] + '_api1_result.json', 'w'), indent=4)
        CREEEDS_Drug_response_json = CREEEDS_Drug_response.json()
        CREEEDS_Drug_response_df = pd.DataFrame(CREEEDS_Drug_response_json)
        #CREEEDS_Drug_response_df # This will be the dataframe to return
        CREEDS_total_api_df.append(CREEEDS_Drug_response_df)
CREEDS_total_api_df = pd.concat(CREEDS_total_api_df, axis =1)
CREEDS_total_api_df.T ## display this datatable

