## Read and Save Query Status in CSV for all Workflows

<br>

**Read all the JSON files for all the workflows and print out the messages and query status to a CSV file**

<br>

In [1]:
# import all the modules. NB: submit_run_ars_modules contains all the modules to submit job to ARAX

import json
import requests
from gamma_viewer import GammaViewer
from IPython.display import display
#from submit_run_ars_modules import submit_to_ars, submit_to_devars, printjson, retrieve_devars_results
import glob 
import os
from collections import defaultdict
import pandas as pd
from time import sleep
from os import path

In [2]:
def submit_to_ars(m,ars_url='https://ars.ci.transltr.io/ars/api',arax_url='https://arax.ncats.io'):
    submit_url=f'{ars_url}/submit'
    response = requests.post(submit_url,json=m)
    try:
        message_id = response.json()['pk']
    except:
        print('fail')
        message_id = None
    print(f'{arax_url}/?source=ARS&id={message_id}')
    return message_id

##https://ars.ci.transltr.io/ars/api

def retrieve_ars_results(mid,ars_url='https://ars.ci.transltr.io/ars/api'):
    pk = 'https://arax.ncats.io/?source=ARS&id=' + mid
    message_url = f'{ars_url}/messages/{mid}?trace=y'
    response = requests.get(message_url)
    j = response.json()
    print( j['status'] )
    results = {}
    dictionary = {}
    for child in j['children']:
        print(child['status'])
        if child['status']  == 'Done':
            childmessage_id = child['message']
            child_url = f'{ars_url}/messages/{childmessage_id}'
            try:
                child_response = requests.get(child_url).json()
                nresults = len(child_response['fields']['data']['message']['results'])
                if nresults > 0:
                    results[child['actor']['agent']] = {'message':child_response['fields']['data']['message']}
            except Exception as e:
                nresults=0
                child['status'] = 'ARS Error'
        elif child['status'] == 'Error':
            nresults=0
            childmessage_id = child['message']
            child_url = f'{ars_url}/messages/{childmessage_id}'
            try:
                child_response = requests.get(child_url).json()
                results[child['actor']['agent']] = {'message':child_response['fields']['data']['message']}
            except Exception as e:
                #print(e)
                child['status'] = 'ARS Error'
        else:
            nresults = 0
            
        dictionary['pk_id'] =  pk  
            
        if ((child['status'] == 'Done') & (nresults == 0)):
            dictionary[child['actor']['agent']] = 'No Results'
            #test =  [child['actor']['agent'], 'No Results']
        elif ((child['status'] == 'ARS Error') & (nresults == 0)):
            dictionary[child['actor']['agent']] = 'ARS Error'
        elif ((child['status'] == 'Error') & (nresults == 0)):
            dictionary[child['actor']['agent']] = 'Error'
            #test =  [child['actor']['agent'], 'ARS Error']
        elif ((child['status'] == 'Done') & (nresults != 0)):
            #test =  [child['actor']['agent'], 'Results']
            dictionary[child['actor']['agent']] = 'Results'
        
        
        print(child['actor']['agent'], child['status'], nresults)
        #test =  [child['actor']['agent'], child['status'], nresults]
        #test2.append(test)
    return dictionary


#def submit_to_devars(m):
#    return submit_to_ars(m,ars_url='https://ars-dev.transltr.io/ars/api',arax_url='https://arax.ncats.io')

#def retrieve_devars_results(m):
#     return retrieve_ars_results(m,ars_url='https://ars-dev.transltr.io/ars/api')

def printjson(j):
    print(json.dumps(j,indent=4))
    
def make_hyperlink(value):
    return '=HYPERLINK("%s", "%s")' % (value.format(value), value)

<br>

**The below code reads each JSON files from the Workflows A through D (subdirectories). The queries are submitted to ARAX and output is saved in a dictionary, where the key is the file name of the JSON to denote which query is being run and the values assigned to the key is the query id**

<br>


In [3]:
PATH = r'/Users/priyash/Documents/GitHub/minihackathons/2021-12_demo'
EXT = "*.json"
dict_workflows = {}
for root, dirs, files in os.walk(PATH): # step 1: accessing file
    #print(root)
    for name in files:
        
        if name.endswith((".json")):
            file_read = path.join(root, name)
            dir_name = (os.path.splitext(os.path.basename(root))[0])
            print(file_read)
            
            filename = (os.path.splitext(os.path.basename(file_read))[0])
            print(filename)
            with open(file_read,'r') as inf:
                query = json.load(inf)
                
                kcresult = submit_to_ars(query)
                
                result_status = retrieve_ars_results(kcresult)
                
        
                dict_workflows[filename] = kcresult
                            
                sleep(600)

/Users/priyash/Documents/GitHub/minihackathons/2021-12_demo/workflowA/EGFR_advanced.json
EGFR_advanced
https://arax.ncats.io/?source=ARS&id=e8b47dcd-9638-49b0-bd0c-16a3173206c4
Running
Done
kp-icees Done 0
Running
kp-textmining Running 0
Done
ara-improving ARS Error 0
Error
ara-aragorn Error 0
Done
kp-chp Done 0
Running
ara-bte Running 0
Done
ara-unsecret Done 0
Done
kp-genetics Done 0
Done
ara-explanatory Done 0
Done
kp-molecular Done 0
Error
ara-arax Error 0
Running
kp-cam Running 0
Running
kp-openpredict Running 0
Error
kp-cohd Error 0
/Users/priyash/Documents/GitHub/minihackathons/2021-12_demo/workflowA/A.8_EGFR_simple.json
A.8_EGFR_simple
https://arax.ncats.io/?source=ARS&id=7eeb9b62-27e1-4555-aa99-ebe954e36464
Running
Running
kp-cam Running 0
Done
ara-improving ARS Error 0
Error
kp-cohd Error 0
Running
ara-arax Running 0
Running
ara-bte Running 0
Running
kp-openpredict Running 0
Done
kp-genetics Done 0
Running
ara-unsecret Running 0
Running
kp-chp Running 0
Done
kp-molecular Done

Running
Running
kp-openpredict Running 0
Running
ara-improving Running 0
Running
ara-unsecret Running 0
Running
ara-bte Running 0
Running
ara-aragorn Running 0
Error
kp-molecular Error 0
Running
kp-cam Running 0
Done
kp-icees ARS Error 0
Error
kp-cohd Error 0
Done
ara-explanatory Done 0
Running
ara-arax Running 0
Running
kp-textmining Running 0
Done
kp-chp Done 0
Error
kp-genetics Error 0
/Users/priyash/Documents/GitHub/minihackathons/2021-12_demo/workflowD/D.5_tryptophan-kynurenine.json
D.5_tryptophan-kynurenine
https://arax.ncats.io/?source=ARS&id=a80a01eb-6794-4fe2-9e97-83789f67a9cc
Running
Error
kp-cohd Error 0
Running
ara-bte Running 0
Done
kp-openpredict Done 0
Running
ara-unsecret Running 0
Done
kp-chp Done 0
Error
kp-genetics Error 0
Running
ara-arax Running 0
Running
kp-cam Running 0
Running
kp-textmining Running 0
Done
ara-explanatory Done 0
Running
ara-improving Running 0
Error
kp-molecular Error 0
Running
ara-aragorn Running 0
Done
kp-icees ARS Error 0
/Users/priyash/Docume

<br>

### Codes below are for recording messages and generating outout as csv

<br>

In [4]:
workflow_result_messages = {}
for keys, val in dict_workflows.items():
    print(keys, val)
    
    result_status = retrieve_ars_results(val)
    
    workflow_result_messages[keys] = result_status

EGFR_advanced e8b47dcd-9638-49b0-bd0c-16a3173206c4
Done
Done
kp-icees Done 0
Done
kp-textmining Done 0
Done
ara-improving ARS Error 0
Error
ara-aragorn Error 0
Done
kp-chp Done 0
Done
ara-bte Done 0
Done
ara-unsecret Done 0
Done
kp-genetics Done 0
Done
ara-explanatory Done 0
Done
kp-molecular Done 0
Error
ara-arax Error 0
Done
kp-cam Done 0
Done
kp-openpredict Done 0
Error
kp-cohd Error 0
A.8_EGFR_simple 7eeb9b62-27e1-4555-aa99-ebe954e36464
Done
Done
kp-cam Done 0
Done
ara-improving ARS Error 0
Error
kp-cohd Error 0
Done
ara-arax Done 453
Done
ara-bte Done 0
Done
kp-openpredict Done 0
Done
kp-genetics Done 0
Done
ara-unsecret Done 63
Done
kp-chp Done 0
Done
kp-molecular Done 0
Done
ara-explanatory Done 0
Done
kp-textmining Done 0
Done
ara-aragorn Done 1560
Done
kp-icees Done 0
A.1_RHOBTB2 0541cb28-1f5b-490b-858c-6412bf81ba0a
Done
Done
ara-unsecret Done 0
Done
kp-cam Done 0
Done
ara-bte Done 8
Done
ara-aragorn Done 4
Done
kp-textmining Done 0
Done
kp-icees Done 0
Error
kp-cohd Error 0
D

In [5]:
## Convert mesages to a dataframe
col = []
final_dict = defaultdict(list)
for k in sorted(workflow_result_messages):
    print(k)
    col.append(k)
    
    for key, value in workflow_result_messages[k].items():
        if key.startswith('kp-'):
            key_mod = key.replace('kp-','')
        else:
            key_mod = key
        
        final_dict[key_mod].append(value)

    final_dict = dict(final_dict)
    
df = pd.DataFrame(final_dict).T
df.rename(columns=dict(zip(df.columns, col)), inplace=True)

A.1_RHOBTB2
A.8_EGFR_simple
B.1a_DILI-three-hop-from-disease-or-phenotypic-feature_trapi
B.1b_DILI_three-hop-from-disease-or-phenotypic-feature_trapi
B.1c_DILI_three-hop-from-disease-or-phenotypic-feature_trapi
B.1d_DILI_three-hop-from-disease-or-phenotypic-feature_trapi
B.1e_DILI_three-hop-from-disease-or-phenotypic-feature_trapi
B.3_DILI-one-hop-from-genes
B.4_one-hop-gene-biological-process-or-activity_trapi
C.1_ChemSubstances_related_to_Disease
C.2_ChemSubstances_related_to_GeneSet
D.1_parkinsons-crohns
D.2_ssri-heart-disease
D.3_ssri-heart-disease-one-hop
D.4_tryptophan-kynurenine-with-curie
D.5_tryptophan-kynurenine
D.6_metformin-ferritin
EGFR_advanced


In [6]:
df

Unnamed: 0,A.1_RHOBTB2,A.8_EGFR_simple,B.1a_DILI-three-hop-from-disease-or-phenotypic-feature_trapi,B.1b_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.1c_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.1d_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.1e_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.3_DILI-one-hop-from-genes,B.4_one-hop-gene-biological-process-or-activity_trapi,C.1_ChemSubstances_related_to_Disease,C.2_ChemSubstances_related_to_GeneSet,D.1_parkinsons-crohns,D.2_ssri-heart-disease,D.3_ssri-heart-disease-one-hop,D.4_tryptophan-kynurenine-with-curie,D.5_tryptophan-kynurenine,D.6_metformin-ferritin,EGFR_advanced
pk_id,https://arax.ncats.io/?source=ARS&id=0541cb28-...,https://arax.ncats.io/?source=ARS&id=7eeb9b62-...,https://arax.ncats.io/?source=ARS&id=525f89e6-...,https://arax.ncats.io/?source=ARS&id=ec7ac77e-...,https://arax.ncats.io/?source=ARS&id=364898c1-...,https://arax.ncats.io/?source=ARS&id=66a36d77-...,https://arax.ncats.io/?source=ARS&id=5f031a83-...,https://arax.ncats.io/?source=ARS&id=4439117e-...,https://arax.ncats.io/?source=ARS&id=69074144-...,https://arax.ncats.io/?source=ARS&id=5996881e-...,https://arax.ncats.io/?source=ARS&id=9f3752ec-...,https://arax.ncats.io/?source=ARS&id=67be883f-...,https://arax.ncats.io/?source=ARS&id=ea0f4934-...,https://arax.ncats.io/?source=ARS&id=835fc0f9-...,https://arax.ncats.io/?source=ARS&id=2d20e44e-...,https://arax.ncats.io/?source=ARS&id=a80a01eb-...,https://arax.ncats.io/?source=ARS&id=b8985516-...,https://arax.ncats.io/?source=ARS&id=e8b47dcd-...
ara-unsecret,No Results,Results,No Results,No Results,No Results,Error,No Results,Results,No Results,Error,Error,Error,Error,Results,No Results,Results,Results,No Results
cam,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Results,Results,No Results,Error,Error,Error,Error,Error,Error,No Results,No Results
ara-bte,Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Error,Error,Error,Error,Error,Error,Error,Error,No Results
ara-aragorn,Results,Results,No Results,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error,No Results,Error,Error,Error
textmining,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Error,Error,No Results,Error,Error,Error,Error,Error,No Results
icees,No Results,No Results,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,No Results,No Results,No Results,ARS Error,ARS Error,ARS Error,No Results,ARS Error,ARS Error,ARS Error,No Results
cohd,Error,Error,Error,Error,Error,Error,Error,ARS Error,Error,Results,Error,Error,Error,Results,Error,Error,Error,Error
ara-explanatory,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results
ara-improving,Results,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,Results,Error,Results,Results,Results,Results,Results,Results,ARS Error


In [7]:
df.loc['pk_id'] = df.loc['pk_id'].apply(lambda x: make_hyperlink(x))

In [8]:
df.rename({'pk_id': 'pk'}, inplace=True)

In [9]:
df

Unnamed: 0,A.1_RHOBTB2,A.8_EGFR_simple,B.1a_DILI-three-hop-from-disease-or-phenotypic-feature_trapi,B.1b_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.1c_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.1d_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.1e_DILI_three-hop-from-disease-or-phenotypic-feature_trapi,B.3_DILI-one-hop-from-genes,B.4_one-hop-gene-biological-process-or-activity_trapi,C.1_ChemSubstances_related_to_Disease,C.2_ChemSubstances_related_to_GeneSet,D.1_parkinsons-crohns,D.2_ssri-heart-disease,D.3_ssri-heart-disease-one-hop,D.4_tryptophan-kynurenine-with-curie,D.5_tryptophan-kynurenine,D.6_metformin-ferritin,EGFR_advanced
pk,"=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&...","=HYPERLINK(""https://arax.ncats.io/?source=ARS&..."
ara-unsecret,No Results,Results,No Results,No Results,No Results,Error,No Results,Results,No Results,Error,Error,Error,Error,Results,No Results,Results,Results,No Results
cam,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Results,Results,No Results,Error,Error,Error,Error,Error,Error,No Results,No Results
ara-bte,Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Error,Error,Error,Error,Error,Error,Error,Error,No Results
ara-aragorn,Results,Results,No Results,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error,No Results,Error,Error,Error
textmining,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Error,Error,No Results,Error,Error,Error,Error,Error,No Results
icees,No Results,No Results,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,No Results,No Results,No Results,ARS Error,ARS Error,ARS Error,No Results,ARS Error,ARS Error,ARS Error,No Results
cohd,Error,Error,Error,Error,Error,Error,Error,ARS Error,Error,Results,Error,Error,Error,Results,Error,Error,Error,Error
ara-explanatory,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results,No Results
ara-improving,Results,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,ARS Error,Results,Error,Results,Results,Results,Results,Results,Results,ARS Error


In [10]:
df.to_excel('ara_worklow_progress_tracker_Aug9.xlsx')

In [None]:
### Save as excel sheet
#df.to_excel('ara_worklow_progress_tracker_Aug5.xlsx', sheet_name = 'Progress_Tracker')

In [11]:
### save all the ids for ara submission
with open('ara_worklow_IDs_Aug9.txt', 'w') as f:
    print(dict_workflows, file=f)

In [12]:
import gspread
from df2gspread import df2gspread as d2g
from oauth2client.service_account import ServiceAccountCredentials

In [13]:
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'araxworkflowprogresstesting-2632632db8be.json', scope)
gc = gspread.authorize(credentials)

In [14]:
spreadsheet_key = '1O1cMmYGxoIqP6xbzj6FG5owiKQVg57wx2O_XIA_hN_A'
wks_name = 'Workflow Progress Tracker August 9'
d2g.upload(df, spreadsheet_key, wks_name, credentials=credentials, row_names=True)

<Worksheet 'Workflow Progress Tracker August 9' id:1101826795>

In [None]:
dict_workflows