# Before generating SPOKEsigs you will need EHR data...

This code used EHR data from OMOP (tables: condition_occurrence, drug_exposure, and measurement)<br/>
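Only the `person_id` and `*concept_id` columns were exported from the OMOP tables. The loader below expects each export to be a tab-separated file with a header row and columns named `person_id` and `concept_id`.<br/>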
Lab tests were filtered to only include abnormal results.

# Import modules

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import requests
from requests.auth import HTTPBasicAuth
from retrying import retry
warnings.filterwarnings("ignore")

# SPOKE API endpoint used to build the patient signatures.
base_url = 'https://spokeapi.ucsf.edu'
end_point = '/v1/spoke_sig'
url = base_url + end_point

# Credentials are read from the environment so that no secret is stored in the
# notebook. Export SPOKE_API_USER / SPOKE_API_PASSWORD before starting the kernel.
# NOTE(review): a password used to be hardcoded here; treat it as exposed and
# have it rotated.
usr = os.environ.get('SPOKE_API_USER', 'user1')
psw = os.environ.get('SPOKE_API_PASSWORD', '')
if not psw:
    print('SPOKE_API_PASSWORD is not set; SPOKE API requests will be sent with an empty password.')

@retry(stop_max_attempt_number=50, wait_fixed=10000)
def get_api_resp(clinical_variables):
    """Query the SPOKE signature endpoint for one patient.

    Parameters
    ----------
    clinical_variables : list
        Concept identifiers of the patient, sent as the ``src`` query parameter.

    Returns
    -------
    requests.Response
        The raw response; the HTTP status is not inspected here, so error
        statuses are returned to the caller rather than retried.

    Notes
    -----
    The ``retry`` decorator re-issues the request when it raises, for at most
    50 attempts with a fixed wait of 10000 ms between them.
    """
    credentials = HTTPBasicAuth(usr, psw)
    query = {'src': clinical_variables}
    # NOTE(review): verify=False turns off TLS certificate verification while
    # basic-auth credentials are being sent, and no timeout is set - confirm
    # both are intended.
    return requests.get(url, params=query, auth=credentials, verify=False)

def _extract_spoke_sig(response):
    """Return the non-empty 'spoke_sig' of the first record in the response body, else None."""
    try:
        signature = response.json()[0]['spoke_sig']
        return signature if len(signature) > 0 else None
    except (ValueError, LookupError, TypeError):
        # Body is not JSON, is empty, or does not have the expected layout.
        return None


def make_spoke_sigs(all_concept_df):
    """Request a SPOKE signature for every patient in ``all_concept_df``.

    Parameters
    ----------
    all_concept_df : pandas.DataFrame
        Must contain the columns ``Patient_Index`` and ``concept_id``; the
        ``concept_id`` value of each row is passed to ``get_api_resp``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The ``Patient_Index`` values for which a signature was obtained, and
        the signatures themselves in the same order. Patients whose request
        returned a non-200 status, or whose response carried no usable
        signature, are reported on stdout and left out of both arrays.
    """
    patients_seen, spoke_sigs = [], []
    for patient, clinical_variables in all_concept_df[['Patient_Index', 'concept_id']].values:
        result = get_api_resp(clinical_variables)
        if result.status_code != 200:
            print('API call was not successfull for patient %s, returned %s HTTP status code' % (patient, str(result.status_code)))
            continue
        # Parse the body once; a malformed or empty body must not abort the
        # whole run and throw away the signatures collected so far.
        spoke_sig = _extract_spoke_sig(result)
        if spoke_sig is None:
            print('API call could not create a spoke signature for patient %s' % patient)
            continue
        print("API call was successfull, spoke signature for patient %s is available in 'spoke_sig' variable" % patient)
        patients_seen.append(patient)
        spoke_sigs.append(spoke_sig)
    # NOTE(review): assumes every signature has the same length so this stacks
    # into a regular 2-D array - TODO confirm against the API.
    return np.array(patients_seen), np.array(spoke_sigs)

def load_patient_information(diag_filename, med_filename, lab_filename):
    """Load the three patient exports and collect every patient's concepts.

    Parameters
    ----------
    diag_filename, med_filename, lab_filename : str
        Paths of tab-separated files with a header row. Each file must provide
        the columns ``person_id`` and ``concept_id``.

    Returns
    -------
    pandas.DataFrame
        One row per ``person_id`` with the columns ``person_id``,
        ``concept_id`` (a list of ``'OMOP:<id>'`` strings, duplicates removed)
        and ``Patient_Index`` (0..n-1 in row order).
    """
    sources = (
        ('diagnosis', diag_filename),
        ('medication', med_filename),
        ('lab', lab_filename),
    )
    tables = []
    for label, filename in sources:
        print('Reading %s file ...' % label)
        tables.append(pd.read_csv(filename, sep='\t', header=0, index_col=False))

    print('Merging patient tables')
    merged = pd.concat(tables, axis=0, ignore_index=True).drop_duplicates()
    # Replace the column rather than writing strings into the numeric column
    # through .loc, which depends on silent dtype upcasting (deprecated in pandas).
    merged = merged.assign(
        concept_id=['OMOP:%s' % concept_id for concept_id in merged['concept_id'].values]
    )
    patients = merged.groupby('person_id')['concept_id'].apply(list).reset_index()
    patients['Patient_Index'] = np.arange(len(patients))
    return patients

# Load and filter patient data

In [2]:
# Tab-separated exports written by the SQL queries (one file per OMOP table)
diag_filename, med_filename, lab_filename = (
    'condition_occurrence.tsv',
    'drug_exposure.tsv',
    'measurement.tsv',
)

# One row per patient, with the list of that patient's concepts
all_concept_df = load_patient_information(
    diag_filename=diag_filename,
    med_filename=med_filename,
    lab_filename=lab_filename,
)
all_concept_df.head()

Reading diagnosis file ...
Reading medication file ...
Reading lab file ...
Merging patient tables


Unnamed: 0,person_id,concept_id,Patient_Index
0,230130,"[OMOP:35225058, OMOP:45556996, OMOP:44824029, ...",0
1,411126,"[OMOP:44833044, OMOP:37202304, OMOP:44825717, ...",1
2,547231,"[OMOP:45600511, OMOP:35225339, OMOP:35209124, ...",2
3,742728,"[OMOP:45587523, OMOP:44826854, OMOP:44832716, ...",3
4,1291295,"[OMOP:45601745, OMOP:45551542, OMOP:44829584, ...",4


# Make SPOKEsigs

In [3]:
# Request a SPOKEsig for each row of all_concept_df. patients_seen receives the
# Patient_Index values that produced a signature, spoke_sigs the signatures in
# the same order.
patients_seen, spoke_sigs = make_spoke_sigs(all_concept_df)

API call was successfull, spoke signature for patient 0 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 1 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 2 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 3 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 4 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 5 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 6 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 7 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 8 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 9 is available in 'spoke_sig' variable
API call was successfull, spoke signature for patient 10 is available 

# Save new patient information

In [4]:
# Drop the patients that did not get a SPOKEsig and renumber the remaining
# rows 0..n-1.
n_patients, n_with_sig = len(all_concept_df), len(patients_seen)
if n_with_sig < n_patients:
    has_sig = all_concept_df.Patient_Index.isin(patients_seen)
    all_concept_df = all_concept_df[has_sig]
    all_concept_df.loc[:, 'Patient_Index'] = np.arange(len(all_concept_df))

# Write the signatures (pickling disabled) and the patient table to disk.
np.save('new_patient_spokesigs.npy', spoke_sigs, allow_pickle=False)
all_concept_df.to_csv('new_patient_info.tsv', sep='\t', header=True, index=False)