In [1]:
import json
import math
import os

import numpy as np
import pandas as pd
import requests
from pandas import json_normalize
from tqdm import tqdm

In [2]:
# Bridge sign-in endpoint and the base URL that study-scoped calls are built on.
LOGIN_ENDPOINT = "https://webservices.sagebridge.org/v3/auth/signIn"
API_ENDPOINT = "https://webservices.sagebridge.org/v5/studies/"

# Sign-in payload. The e-mail and password are read from the BRIDGE_EMAIL /
# BRIDGE_PASSWORD environment variables when those are set, so real secrets
# never have to be saved inside the notebook; the placeholders are only the
# fallback used when the variables are absent.
LOG_VAL = {
  "appId": "mobile-toolbox",
  "email": os.environ.get("BRIDGE_EMAIL", "XXXXXX@XXX.com"),
  "password": os.environ.get("BRIDGE_PASSWORD", "XXXXXX")
}
# Study identifier appended to API_ENDPOINT by the request helpers below.
studyid = 'fmqcjv'

### Login Authentication

In [3]:
def login_bridge(login_api, login_param):
    """Sign in by POSTing the credential payload as JSON.

    Parameters
    ----------
    login_api : str
        URL of the sign-in endpoint.
    login_param : dict
        Payload sent as the JSON request body.

    Returns
    -------
    The response object produced by ``requests.post``; it is returned as-is,
    without any status check.
    """
    return requests.post(login_api, json=login_param)

In [4]:
# Sign in once; the helpers below read the session token from this response
login_res = login_bridge(login_api=LOGIN_ENDPOINT, login_param=LOG_VAL)

### Get Enrollment Info

In [5]:
def get_enroll(api_endpoint, studyid, login_res):
    """Return the participant identifiers of every enrollment in a study.

    A first request reads the ``total`` enrollment count; ``get_enroll_offset``
    then pages through the complete listing.

    Parameters
    ----------
    api_endpoint : str
        Base studies URL; the study id is appended to it directly.
    studyid : str
        Study identifier.
    login_res : response-like
        Sign-in response whose ``.json()`` contains ``sessionToken``.

    Returns
    -------
    pandas.DataFrame
        One column, ``participant.identifier``, one row per enrollment item.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    """
    enroll_api = api_endpoint + studyid + '/enrollments?offsetBy=0&pageSize=100'
    session_header = {'Bridge-Session': login_res.json()['sessionToken']}

    enroll_res = requests.get(enroll_api, headers=session_header)
    # Fail with the HTTP status instead of a KeyError on the missing 'total' key.
    enroll_res.raise_for_status()
    total_rec = enroll_res.json()['total']

    # Hand the study through explicitly so paging targets the study that was
    # asked for, not whatever the notebook-level `studyid` currently holds.
    return get_enroll_offset(api_endpoint, total_rec, login_res, study_id=studyid)


def get_enroll_offset(api_endpoint, total_rec, login_res, study_id=None):
    """Page through a study's enrollments and collect participant identifiers.

    Parameters
    ----------
    api_endpoint : str
        Base studies URL; the study id is appended to it directly.
    total_rec : int
        Total number of enrollment records, used to derive the page count.
    login_res : response-like
        Sign-in response whose ``.json()`` contains ``sessionToken``.
    study_id : str, optional
        Study to query. When omitted, the notebook-level ``studyid`` variable
        is used, which is how this function behaved before the parameter
        existed.

    Returns
    -------
    pandas.DataFrame
        One column, ``participant.identifier``; empty when no enrollment
        items were returned.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    """
    if study_id is None:
        study_id = studyid  # legacy fallback to the notebook global

    pageSize = 100
    # The token does not change between pages, so parse it once.
    session_header = {'Bridge-Session': login_res.json()['sessionToken']}
    tot_page = math.ceil(total_rec / pageSize)

    df_list = []
    for offsetBy in range(0, tot_page * pageSize, pageSize):
        enroll_api = (api_endpoint + study_id + '/enrollments?offsetBy=' + str(offsetBy)
                      + '&pageSize=' + str(pageSize))

        enroll_res = requests.get(enroll_api, headers=session_header)
        enroll_res.raise_for_status()
        df_list.append(json_normalize(enroll_res.json()['items']))

    # pd.concat refuses an empty list, and an all-empty result has no
    # identifier column to select, so both cases get an explicit empty frame.
    df_all = pd.concat(df_list).reset_index(drop=True) if df_list else pd.DataFrame()
    if df_all.empty:
        return pd.DataFrame(columns=['participant.identifier'])
    return df_all[['participant.identifier']]

def get_participant(api_endpoint, studyid, login_res, df_enroll):
    """Fetch every enrolled participant's record and keep the identifying columns.

    Parameters
    ----------
    api_endpoint : str
        Base studies URL; the study id is appended to it directly.
    studyid : str
        Study identifier.
    login_res : response-like
        Sign-in response whose ``.json()`` contains ``sessionToken``.
    df_enroll : pandas.DataFrame
        Must have a ``participant.identifier`` column; one request is made
        per row.

    Returns
    -------
    pandas.DataFrame
        Columns ``externalId``, ``healthCode`` and ``id``, one row per
        participant; empty when ``df_enroll`` has no rows.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    """
    wanted = ['externalId', 'healthCode', 'id']
    # The token does not change between participants, so parse it once.
    session_header = {'Bridge-Session': login_res.json()['sessionToken']}

    df_list = []
    # total= lets tqdm show a real progress bar instead of a bare counter.
    for userid in tqdm(df_enroll['participant.identifier'], total=len(df_enroll)):
        participant_api = api_endpoint + studyid + '/participants/' + userid
        participant_res = requests.get(participant_api, headers=session_header)
        participant_res.raise_for_status()
        df_list.append(json_normalize(participant_res.json()))

    # pd.concat raises on an empty list; an empty roster yields an empty frame.
    if not df_list:
        return pd.DataFrame(columns=wanted)

    df_final = pd.concat(df_list).reset_index(drop=True)
    return df_final[wanted]

def filter_record(df_enroll, df_participant):
    """Attach participant identifiers to the enrollment roster.

    ``participant.identifier`` in ``df_enroll`` is renamed to ``id`` and
    left-joined against the ``externalId`` / ``healthCode`` / ``id`` columns
    of ``df_participant``, so every enrollment row is kept even when no
    participant record matches. Neither input frame is modified.
    """
    participant_cols = df_participant.loc[:, ['externalId', 'healthCode', 'id']]
    enroll_by_id = df_enroll.rename(columns={"participant.identifier": "id"})
    return pd.merge(enroll_by_id, participant_cols, how='left', on='id')

In [6]:
# Pull the enrollment roster, then look up the record of each enrolled participant
df_enroll = get_enroll(api_endpoint=API_ENDPOINT, studyid=studyid, login_res=login_res)
df_participant = get_participant(
    api_endpoint=API_ENDPOINT,
    studyid=studyid,
    login_res=login_res,
    df_enroll=df_enroll,
)

6804it [06:09, 18.42it/s]


In [7]:
# One row per enrollment, carrying id, externalId and healthCode
df_info = filter_record(df_enroll=df_enroll, df_participant=df_participant)

### Get Adherence

In [8]:
def get_adherence(api_endpoint, df_info, studyid, login_res):
    """Collect assessment adherence records for every participant in ``df_info``.

    One adherence search is POSTed per row. Each returned item becomes a row
    tagged with the participant's ``healthcode`` and ``externalId``. A
    participant whose search returns no items still contributes one
    placeholder row (record columns empty, ``userId`` set from ``id``), so
    every participant appears in the result.

    Parameters
    ----------
    api_endpoint : str
        Base studies URL; the study id is appended to it directly.
    df_info : pandas.DataFrame
        Must have ``id``, ``healthCode`` and ``externalId`` columns.
    studyid : str
        Study identifier.
    login_res : response-like
        Sign-in response whose ``.json()`` contains ``sessionToken``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all per-participant frames with a fresh index;
        empty (placeholder columns only) when ``df_info`` has no rows.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    """
    placeholder_cols = ['instanceGuid', 'eventTimestamp', 'startedOn', 'finishedOn',
                        'clientTimeZone', 'declined', 'assessmentGuid', 'type']
    json_header = {"adherenceRecordType": "assessment"}
    # The token does not change between participants, so parse it once.
    session_header = {'Bridge-Session': login_res.json()['sessionToken']}

    df_list = []
    for _, row in tqdm(df_info.iterrows(), total=len(df_info)):
        userid = row['id']

        # NOTE(review): only one page of up to 500 records is requested per
        # participant; confirm nobody can exceed that.
        adherence_api = api_endpoint+studyid+'/participants/'+userid+'/adherence/search?pageSize=500'

        adh_res = requests.post(adherence_api, json=json_header, headers=session_header)
        adh_res.raise_for_status()
        df_json = json_normalize(adh_res.json()['items'])

        if len(df_json) == 0:
            # No adherence record available: emit one all-empty placeholder row.
            # NOTE(review): userId is only set here; rows built from real items
            # rely on the response itself carrying a userId field - confirm.
            df_json = pd.DataFrame(index=[0], columns=placeholder_cols)
            df_json['userId'] = row['id']

        df_json['healthcode'] = row['healthCode']
        df_json['externalId'] = row['externalId']
        df_list.append(df_json)

    # pd.concat raises on an empty list; no participants yields an empty frame.
    if not df_list:
        return pd.DataFrame(columns=placeholder_cols + ['userId', 'healthcode', 'externalId'])
    return pd.concat(df_list).reset_index(drop=True)


In [9]:
# One adherence search per participant in df_info
adh_res = get_adherence(
    api_endpoint=API_ENDPOINT,
    df_info=df_info,
    studyid=studyid,
    login_res=login_res,
)

6804it [05:54, 19.18it/s]


##### Filter Adherence

In [10]:
# Project-specific score export: point this path at the score file of the study being checked
df_score = pd.read_csv(filepath_or_buffer='data/MTB_UCSF_scores.csv')

In [11]:
def filter_adh(df):
    """Add completion status and split-out identifiers to the adherence frame.

    The frame is modified in place and the same object is returned. Columns
    added:

    - ``task_status``: ``'incomplete'`` when ``finishedOn`` is null or
      ``declined`` is anything other than ``False`` (missing values count as
      not-False), otherwise ``'complete'``.
    - ``participant_id``: the part of ``externalId`` before the first ``':'``,
      cast to ``str`` (so a missing externalId becomes the string ``'nan'``).
    - ``study_reference``: everything after the first ``':'``, or missing
      when ``externalId`` has no ``':'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``finishedOn``, ``declined`` and ``externalId`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the three columns added.
    """
    unfinished = df['finishedOn'].isnull() | (df['declined'] != False)
    df['task_status'] = np.where(unfinished, 'incomplete', 'complete')

    # n=1 caps the split at two parts and reindex pads a missing second part,
    # so the assignment no longer fails when an externalId contains several
    # ':' or when none of them contains one.
    id_parts = df['externalId'].str.split(':', n=1, expand=True).reindex(columns=[0, 1])
    df['participant_id'] = id_parts[0].astype(str)
    df['study_reference'] = id_parts[1]
    return df

In [12]:
# filter_adh adds its columns to adh_res in place and returns that same frame
df_adh = filter_adh(df=adh_res)

### QC Check

#### 1: Participant-level overview: has each participant taken at least one task?
    - score_processed:
        - yes: at least one score processed
        - no: no score processed
        - NaN: no adherence record available for the participant
    - task_status:
        - complete: adherence record is complete
        - incomplete: adherence record is incomplete
- Check whether any of the participant's tasks were uploaded on our side

In [13]:
# Real copies: plain assignment would only alias the frames, so any change
# made through a *_copy name would also alter the original.
df_adh_copy = df_adh.copy()
df_score_copy = df_score.copy()

In [14]:
def participant_check(df1, df2):
    """Summarise, per participant, whether any score was processed.

    Both frames are reduced to one row per (``healthcode``,
    ``participant_id``) pair, keeping the last occurrence. The reduced
    ``df2`` is then left-joined to a ``score_processed == 'yes'`` flag
    derived from ``df1``.

    ``score_processed`` ends up as:

    - ``'yes'`` when the pair is present in ``df1``;
    - ``'no'`` when it is absent from ``df1`` but the kept ``df2`` row has an
      ``instanceGuid``;
    - missing when it is absent from ``df1`` and ``instanceGuid`` is null.

    NOTE(review): because of ``keep='last'``, ``task_status`` reflects only
    the last-listed ``df2`` row of each participant - confirm that matches
    the intended "at least one task" reading.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Score frame with ``healthcode`` and ``participant_id`` columns.
    df2 : pandas.DataFrame
        Adherence frame with ``healthcode``, ``participant_id``, ``userId``,
        ``instanceGuid`` and ``task_status`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``userId``, ``participant_id``, ``healthcode``,
        ``task_status`` and ``score_processed``.
    """
    keys = ['healthcode', 'participant_id']

    scored = df1.drop_duplicates(subset=keys, keep='last').reset_index(drop=True)
    scored = scored.assign(score_processed='yes')[keys + ['score_processed']]

    last_adherence = df2.drop_duplicates(subset=keys, keep='last').reset_index(drop=True)
    merged = last_adherence.merge(scored, how='left', on=keys)

    # The flag is left untouched for rows without an adherence record and for
    # rows already marked 'yes'; every remaining row becomes 'no'.
    untouched = merged['instanceGuid'].isnull() | (merged['score_processed'] == 'yes')
    merged['score_processed'] = np.where(untouched, merged['score_processed'], 'no')

    return merged[['userId', 'participant_id', 'healthcode', 'task_status', 'score_processed']]
    

In [15]:
# df1 = score frame, df2 = adherence frame
pt_check = participant_check(df1=df_score_copy, df2=df_adh_copy)

In [16]:
#Stats
# Participants whose score_processed flag is missing
no_adh_record = pt_check[pt_check['score_processed'].isnull()]
# Participants whose kept adherence row is complete
complete_rec = pt_check[pt_check['task_status']=='complete']
# Share of complete participants with a processed score; NaN rather than a
# ZeroDivisionError when no participant is complete.
comp_rec_score_processed = (
    len(complete_rec[complete_rec['score_processed']=='yes'])/len(complete_rec)
    if len(complete_rec) else float('nan')
)

#### 2: Task level information