# Flat Batch GET to CSV

### Imports

In [7]:
import boto3 
import requests
import time
import pandas as pd 
import json 


In [8]:
# NOTE(review): despite its name, this is an S3 client (service "s3"), not a
# Bedrock runtime client. The name is kept so existing references still work.
bedrock_runtime = boto3.client("s3", region_name="us-east-1")

In [9]:
# Shell escape: list S3 buckets with the AWS CLI (output is shown below the cell).
! aws s3 ls

2023-04-25 16:09:07 athena-test-query-results12
2023-04-21 16:10:57 google-maps-places-123
2023-04-24 16:32:01 gplaces12345
2023-11-06 10:43:06 itv-flights-st
2023-10-23 22:05:13 itv-github-st
2023-12-19 15:56:09 particle-flat-demo
2023-07-09 21:31:59 sangyet-resume-page


## Setup Tools

### Retrieve Boost Data

In [2]:

def collect_boost_data(base_url, jwt, query_id, dataset_params='', timeout=60):
    """Retrieve Boost datasets for a query via GET /boost/{query_id}/collect-data.

    Parameters:
        base_url (str): API base URL, without a trailing slash.
        jwt (str): Token sent verbatim as the ``Authorization`` header.
        query_id (str): ID of the query whose Boost data is requested.
        dataset_params (str | None): Raw suffix appended to the request URL.
            ``None`` and ``''`` both append nothing, in which case all
            datasets are requested (per the original author's note).
        timeout (float): Seconds to wait for the server before giving up, so
            an unresponsive endpoint cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status code is anything other than 200.
    """
    # Set headers
    headers = {
        "accept": "application/json",
        "Authorization": jwt,
    }

    # Treat None like the empty default so string concatenation cannot fail
    suffix = dataset_params or ''
    url = base_url + "/boost/" + query_id + "/collect-data" + suffix

    response = requests.get(url, headers=headers, timeout=timeout)

    # Anything other than 200 is reported as a failure to the caller
    if response.status_code != 200:
        raise Exception(
            f"GET boost/query_id/collect-data failed: {response.status_code}"
        )

    print(f"Retrieved the Boost Data for QueryID: {query_id} - Status Code: {response.status_code}")
    return response.json()
    

### Structure Boost Output into Pandas DataFrames

In [3]:
def structure_boost_output(dataframe_map, boost_output):
    """Load each returned Boost dataset into a pandas DataFrame.

    Parameters:
        dataframe_map (dict): Map of dataset name -> DataFrame. It is updated
            in place: a dataset already present has the new rows appended,
            a new dataset gets a fresh entry.
        boost_output (dict): Map of dataset name -> records, in any form
            accepted by ``pd.DataFrame.from_records``.

    Returns:
        dict: The same ``dataframe_map`` object that was passed in.
    """
    for dataset in boost_output:
        new_rows = pd.DataFrame.from_records(boost_output[dataset])
        if dataset in dataframe_map:
            # DataFrame.append was removed in pandas 2.0; pd.concat with
            # ignore_index=True gives the same rows with a fresh 0..n-1 index.
            dataframe_map[dataset] = pd.concat(
                [dataframe_map[dataset], new_rows], ignore_index=True
            )
        else:
            dataframe_map[dataset] = new_rows

    # Return the map of dataframes
    return dataframe_map


### Read the first column of a CSV into a list

In [4]:
def read_first_column_to_list(csv_file):
    """
    Load a CSV with pandas and return the values of its first column.

    The file is read with ``pd.read_csv`` defaults, so the first row is
    treated as the header and is not part of the returned values.

    Parameters:
        csv_file (str): The path to the CSV file.

    Returns:
        list: The first column's values, or an empty list if reading or
        column extraction fails (the error is printed, not raised).
    """
    try:
        return pd.read_csv(csv_file).iloc[:, 0].tolist()
    except Exception as e:
        # Best effort: report the problem and hand back an empty result
        print(f"Error reading CSV file '{csv_file}': {e}")
        return []

### Save DataFrames as CSV files in a folder

In [5]:
import os
import pandas as pd

def save_dataframes_to_csv(dataframes, output_folder, dataframe_map=None):
    """
    Save named pandas DataFrames as CSV files in a specified folder.

    Each dataset is written to ``<output_folder>/<name>.csv`` without the
    index column. A dataset that is missing or cannot be written is reported
    with a printed message and skipped; it does not stop the remaining saves.

    Parameters:
        dataframes (list | dict): Dataset names to save. A dict of
            name -> DataFrame is also accepted and then doubles as the
            lookup map when ``dataframe_map`` is not given.
        output_folder (str): The folder path where the CSV files will be
            saved. It is created if it does not exist.
        dataframe_map (dict | None): Map of dataset name -> DataFrame to read
            from. When omitted, the notebook-level ``dataframe_map`` variable
            is used, which keeps existing two-argument calls working.

    Returns:
        None
    """
    if dataframe_map is None:
        if isinstance(dataframes, dict):
            dataframe_map = dataframes
        else:
            # Backward compatibility: the original implementation read the
            # notebook-level variable directly instead of taking a parameter.
            dataframe_map = globals().get("dataframe_map", {})

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(output_folder, exist_ok=True)

    for name in dataframes:
        filename = os.path.join(output_folder, f"{name}.csv")
        # Single guarded write: a missing dataset is reported, not raised
        try:
            dataframe_map[name].to_csv(filename, index=False)
            print(f"DataFrame '{name}' SAVED to {output_folder}")
        except Exception as e:
            print(f"....Error saving DataFrame '{name}': {e}, Most likely no data for this queryID")
        

## Set Variables

In [16]:
# Credentials sent to the auth endpoint (client id, client secret, scope) to receive the JWT.
# They are read from environment variables so that no secret is stored in the
# notebook. The client secret that was previously hardcoded here should be
# treated as exposed and rotated.
import os

clientId = os.environ["PARTICLE_CLIENT_ID"]
clientSecret = os.environ["PARTICLE_CLIENT_SECRET"]
scope = os.environ["PARTICLE_SCOPE"]

In [17]:
# Base URL used for API requests; this one points at the sandbox host.
base_url = 'https://sandbox.particlehealth.com'

In [None]:
# TODO: set to the path of a CSV whose first column holds the query IDs.
# Left as None so the cell is valid Python; the original line was an
# incomplete assignment and raised a SyntaxError when run.
csv_queryID_file_name = None

In [20]:
# Query IDs to process; the Run section loops over this list.
Query_ids = ['833ab9ac-ff12-436c-8a7c-89db15d1afd6']

## Run

In [19]:
### Auth

# Exchange the client credentials for a JWT. The endpoint is built from
# base_url so the host is configured in one place only.
url = base_url + '/auth'
headers = {'client-id': clientId, 'client-secret': clientSecret, 'scope': scope}
# The timeout keeps an unresponsive auth server from hanging the notebook
r = requests.get(url, headers=headers, timeout=30)

print(r) ## prints the status code
# Fail here on an HTTP error rather than using an error body as the token
r.raise_for_status()
jwtSandbox = r.text
# The JWT is deliberately not printed so it does not end up in saved output.

<Response [200]>


In [22]:
for q in Query_ids:
    # Fetch the Boost output for this query and load it into a fresh map
    boost_output = collect_boost_data(base_url, jwtSandbox, q)
    dataframe_map = {}
    structure_boost_output(dataframe_map, boost_output)

    # Names of the datasets to export, one CSV file per name
    dataframes_list = [
        'allergies',
        'composition',
        'documentReferences',
        'encounters',
        'immunizations',
        'labs',
        'locations',
        'medications',
        'practitioners',
        'problems',
        'procedures',
        'socialHistories',
        'vitalSigns',
    ]

    # The CSVs for a query go into a folder named after its query ID
    output_folder = q

    # Write the CSVs; the function looks each name up in dataframe_map
    save_dataframes_to_csv(dataframes_list, output_folder)


Retrieved the Boost Data for QueryID: 833ab9ac-ff12-436c-8a7c-89db15d1afd6 - Status Code: 200
DataFrame 'allergies' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'composition' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'documentReferences' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'encounters' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'immunizations' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'labs' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'locations' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'medications' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'practitioners' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'problems' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'procedures' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'socialHistories' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd6
DataFrame 'vitalSigns' SAVED to 833ab9ac-ff12-436c-8a7c-89db15d1afd