# Requesting Data

Below is an example of requesting data from the Champions API.

In [None]:
import requests
import pandas as pd
import string
import random
import pprint
import json
import time
import os
# Shared pretty-printer for ad-hoc inspection of nested structures
# (compact layout, wrapped at 79 columns).
pp = pprint.PrettyPrinter(compact=True, width=79)

### 1. Define your credentials

Credentials can be found on [your Lumin Workspace page](https://database.championsoncology.com/lumin/workstation/)

In [None]:
# User credentials -- placeholders only; replace every value with your own
# before running the cells below.
username = "---"  # sent as the workspace id of each request
client_id = 99    # sent as the client of each request
user_id = 0       # sent as the user of each request
token = ""        # bearer token placed in the authorization header

Below is a function we use to retrieve data.

We use this to abstract away the work of a data request.

You shouldn't need to change anything here.

Just run the cell so the function is available in the next step.

In [None]:
def request_data(table="expression", cancer_type=None, genes=None, samples=None,
                 drugs=None, source="PDX", data_set_name=None, poll_interval=20):
    """Request a data set from the Champions API and save it as one CSV.

    The function posts a data request, polls the returned task until it
    finishes, renames the result folder in the user's home directory to
    ``data_set_name``, combines every file in that folder into a single
    DataFrame and writes it to ``<home>/<data_set_name>/<data_set_name>.csv``.

    It reads the module-level ``username``, ``client_id``, ``user_id`` and
    ``token`` variables, so the credentials cell must be run first.

    Args:
        table: Value sent as ``request_data_type``.
        cancer_type: List sent as ``request_cancer_type``; defaults to
            ``["all"]``.
        genes: List sent as ``request_genes``; defaults to ``[]``.
        samples: List sent as ``request_models``; defaults to ``[]``.
        drugs: Value sent as ``request_agents``; ``None`` sends no filter.
        source: Value sent as ``request_dataset``.
        data_set_name: Name for the output folder and CSV. Non-alphanumeric
            characters are removed; a random ``Data-XXXXXX`` name is used
            when nothing usable remains or no name is given.
        poll_interval: Seconds to wait between status checks.

    Returns:
        ``(data, csv_path)`` on success, where ``data`` is the combined
        DataFrame and ``csv_path`` the location of the saved CSV.
        ``(None, None)`` when the request is rejected or the task fails, so
        ``df, location = request_data(...)`` always unpacks.

    Raises:
        OSError: If the result folder cannot be renamed or read.
    """
    # None stands in for the list defaults so that no list object is shared
    # between calls.
    cancer_type = ["all"] if cancer_type is None else cancer_type
    genes = [] if genes is None else genes
    samples = [] if samples is None else samples

    if data_set_name is not None:
        # Remove any special characters
        data_set_name = ''.join(ch for ch in data_set_name if ch.isalnum())
    if not data_set_name:
        # Generate a folder name if none was specified, or if sanitising left
        # nothing (an empty name would make the rename target the home
        # directory itself).
        suffix = ''.join(random.choices(string.ascii_uppercase + string.digits, k=6))
        data_set_name = 'Data-' + suffix

    # Define the data request dictionary
    payload = {
        "request_data_type": table,
        "request_cancer_type": cancer_type,
        "request_genes": genes,
        "request_models": samples,
        "request_agents": drugs,  # was hard-coded to None, which dropped the filter
        "request_dataset": source,
        "request_workspace_id": username,
        "request_client": client_id,
        "request_user": user_id,
        "request_mode": True,
        "request_display_error": False,
        "preview": True,
    }

    # Create the request
    headers = {"authorization": "Bearer " + token}
    response = requests.post(
        'https://lumin-fast-api.championsoncology.com/workstation/',
        json=payload,
        headers=headers,
    )
    try:
        # .get() rather than [...] so a reply without a task_id reaches the
        # error message below instead of raising KeyError.
        task_id = response.json().get('task_id')
    except ValueError:
        # The body was not valid JSON.
        task_id = None

    if task_id is None:
        print("Error: No task_id returned. Please check the request details")
        return None, None

    print('')
    print("Data Request ID: {}".format(task_id))

    # Check the status of the request until it's complete
    poll_url = f'https://lumin-fast-api.championsoncology.com/tasks/poll/{task_id}?old_progress=0'
    while True:
        status = requests.get(poll_url, json=payload, headers=headers).json()
        state = status['state']
        if state == 'SUCCESS':
            result_folder = json.loads(status['result'])['name']
            break
        # NOTE(review): 'FAILURE' is assumed to be a terminal state as well;
        # confirm against the API. Without it such a task would poll forever.
        if state in ('error', 'FAILURE'):
            print("Error: {}".format(status.get('error')))
            return None, None
        if state == 'PROGRESS':
            print("Still loading...")
        else:
            print("Status: {}".format(state))
        # Wait after every non-terminal state so the endpoint is not hammered.
        time.sleep(poll_interval)

    # Change the folder name to the data set name
    home = os.path.expanduser("~")
    folder = os.path.join(home, data_set_name)
    os.rename(os.path.join(home, result_folder), folder)

    # Combine the files and save as a CSV. Frames are collected first and
    # concatenated once, so the accumulated data is not re-copied per file.
    frames = []
    for i, file_name in enumerate(sorted(os.listdir(folder)), start=1):
        print(f'reading file {i} {file_name} ...')
        frames.append(pd.read_json(os.path.join(folder, file_name)))
    data = pd.concat(frames) if frames else pd.DataFrame()

    csv_path = os.path.join(folder, data_set_name + '.csv')
    data.to_csv(csv_path, encoding='utf-8')

    # Return the data and the file location to the user
    return data, csv_path

# Visible confirmation that this cell has been executed.
print("Function loaded")

### 2. Request Data

Now we can make our data request. The request returns two results: a dataframe containing your data, and the location of the CSV file the data was saved to.

You can use the `location` variable to load straight from the CSV the next time you use this notebook. You don't need to request data from the API again until you want to change your analysis.

In [None]:
# --- First run only: request TGI data from the API ---
# Uncomment the lines below, run them once, then comment them out again.
#df, location = request_data(table="TGI", drugs=["Olaparib"])
#print("Data saved to {} and available as df".format(location))

# Keep just the columns we need
#data_df = df[['model_name','agent','TGI','tumor_type']]
#print(data_df)

# Overwrite the saved CSV with the reduced column set
#data_df.to_csv(location, encoding='utf-8')


# --- Later runs: load the saved CSV instead of calling the API ---
location = "../Data/TGI.csv"
load_data_df = pd.read_csv(location)
print(load_data_df.head(n=5))

# Another example: expression data for two genes. request_data removes
# non-alphanumeric characters (such as the underscore) from data_set_name.
#df, location = request_data(table="expression", genes = ['BRCA1', 'BRCA2'], data_set_name="BRCA1_BRCA2")
#print("Data saved to {} and available as df".format(location))