In [1]:
### Importing libraries

import math

import omxware
import pandas as pd

In [2]:
# --- Authentication options --- #
# Option 1: generate a token from your OMXWare user name and password. This is
# typically done once, with the token then stored in a file for reuse (Option 2).
# token = omxware.get_token('<user_name>', '<pw>') # fill in password to initiate token

# or

# Option 2: use a previously generated token loaded from file
token_path = "./omxware_collaborative_work/super_awesome_token"  # update for your path to the token file you create

# Only the first line of the file is read. The context manager closes the file
# handle as soon as the read is done, and strip() removes the trailing newline
# that readline() keeps, so it is not passed along as part of the token.
with open(token_path) as token_file:
    token = token_file.readline().strip()

# Fail here with a clear message rather than handing an empty token to OMXWare.
if not token:
    raise ValueError("Token file '{}' is empty.".format(token_path))

# provide your token to authenticate with OMXWare
omx = omxware.omxware(token)

In [3]:
# Gene name to search for; every query below filters on this value.
search_string = "cytotoxin"

In [6]:
###########################################################
### STEP 1
### Ask for the total number of matching genes, then derive how many pages
### are needed to retrieve all of them at the chosen page size.
### For more detail on the result entities:
### https://ibm.github.io/omxware-pypi-docs/build/html/omxware.entities.html
###########################################################

# Number of results requested per page in the paged queries.
page_size = 100

# Run the search once to learn how many results exist in total.
response = omx.genes(gene_name=search_string)
total_results_count = response.total_results()
print(f"For '{search_string}', the Total Result Count is {total_results_count}.")

# Round up so that a final, partially filled page is still counted.
total_page_count = math.ceil(total_results_count / page_size)

print(f"Page Size: {page_size}")
print(f"Total Page Count: {total_page_count}")

For 'cytotoxin', the Total Result Count is 3299.
Page Size: 100
Total Page Count: 33


In [None]:
###########################################################
### STEP 2
### Loop through pages of results and combine them into a single dataframe
### Can increase page size to speed this up too
###########################################################

# Collect the per-page dataframes in a list and concatenate once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside the loop copied all previously accumulated rows on every page.
page_dfs = []
for page_number in range(1, total_page_count + 1):  # pages are requested as 1..total_page_count
    print("Filling result dataframe... [{}/{}]".format(page_number, total_page_count))
    response_for_one_page = omx.genes(gene_name=search_string, page_size=page_size, page_number=page_number)
    page_dfs.append(response_for_one_page.results(type='df'))

# pd.concat raises on an empty list, so fall back to an empty dataframe when the
# search produced no pages (previously results_df stayed None and len() failed).
# Each page keeps its own row labels, exactly as the appended result did.
results_df = pd.concat(page_dfs) if page_dfs else pd.DataFrame()

print("\nLength of the Result Dataframe: {}".format(len(results_df)))

Filling result dataframe... [1/33]
Filling result dataframe... [2/33]
Filling result dataframe... [3/33]
Filling result dataframe... [4/33]
Filling result dataframe... [5/33]
Filling result dataframe... [6/33]
Filling result dataframe... [7/33]
Filling result dataframe... [8/33]
Filling result dataframe... [9/33]
Filling result dataframe... [10/33]
Filling result dataframe... [11/33]
Filling result dataframe... [12/33]
Filling result dataframe... [13/33]
Filling result dataframe... [14/33]
Filling result dataframe... [15/33]
Filling result dataframe... [16/33]
Filling result dataframe... [17/33]
Filling result dataframe... [18/33]


In [None]:
# Preview the first five rows of the combined results.
results_df.head(n=5)