Skip to content
This repository has been archived by the owner on Apr 17, 2024. It is now read-only.

Integration #204

Merged
merged 20 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
98a99a8
trying to fix unique_terms for get all
dionboles-asym Jul 10, 2023
c9d7031
testing unique term get_all
dionboles-asym Jul 17, 2023
40bb09a
added get_all to unique_terms and fixed progress bar
dionboles-asym Jul 19, 2023
320e692
fix python3.8 type error to stop pip install
dionboles-asym Jul 20, 2023
cee8ab9
typeerror
dionboles-asym Jul 20, 2023
28e28a1
updated docstring
dionboles-asym Jul 20, 2023
1a14a0d
added docstring
dionboles-asym Jul 24, 2023
bc92cb9
fixed a bug in progress bar when it will only update once.
dionboles-asym Jul 25, 2023
7131f7e
Apply suggestions from code review
dionboles-asym Jul 25, 2023
335bf4f
Merge pull request #201 from CancerDataAggregator/unique_terms_get_all
dionboles-asym Jul 25, 2023
f7a382c
updated test
dionboles-asym Aug 16, 2023
9612395
Merge branch 'integration' into CD-526-Make-get_all-work-properly-wit…
dionboles-asym Aug 16, 2023
5efd7ac
test to check graph changes from integration to prod
dionboles-asym Aug 17, 2023
00fae6f
updated requirements.txt
dionboles-asym Aug 17, 2023
6f54875
added test to check primary_disease_type and file_formats of tsv and bam
dionboles-asym Aug 18, 2023
bb5c95e
Merge pull request #202 from CancerDataAggregator/CD-526-Make-get_all…
dionboles-asym Aug 21, 2023
47d29e3
updated docstring and version dates
dionboles-asym Aug 22, 2023
5ae3b32
forgot to update version check test
dionboles-asym Aug 22, 2023
1874673
updated docstr for get_all
dionboles-asym Aug 22, 2023
c6004ef
Merge pull request #203 from CancerDataAggregator/CD-569-updates-for-…
dionboles-asym Aug 22, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ cdapython.egg-info

#virtualenv
venv
venv8
#vscode
.vscode

Expand All @@ -38,6 +39,7 @@ typeReport
.mypy_cache
.coverage
.venv
venv*
venv2
.ven3
.venv_11
Expand Down
24 changes: 7 additions & 17 deletions DataSummaries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
"cell_type": "code",
"execution_count": 3,
"id": "5245b7b0",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -320,9 +318,7 @@
"cell_type": "code",
"execution_count": 8,
"id": "6d9137aa",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -702,9 +698,7 @@
"cell_type": "code",
"execution_count": 5,
"id": "993479db",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"subjectresults"
Expand Down Expand Up @@ -831,9 +825,7 @@
"cell_type": "code",
"execution_count": 8,
"id": "a2d8c874",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -974,9 +966,7 @@
"cell_type": "code",
"execution_count": 9,
"id": "7770d68c",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -1732,7 +1722,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.13 ('venv': venv)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -1746,7 +1736,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
"version": "3.8.16"
},
"metadata": {
"interpreter": {
Expand Down
2 changes: 1 addition & 1 deletion Untitled.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
"version": "3.8.16"
},
"vscode": {
"interpreter": {
Expand Down
2 changes: 1 addition & 1 deletion Untitled1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.8.16"
},
"vscode": {
"interpreter": {
Expand Down
43 changes: 34 additions & 9 deletions cdapython/Paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from __future__ import annotations

from typing import TYPE_CHECKING, Any, List, TypeVar, Union
from typing import TYPE_CHECKING, Any, List, Optional, TypeVar, Union

from rich.progress import (
BarColumn,
Expand Down Expand Up @@ -57,12 +57,9 @@ def __init__(
TimeElapsedColumn(),
MofNCompleteColumn(),
)
self.progress.live._screen = True
self.task: Optional[TaskID] = None
self.show_bar: bool = show_bar
if self.show_bar:
self.task: TaskID = self.progress.add_task(
"Processing", total=self.result.total_row_count
)
self.progress.update(self.task, advance=self.result.count)

def _return_result(self) -> Union[DataFrame, List[Any], Result]:
"""_summary_
Expand All @@ -83,14 +80,39 @@ def _return_result(self) -> Union[DataFrame, List[Any], Result]:
return self.result

def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]:
"""
Get the next value from the result object and update the progress bar.
Args:
self (Paginator): _description_

Raises:
e: _description_

Returns:
Union[DataFrame, List[Any], Result, None]: _description_
"""
if self.task is None and self.show_bar:
self.task = self.progress.add_task(
"Processing", total=self.result.total_row_count
)

self.progress.update(
task_id=self.task, advance=self.result.count, refresh=True
)
result_nx = self._return_result()

if self.result.has_next_page:
try:
tmp_result = self.result.next_page(limit=self.limit)
if tmp_result:
self.result = tmp_result
if self.show_bar:
self.progress.update(self.task, advance=self.result.count)
if self.task is not None:
self.progress.update(
task_id=self.task,
advance=self.result.count,
refresh=True,
)
return result_nx
except Exception as e:
if self.show_bar:
Expand All @@ -104,6 +126,11 @@ def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]:
return None

async def a_do_next(self) -> Union[List[T], DataFrame, Result, None]:
    """
    Async counterpart of `_do_next`.

    Delegates directly to the synchronous `_do_next` call and hands back
    whatever it returns; nothing is awaited inside this coroutine.

    Returns:
        Union[List[T], DataFrame, Result, None]: the value produced by `_do_next`.
    """
    return self._do_next()

def __iter__(self) -> Paginator:
Expand All @@ -120,7 +147,6 @@ async def __anext__(self) -> Union[list, DataFrame, Result, None]:
try:
if self.stopped:
if self.show_bar:
self.progress.update(self.task, advance=self.result.count)
self.progress.stop()
raise StopAsyncIteration
self.count += self.result.count
Expand All @@ -134,7 +160,6 @@ def __next__(self) -> Union[list, DataFrame, Result, None]:
try:
if self.stopped:
if self.show_bar:
self.progress.update(self.task, advance=self.result.count)
self.progress.stop()
raise StopIteration
self.count += self.result.count
Expand Down
25 changes: 13 additions & 12 deletions cdapython/Q.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,24 +594,25 @@ def run(
format_type: str = "json",
show_sql: bool = False,
) -> Union[QueryCreatedData, ApplyResult, Result, DryClass, None]:
"""_summary_
This will call the server to make a request return a Result like object
"""
The run method serves to make a request and return a Result object.
Args:
offset (int, optional): _description_. Defaults to 0.
page_size (int, optional): _description_. Defaults to 100.
version (Optional[str], optional): _description_. Defaults to None.
host (Optional[str], optional): _description_. Defaults to None.
limit (Union[int, None], optional): _description_. Defaults to None.
version (Union[str, None], optional): _description_. Defaults to None.
host (Union[str, None], optional): _description_. Defaults to None.
dry_run (bool, optional): _description_. Defaults to False.
table (Optional[str], optional): _description_. Defaults to None.
table (Union[str, None], optional): _description_. Defaults to None.
async_call (bool, optional): _description_. Defaults to False.
verify (Optional[bool], optional): _description_. Defaults to None.
verbose (Optional[bool], optional): _description_. Defaults to True.
filter (Optional[str], optional): _description_. Defaults to None.
flatten (Optional[bool], optional): _description_. Defaults to False.
format (Optional[str], optional): _description_. Defaults to "json".
verify (Union[bool, None], optional): _description_. Defaults to None.
verbose (bool, optional): _description_. Defaults to True.
include (Union[str, None], optional): _description_. Defaults to None.
format_type (str, optional): _description_. Defaults to "json".
show_sql (bool, optional): _description_. Defaults to False.

Returns:
Optional[Result]: _description_
Union[QueryCreatedData, ApplyResult, Result, DryClass, None]: _description_
"""
dry_run_current = False

Expand Down Expand Up @@ -648,7 +649,7 @@ def run(
# Execute boolean query
if verbose:
print(
"Getting results from database",
f"Getting {page_size} results from database ",
end="\n\n",
)

Expand Down
18 changes: 13 additions & 5 deletions cdapython/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from typing import Any

from rich.console import Console
from typing_extensions import Literal

from cdapython.constant_variables import Constants
Expand Down Expand Up @@ -112,11 +113,18 @@
import builtins as __builtin__


def console_print(*args: Any, **kwargs: Any) -> None:
from rich.console import Console

console: Console = Console()
console.print(*args, **kwargs)
def console_print(*args, _builtin_print=print, **kwargs) -> None:
    """
    Rich-aware replacement for Python's built-in `print`.

    This function is installed over `__builtin__.print`, so every `print`
    call in the process is routed through it.

    Args:
        *args: the objects to print.
        _builtin_print: the original `print`, captured when this function is
            defined (i.e. before `__builtin__.print` is rebound). Private;
            callers should not pass it.
        **kwargs: keyword arguments of the original call. When `file` is
            present they are forwarded unchanged to the built-in `print`;
            otherwise they are forwarded to `rich.console.Console.print`.
    """
    if "file" in kwargs:
        # A bare `print(...)` here would resolve to this very function once
        # the override is installed, and calling it without kwargs would drop
        # `file`/`sep`/`end`/`flush`. Use the captured built-in and forward
        # every keyword so the output really lands in the requested file.
        _builtin_print(*args, **kwargs)
        return

    console: Console = Console()
    console.print(*args, **kwargs)


__builtin__.print = console_print
Expand Down
2 changes: 1 addition & 1 deletion cdapython/constant_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Constants:
This class Holds Q Global Constants used in Http methods
"""

_VERSION: str = "2023.6.13"
_VERSION: str = "2023.8.22"
_DATABASETABLE_VERSION: str = "all_merged_subjects_v3_3_final"
_DATABASETABLE: str = "broad-dsde-prod.cda_prod"
_CDA_API_URL_ENV: str = "https://cda.datacommons.cancer.gov/"
Expand Down
2 changes: 1 addition & 1 deletion cdapython/factories/count.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from cda_client.api.query_api import QueryApi
from cda_client.api_client import Endpoint
Expand Down
37 changes: 34 additions & 3 deletions cdapython/results/factories/collect_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cda_client.api.query_api import QueryApi
from cda_client.model.query_response_data import QueryResponseData

from cdapython.Paginator import Paginator
from cdapython.results.factories.not_paginated_result import NotPaginatedResult
from cdapython.results.factories.result_factory import AbstractFactory
from cdapython.utils.none_check import none_check
Expand Down Expand Up @@ -46,7 +47,7 @@ def __init__(

def extend_result(self, result: Result) -> None:
"""_summary_
This will method will
This method will concat the list value to a global list
Args:
result (Result): _description_
"""
Expand All @@ -59,6 +60,17 @@ def get_all(
page_size: Union[None, int] = None,
show_bar: bool = True,
) -> Result:
"""
This will page through the query results automatically looping and appending result objects to a List
for the user.
Args:
output (str, optional): if you pass the output string "full_list" or "full_df" you will get either a list or a DataFrame. Defaults to "".
page_size (int, optional): this can increase the page size of the results. Defaults to None. Known issue: the first page is always 100, even when the size is adjusted.
show_bar (bool, optional): This will show or hide the progress bar. Defaults to True.

Returns:
Result: _description_
"""
return super().get_all(output, page_size, show_bar)

def paginator(
Expand All @@ -68,8 +80,27 @@ def paginator(
to_list: bool = False,
page_size: int = None,
show_bar: bool = False,
):
return super().paginator(output, to_df, to_list, page_size, show_bar)
) -> Paginator:
"""
This will return a pagination class (Collection) for iteration, such as the for loop.
By default, the pagination object will return a result class, which is normally what you will get from a `Q.run`
Args:
output (str, optional): if you pass the output string "full_list" or "full_df" you will get either a list or a DataFrame. Defaults to "".
to_df (bool, optional): This will return a list from the DataFrame. Defaults to False.
to_list (bool, optional): This will return a list from the paginator. Defaults to False.
page_size (int, optional): this can change the page size of results . Defaults to results limit.
show_bar (bool, optional): This will show or hide the progress bar. Defaults to False.

Returns:
Paginator: _description_
"""
return super().paginator(
output=output,
to_df=to_df,
to_list=to_list,
page_size=page_size,
show_bar=show_bar,
)

class Factory(AbstractFactory):
@staticmethod
Expand Down
8 changes: 4 additions & 4 deletions cdapython/results/factories/not_paginated_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def paginator(
page_size: int = None,
show_bar: bool = False,
) -> Paginator:
"""_summary_
This Object has already been paginated
"""
This Object will create a Paginator class for Implementation
Args:
output (str, optional): _description_. Defaults to "".
to_df (bool, optional): _description_. Defaults to False.
Expand All @@ -67,8 +67,8 @@ def get_all(
page_size: Union[int, None] = None,
show_bar: bool = True,
) -> Result:
"""_summary_
This Object has already been paginated
"""
This Object will create a paged value that will be Implemented
Args:
output (str, optional): _description_. Defaults to "".
page_size (Union[int, None], optional): _description_. Defaults to None.
Expand Down
2 changes: 1 addition & 1 deletion cdapython/results/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def get_all(
show_bar: bool = True,
) -> "CollectResult":
"""
get_all is a method that will loop for you
The get_all method automatically paginates for users and saves data into a `Result` like object.

Args:
output (str, optional): _description_. Defaults to "".
Expand Down