Skip to content
This repository has been archived by the owner on Apr 17, 2024. It is now read-only.

Commit

Permalink
Merge pull request #204 from CancerDataAggregator/integration
Browse files Browse the repository at this point in the history
Integration
  • Loading branch information
dionboles-asym committed Aug 22, 2023
2 parents a8a2b67 + c6004ef commit a120628
Show file tree
Hide file tree
Showing 29 changed files with 1,891 additions and 123 deletions.
Binary file modified .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ cdapython.egg-info

#virtualenv
venv
venv8
#vscode
.vscode

Expand All @@ -38,6 +39,7 @@ typeReport
.mypy_cache
.coverage
.venv
venv*
venv2
.ven3
.venv_11
Expand Down
24 changes: 7 additions & 17 deletions DataSummaries.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
"cell_type": "code",
"execution_count": 3,
"id": "5245b7b0",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -320,9 +318,7 @@
"cell_type": "code",
"execution_count": 8,
"id": "6d9137aa",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -702,9 +698,7 @@
"cell_type": "code",
"execution_count": 5,
"id": "993479db",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"subjectresults"
Expand Down Expand Up @@ -831,9 +825,7 @@
"cell_type": "code",
"execution_count": 8,
"id": "a2d8c874",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -974,9 +966,7 @@
"cell_type": "code",
"execution_count": 9,
"id": "7770d68c",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -1732,7 +1722,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.13 ('venv': venv)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -1746,7 +1736,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
"version": "3.8.16"
},
"metadata": {
"interpreter": {
Expand Down
2 changes: 1 addition & 1 deletion Untitled.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
"version": "3.8.16"
},
"vscode": {
"interpreter": {
Expand Down
2 changes: 1 addition & 1 deletion Untitled1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.15"
"version": "3.8.16"
},
"vscode": {
"interpreter": {
Expand Down
43 changes: 34 additions & 9 deletions cdapython/Paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from __future__ import annotations

from typing import TYPE_CHECKING, Any, List, TypeVar, Union
from typing import TYPE_CHECKING, Any, List, Optional, TypeVar, Union

from rich.progress import (
BarColumn,
Expand Down Expand Up @@ -57,12 +57,9 @@ def __init__(
TimeElapsedColumn(),
MofNCompleteColumn(),
)
self.progress.live._screen = True
self.task: Optional[TaskID] = None
self.show_bar: bool = show_bar
if self.show_bar:
self.task: TaskID = self.progress.add_task(
"Processing", total=self.result.total_row_count
)
self.progress.update(self.task, advance=self.result.count)

def _return_result(self) -> Union[DataFrame, List[Any], Result]:
"""_summary_
Expand All @@ -83,14 +80,39 @@ def _return_result(self) -> Union[DataFrame, List[Any], Result]:
return self.result

def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]:
"""
This will get the next value from the result object and update the progress bar
Args:
self (Paginator): _description_
Raises:
e: _description_
Returns:
Union[DataFrame, List[Any], Result, None]: _description_
"""
if self.task is None and self.show_bar:
self.task = self.progress.add_task(
"Processing", total=self.result.total_row_count
)

self.progress.update(
task_id=self.task, advance=self.result.count, refresh=True
)
result_nx = self._return_result()

if self.result.has_next_page:
try:
tmp_result = self.result.next_page(limit=self.limit)
if tmp_result:
self.result = tmp_result
if self.show_bar:
self.progress.update(self.task, advance=self.result.count)
if self.task is not None:
self.progress.update(
task_id=self.task,
advance=self.result.count,
refresh=True,
)
return result_nx
except Exception as e:
if self.show_bar:
Expand All @@ -104,6 +126,11 @@ def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]:
return None

async def a_do_next(self) -> Union[List[T], DataFrame, Result, None]:
    """
    Coroutine counterpart of ``_do_next``.

    The underlying ``_do_next`` call is made directly inside this coroutine;
    nothing is awaited here.

    Returns:
        Union[List[T], DataFrame, Result, None]: Whatever ``_do_next`` returns.
    """
    next_chunk = self._do_next()
    return next_chunk

def __iter__(self) -> Paginator:
Expand All @@ -120,7 +147,6 @@ async def __anext__(self) -> Union[list, DataFrame, Result, None]:
try:
if self.stopped:
if self.show_bar:
self.progress.update(self.task, advance=self.result.count)
self.progress.stop()
raise StopAsyncIteration
self.count += self.result.count
Expand All @@ -134,7 +160,6 @@ def __next__(self) -> Union[list, DataFrame, Result, None]:
try:
if self.stopped:
if self.show_bar:
self.progress.update(self.task, advance=self.result.count)
self.progress.stop()
raise StopIteration
self.count += self.result.count
Expand Down
25 changes: 13 additions & 12 deletions cdapython/Q.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,24 +594,25 @@ def run(
format_type: str = "json",
show_sql: bool = False,
) -> Union[QueryCreatedData, ApplyResult, Result, DryClass, None]:
"""_summary_
This will call the server to make a request return a Result like object
"""
The run method serves to make a request and return a Result object.
Args:
offset (int, optional): _description_. Defaults to 0.
page_size (int, optional): _description_. Defaults to 100.
version (Optional[str], optional): _description_. Defaults to None.
host (Optional[str], optional): _description_. Defaults to None.
limit (Union[int, None], optional): _description_. Defaults to None.
version (Union[str, None], optional): _description_. Defaults to None.
host (Union[str, None], optional): _description_. Defaults to None.
dry_run (bool, optional): _description_. Defaults to False.
table (Optional[str], optional): _description_. Defaults to None.
table (Union[str, None], optional): _description_. Defaults to None.
async_call (bool, optional): _description_. Defaults to False.
verify (Optional[bool], optional): _description_. Defaults to None.
verbose (Optional[bool], optional): _description_. Defaults to True.
filter (Optional[str], optional): _description_. Defaults to None.
flatten (Optional[bool], optional): _description_. Defaults to False.
format (Optional[str], optional): _description_. Defaults to "json".
verify (Union[bool, None], optional): _description_. Defaults to None.
verbose (bool, optional): _description_. Defaults to True.
include (Union[str, None], optional): _description_. Defaults to None.
format_type (str, optional): _description_. Defaults to "json".
show_sql (bool, optional): _description_. Defaults to False.
Returns:
Optional[Result]: _description_
Union[QueryCreatedData, ApplyResult, Result, DryClass, None]: _description_
"""
dry_run_current = False

Expand Down Expand Up @@ -648,7 +649,7 @@ def run(
# Execute boolean query
if verbose:
print(
"Getting results from database",
f"Getting {page_size} results from database ",
end="\n\n",
)

Expand Down
18 changes: 13 additions & 5 deletions cdapython/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from typing import Any

from rich.console import Console
from typing_extensions import Literal

from cdapython.constant_variables import Constants
Expand Down Expand Up @@ -112,11 +113,18 @@
import builtins as __builtin__


def console_print(*args: Any, **kwargs: Any) -> None:
from rich.console import Console

console: Console = Console()
console.print(*args, **kwargs)
def console_print(*args: Any, **kwargs: Any) -> None:
    """
    Replacement for the built-in ``print`` that renders through ``rich``.

    This module assigns this function to ``__builtin__.print``, so ``print``
    calls are routed here.

    Args:
        *args: Objects to print.
        **kwargs: Keyword arguments of the call. When ``file`` is present the
            call is handled with plain built-in ``print`` semantics (``sep``,
            ``end``, ``file``, ``flush``). Otherwise everything is forwarded
            unchanged to ``rich.console.Console.print``.
    """
    if "file" in kwargs:
        # Once the builtin has been replaced, the name ``print`` resolves to
        # this very function. Calling ``print(*args)`` here would drop the
        # caller's keyword arguments and send the text to the rich console
        # instead of the requested stream, so write to the stream directly.
        import sys

        stream = kwargs["file"]
        if stream is None:
            stream = sys.stdout
        if stream is None:
            # Built-in print silently does nothing when there is no stdout.
            return
        sep = kwargs.get("sep")
        end = kwargs.get("end")
        text = (" " if sep is None else sep).join(str(arg) for arg in args)
        stream.write(text + ("\n" if end is None else end))
        if kwargs.get("flush", False):
            stream.flush()
    else:
        console: Console = Console()
        console.print(*args, **kwargs)


__builtin__.print = console_print
Expand Down
2 changes: 1 addition & 1 deletion cdapython/constant_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Constants:
This class Holds Q Global Constants used in Http methods
"""

_VERSION: str = "2023.6.13"
_VERSION: str = "2023.8.22"
_DATABASETABLE_VERSION: str = "all_merged_subjects_v3_3_final"
_DATABASETABLE: str = "broad-dsde-prod.cda_prod"
_CDA_API_URL_ENV: str = "https://cda.datacommons.cancer.gov/"
Expand Down
2 changes: 1 addition & 1 deletion cdapython/factories/count.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING

from cda_client.api.query_api import QueryApi
from cda_client.api_client import Endpoint
Expand Down
37 changes: 34 additions & 3 deletions cdapython/results/factories/collect_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cda_client.api.query_api import QueryApi
from cda_client.model.query_response_data import QueryResponseData

from cdapython.Paginator import Paginator
from cdapython.results.factories.not_paginated_result import NotPaginatedResult
from cdapython.results.factories.result_factory import AbstractFactory
from cdapython.utils.none_check import none_check
Expand Down Expand Up @@ -46,7 +47,7 @@ def __init__(

def extend_result(self, result: Result) -> None:
"""_summary_
This will method will
This method will concat the list value to a global list
Args:
result (Result): _description_
"""
Expand All @@ -59,6 +60,17 @@ def get_all(
page_size: Union[None, int] = None,
show_bar: bool = True,
) -> Result:
"""
This will page through the query results automatically looping and appending result objects to a List
for the user.
Args:
output (str, optional): if you add this output string "full_list" or "full_df" you will either get a list or a DataFrame. Defaults to "".
page_size (int, optional): this can increase the page size of the results. Defaults to None. Known issue: the first page is always 100, even when the size is adjusted.
show_bar (bool, optional): This will show or hide the progress bar. Defaults to True.
Returns:
Result: _description_
"""
return super().get_all(output, page_size, show_bar)

def paginator(
Expand All @@ -68,8 +80,27 @@ def paginator(
to_list: bool = False,
page_size: int = None,
show_bar: bool = False,
):
return super().paginator(output, to_df, to_list, page_size, show_bar)
) -> Paginator:
"""
This will return a pagination class (Collection) for iteration, such as the for loop.
By default, the pagination object will return a result class, which is normally what you will get from a `Q.run`
Args:
output (str, optional): if you add this output string "full_list" or "full_df" you will either get a list or a DataFrame. Defaults to "".
to_df (bool, optional): This will return a list from the DataFrame. Defaults to False.
to_list (bool, optional): This will return a list from the paginator. Defaults to False.
page_size (int, optional): this can change the page size of results. Defaults to results limit.
show_bar (bool, optional): This will show or hide the progress bar. Defaults to False.
Returns:
Paginator: _description_
"""
return super().paginator(
output=output,
to_df=to_df,
to_list=to_list,
page_size=page_size,
show_bar=show_bar,
)

class Factory(AbstractFactory):
@staticmethod
Expand Down
8 changes: 4 additions & 4 deletions cdapython/results/factories/not_paginated_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def paginator(
page_size: int = None,
show_bar: bool = False,
) -> Paginator:
"""_summary_
This Object has already been paginated
"""
This Object will create a Paginator class for Implementation
Args:
output (str, optional): _description_. Defaults to "".
to_df (bool, optional): _description_. Defaults to False.
Expand All @@ -67,8 +67,8 @@ def get_all(
page_size: Union[int, None] = None,
show_bar: bool = True,
) -> Result:
"""_summary_
This Object has already been paginated
"""
This Object will create a paged value that will be Implemented
Args:
output (str, optional): _description_. Defaults to "".
page_size (Union[int, None], optional): _description_. Defaults to None.
Expand Down
2 changes: 1 addition & 1 deletion cdapython/results/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def get_all(
show_bar: bool = True,
) -> "CollectResult":
"""
get_all is a method that will loop for you
The get_all method automatically paginates for users and saves data into a `Result` like object.
Args:
output (str, optional): _description_. Defaults to "".
Expand Down

0 comments on commit a120628

Please sign in to comment.