Merge pull request #204 from CancerDataAggregator/integration

Integration
CancerDataAggregator · Aug 22, 2023 · a120628 · a120628
2 parents a8a2b67 + c6004ef
commit a120628
Show file tree

Hide file tree

Showing 29 changed files with 1,891 additions and 123 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@ cdapython.egg-info
 
 #virtualenv
 venv
+venv8
 #vscode
 .vscode
 
@@ -38,6 +39,7 @@ typeReport
 .mypy_cache
 .coverage
 .venv
+venv*
 venv2
 .ven3
 .venv_11

diff --git a/DataSummaries.ipynb b/DataSummaries.ipynb
@@ -32,9 +32,7 @@
    "cell_type": "code",
    "execution_count": 3,
    "id": "5245b7b0",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -320,9 +318,7 @@
    "cell_type": "code",
    "execution_count": 8,
    "id": "6d9137aa",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -702,9 +698,7 @@
    "cell_type": "code",
    "execution_count": 5,
    "id": "993479db",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "subjectresults"
@@ -831,9 +825,7 @@
    "cell_type": "code",
    "execution_count": 8,
    "id": "a2d8c874",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -974,9 +966,7 @@
    "cell_type": "code",
    "execution_count": 9,
    "id": "7770d68c",
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1732,7 +1722,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.7.13 ('venv': venv)",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -1746,7 +1736,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.16"
+   "version": "3.8.16"
   },
   "metadata": {
    "interpreter": {

diff --git a/Untitled.ipynb b/Untitled.ipynb
@@ -376,7 +376,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.16"
+   "version": "3.8.16"
   },
   "vscode": {
    "interpreter": {

diff --git a/Untitled1.ipynb b/Untitled1.ipynb
@@ -444,7 +444,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.15"
+   "version": "3.8.16"
   },
   "vscode": {
    "interpreter": {

diff --git a/cdapython/Paginator.py b/cdapython/Paginator.py
@@ -3,7 +3,7 @@
 """
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, List, TypeVar, Union
+from typing import TYPE_CHECKING, Any, List, Optional, TypeVar, Union
 
 from rich.progress import (
     BarColumn,
@@ -57,12 +57,9 @@ def __init__(
             TimeElapsedColumn(),
             MofNCompleteColumn(),
         )
+        self.progress.live._screen = True
+        self.task: Optional[TaskID] = None
         self.show_bar: bool = show_bar
-        if self.show_bar:
-            self.task: TaskID = self.progress.add_task(
-                "Processing", total=self.result.total_row_count
-            )
-            self.progress.update(self.task, advance=self.result.count)
 
     def _return_result(self) -> Union[DataFrame, List[Any], Result]:
         """_summary_
@@ -83,14 +80,39 @@ def _return_result(self) -> Union[DataFrame, List[Any], Result]:
         return self.result
 
     def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]:
+        """
+        This will get the next value from the result object and update the progress bar.
+        Args:
+            self (Paginator): _description_
+
+        Raises:
+            e: _description_
+
+        Returns:
+            Union[DataFrame, List[Any], Result, None]: _description_
+        """
+        if self.task is None and self.show_bar:
+            self.task = self.progress.add_task(
+                "Processing", total=self.result.total_row_count
+            )
+
+            self.progress.update(
+                task_id=self.task, advance=self.result.count, refresh=True
+            )
         result_nx = self._return_result()
+
         if self.result.has_next_page:
             try:
                 tmp_result = self.result.next_page(limit=self.limit)
                 if tmp_result:
                     self.result = tmp_result
                     if self.show_bar:
-                        self.progress.update(self.task, advance=self.result.count)
+                        if self.task is not None:
+                            self.progress.update(
+                                task_id=self.task,
+                                advance=self.result.count,
+                                refresh=True,
+                            )
                     return result_nx
             except Exception as e:
                 if self.show_bar:
@@ -104,6 +126,11 @@ def _do_next(self: Paginator) -> Union[DataFrame, List[Any], Result, None]:
         return None
 
     async def a_do_next(self) -> Union[List[T], DataFrame, Result, None]:
+        """
+        This is an async version of the _do_next function; it returns a result object.
+        Returns:
+            Union[List[T], DataFrame, Result, None]: _description_
+        """
         return self._do_next()
 
     def __iter__(self) -> Paginator:
@@ -120,7 +147,6 @@ async def __anext__(self) -> Union[list, DataFrame, Result, None]:
         try:
             if self.stopped:
                 if self.show_bar:
-                    self.progress.update(self.task, advance=self.result.count)
                     self.progress.stop()
                 raise StopAsyncIteration
             self.count += self.result.count
@@ -134,7 +160,6 @@ def __next__(self) -> Union[list, DataFrame, Result, None]:
         try:
             if self.stopped:
                 if self.show_bar:
-                    self.progress.update(self.task, advance=self.result.count)
                     self.progress.stop()
                 raise StopIteration
             self.count += self.result.count

diff --git a/cdapython/Q.py b/cdapython/Q.py
@@ -594,24 +594,25 @@ def run(
         format_type: str = "json",
         show_sql: bool = False,
     ) -> Union[QueryCreatedData, ApplyResult, Result, DryClass, None]:
-        """_summary_
-        This will call the server to make a request return a Result like object
+        """
+        The run method serves to make a request and return a Result object.
         Args:
             offset (int, optional): _description_. Defaults to 0.
             page_size (int, optional): _description_. Defaults to 100.
-            version (Optional[str], optional): _description_. Defaults to None.
-            host (Optional[str], optional): _description_. Defaults to None.
+            limit (Union[int, None], optional): _description_. Defaults to None.
+            version (Union[str, None], optional): _description_. Defaults to None.
+            host (Union[str, None], optional): _description_. Defaults to None.
             dry_run (bool, optional): _description_. Defaults to False.
-            table (Optional[str], optional): _description_. Defaults to None.
+            table (Union[str, None], optional): _description_. Defaults to None.
             async_call (bool, optional): _description_. Defaults to False.
-            verify (Optional[bool], optional): _description_. Defaults to None.
-            verbose (Optional[bool], optional): _description_. Defaults to True.
-            filter (Optional[str], optional): _description_. Defaults to None.
-            flatten (Optional[bool], optional): _description_. Defaults to False.
-            format (Optional[str], optional): _description_. Defaults to "json".
+            verify (Union[bool, None], optional): _description_. Defaults to None.
+            verbose (bool, optional): _description_. Defaults to True.
+            include (Union[str, None], optional): _description_. Defaults to None.
+            format_type (str, optional): _description_. Defaults to "json".
+            show_sql (bool, optional): _description_. Defaults to False.
 
         Returns:
-            Optional[Result]: _description_
+            Union[QueryCreatedData, ApplyResult, Result, DryClass, None]: _description_
         """
         dry_run_current = False
 
@@ -648,7 +649,7 @@ def run(
                 # Execute boolean query
                 if verbose:
                     print(
-                        "Getting results from database",
+                        f"Getting {page_size} results from database ",
                         end="\n\n",
                     )
 

diff --git a/cdapython/__init__.py b/cdapython/__init__.py
@@ -5,6 +5,7 @@
 
 from typing import Any
 
+from rich.console import Console
 from typing_extensions import Literal
 
 from cdapython.constant_variables import Constants
@@ -112,11 +113,18 @@
     import builtins as __builtin__
 
 
-def console_print(*args: Any, **kwargs: Any) -> None:
-    from rich.console import Console
-
-    console: Console = Console()
-    console.print(*args, **kwargs)
+def console_print(*args, **kwargs) -> None:
+    """
+    This function is used to override the default Python `print` function.
+    We do this by assigning it to Python's `__builtin__.print`.
+    Note: there is a conditional check for a `file` keyword argument.
+    If a system or user passes `file` to the function, it calls `print(*args)` instead of printing through the rich console.
+    """
+    if "file" in kwargs:
+        print(*args)
+    else:
+        console: Console = Console()
+        console.print(*args, **kwargs)
 
 
 __builtin__.print = console_print

diff --git a/cdapython/constant_variables.py b/cdapython/constant_variables.py
@@ -9,7 +9,7 @@ class Constants:
     This class Holds Q Global Constants used in Http methods
     """
 
-    _VERSION: str = "2023.6.13"
+    _VERSION: str = "2023.8.22"
     _DATABASETABLE_VERSION: str = "all_merged_subjects_v3_3_final"
     _DATABASETABLE: str = "broad-dsde-prod.cda_prod"
     _CDA_API_URL_ENV: str = "https://cda.datacommons.cancer.gov/"

diff --git a/cdapython/factories/count.py b/cdapython/factories/count.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 from cda_client.api.query_api import QueryApi
 from cda_client.api_client import Endpoint

diff --git a/cdapython/results/factories/collect_result.py b/cdapython/results/factories/collect_result.py
@@ -5,6 +5,7 @@
 from cda_client.api.query_api import QueryApi
 from cda_client.model.query_response_data import QueryResponseData
 
+from cdapython.Paginator import Paginator
 from cdapython.results.factories.not_paginated_result import NotPaginatedResult
 from cdapython.results.factories.result_factory import AbstractFactory
 from cdapython.utils.none_check import none_check
@@ -46,7 +47,7 @@ def __init__(
 
     def extend_result(self, result: Result) -> None:
         """_summary_
-        This will method will
+        This method will concat the list value to a global list
         Args:
             result (Result): _description_
         """
@@ -59,6 +60,17 @@ def get_all(
         page_size: Union[None, int] = None,
         show_bar: bool = True,
     ) -> Result:
+        """
+        This will page through the query results automatically, looping and appending result objects to a list
+        for the user.
+        Args:
+            output (str, optional): if you add the output string "full_list" or "full_df", you will get either a list or a DataFrame. Defaults to "".
+            page_size (int, optional): this can increase the page size of the results. Defaults to None. Known issue: the first page is always 100, even when the size is adjusted.
+            show_bar (bool, optional): This will show or hide the progress bar. Defaults to True.
+
+        Returns:
+            Result: _description_
+        """
         return super().get_all(output, page_size, show_bar)
 
     def paginator(
@@ -68,8 +80,27 @@ def paginator(
         to_list: bool = False,
         page_size: int = None,
         show_bar: bool = False,
-    ):
-        return super().paginator(output, to_df, to_list, page_size, show_bar)
+    ) -> Paginator:
+        """
+        This will return a pagination class (Collection) for iteration, such as the for loop.
+        By default, the pagination object will return a result class, which is normally what you will get from a `Q.run`
+        Args:
+            output (str, optional): if you add the output string "full_list" or "full_df", you will get either a list or a DataFrame. Defaults to "".
+            to_df (bool, optional): This will return a DataFrame from the paginator. Defaults to False.
+            to_list (bool, optional): This will return a list from the paginator. Defaults to False.
+            page_size (int, optional): this can change the page size of the results. Defaults to results limit.
+            show_bar (bool, optional): This will show or hide the progress bar. Defaults to False.
+
+        Returns:
+            Paginator: _description_
+        """
+        return super().paginator(
+            output=output,
+            to_df=to_df,
+            to_list=to_list,
+            page_size=page_size,
+            show_bar=show_bar,
+        )
 
     class Factory(AbstractFactory):
         @staticmethod

diff --git a/cdapython/results/factories/not_paginated_result.py b/cdapython/results/factories/not_paginated_result.py
@@ -44,8 +44,8 @@ def paginator(
         page_size: int = None,
         show_bar: bool = False,
     ) -> Paginator:
-        """_summary_
-        This Object has already been paginated
+        """
+        This Object will create a Paginator class for Implementation
         Args:
             output (str, optional): _description_. Defaults to "".
             to_df (bool, optional): _description_. Defaults to False.
@@ -67,8 +67,8 @@ def get_all(
         page_size: Union[int, None] = None,
         show_bar: bool = True,
     ) -> Result:
-        """_summary_
-        This Object has already been paginated
+        """
+        This Object will create a paged value that will be Implemented
         Args:
             output (str, optional): _description_. Defaults to "".
             page_size (Union[int, None], optional): _description_. Defaults to None.

diff --git a/cdapython/results/result.py b/cdapython/results/result.py
@@ -176,7 +176,7 @@ def get_all(
         show_bar: bool = True,
     ) -> "CollectResult":
         """
-        get_all is a method that will loop for you
+        The get_all method automatically paginates for users and saves data into a `Result`-like object.
 
         Args:
             output (str, optional): _description_. Defaults to "".