Skip to content
This repository has been archived by the owner on Apr 17, 2024. It is now read-only.

Commit

Permalink
Merge pull request #196 from CancerDataAggregator/ah_revert_3
Browse files Browse the repository at this point in the history
revert the to_list and to_dataframe changes
  • Loading branch information
ahaessly committed Apr 18, 2023
2 parents 5d61dd5 + 1198bfb commit f1229da
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 117 deletions.
1 change: 0 additions & 1 deletion cdapython/results/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def to_dataframe(
max_level: Union[int, None] = None,
search_fields: Union[List[str], str, None] = None,
search_value: str = "",
allow_substring: bool = True,
) -> DataFrame:
"""[summary]
Creates a pandas DataFrame for the Results
Expand Down
142 changes: 88 additions & 54 deletions cdapython/results/columns_result.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
from typing import Any, Dict, List, Optional, Union

from pandas import DataFrame, Index, json_normalize, concat
from pandas import DataFrame, Index, json_normalize, merge
from typing_extensions import Literal, TypedDict

from cdapython.results.base import BaseResult


class _Column_Types(TypedDict):
    """
    Typed shape of a column record, so plain dicts can be type-checked.

    Declares four keys, all strings: ``fieldName``, ``endpoint``,
    ``description`` and ``mode``.
    """

    # NOTE(review): presumably one entry of ColumnsResult._result; to_list
    # reads "fieldName", "endpoint" and "description" from those entries.
    fieldName: str
    endpoint: str
    description: str
    mode: str


# Type alias accepting exactly the four key names declared on _Column_Types.
_Column_str = Union[
    Literal["fieldName"], Literal["endpoint"], Literal["description"], Literal["mode"]
]


class ColumnsResult(BaseResult):
def __init__(
self,
Expand Down Expand Up @@ -33,29 +52,65 @@ def __str__(self) -> str:
return self._repr_value(show_value=self.show_sql)

def to_list(
    self, filters: Optional[str] = None, exact: bool = False, endpoint: str = ""
) -> List[Any]:
    """Return the column records as a list, optionally narrowed by a filter.

    When ``self.description`` is ``False`` the result holds only the
    ``fieldName`` values; otherwise it holds the full records taken from
    ``self._result``.

    Args:
        filters (Optional[str], optional): Text to search for. Newlines are
            replaced by spaces and surrounding whitespace is stripped before
            matching; matching is case-insensitive. ``None`` or ``""``
            disables filtering. Defaults to None.
        exact (bool, optional): If True a value must equal ``filters``;
            if False it only has to contain ``filters``. Defaults to False.
        endpoint (str, optional): Accepted for interface compatibility;
            this method does not read it. Defaults to "".

    Returns:
        List[Any]: ``fieldName`` strings or full records, in the order they
        appear in ``self._result``.
    """
    names_only = self.description is False

    # No filter: hand back everything (None field names included).
    if filters is None or filters == "":
        if names_only:
            return [record["fieldName"] for record in self._result]
        return list(self._result)

    needle = filters.replace("\n", " ").strip().lower()

    def matches(value: Any) -> bool:
        # str() keeps non-string values (e.g. None -> "None") searchable.
        haystack = str(value).lower()
        return haystack == needle if exact else needle in haystack

    if names_only:
        # The candidates here are plain strings, so they are compared
        # directly; indexing them with ["fieldName"] raised TypeError.
        return [
            record["fieldName"]
            for record in self._result
            if record["fieldName"] is not None and matches(record["fieldName"])
        ]

    # Full records match when any of the three searchable fields matches.
    return [
        record
        for record in self._result
        if any(
            matches(record[key]) for key in ("description", "endpoint", "fieldName")
        )
    ]

def to_dataframe(
self,
Expand All @@ -64,8 +119,7 @@ def to_dataframe(
meta_prefix: Union[str, None] = None,
max_level: Union[int, None] = None,
search_fields: Union[List[str], str, None] = None,
search_value: Optional[str] = None,
allow_substring: bool = True,
search_value: str = "",
) -> DataFrame:
"""[summary]
Creates a pandas DataFrame for the Results
Expand All @@ -82,36 +136,16 @@ def to_dataframe(
value = DataFrame(columns=column_names, index=Index([], dtype="int"))
if isinstance(search_fields, str):
search_fields = [search_fields]
if allow_substring:
for i in search_fields:
value = (
concat(
[
value,
data_frame[
data_frame[i].str.contains(
search_value, case=False, na=False
)
],
]
)
.drop_duplicates()
.reset_index(drop=True)
)
else:
for i in search_fields:
value = (
concat(
[
value,
data_frame[
data_frame[i].str.lower() == search_value.lower()
],
]
)
.drop_duplicates()
.reset_index(drop=True)
)
for i in search_fields:
value = merge(
value,
data_frame[
data_frame[i].str.contains(search_value, case=False, na=False)
],
how="right",
right_on=column_names,
left_on=column_names,
)
return value
if self.format_type == "tsv":
return self._df
Expand Down
40 changes: 16 additions & 24 deletions cdapython/results/string_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This class inherits from the Result class. It is made for the unique terms function
in the utility class, just to provide a different to_list.
"""
from typing import List, Optional, Any
from typing import List, Optional

from cda_client.api.query_api import QueryApi
from cda_client.model.query_response_data import QueryResponseData
Expand Down Expand Up @@ -38,43 +38,35 @@ def __init__(
format_type,
)

def to_list(self, filters: Optional[str] = None, exact: bool = False) -> list:
    """Return the unique terms as a flat list, optionally filtered.

    This overloads the base Result ``to_list``. Each row of
    ``self._api_response.result`` is a mapping, and its first value is
    taken as the term.

    Args:
        filters (Optional[str], optional): Text to search for. Newlines are
            replaced by spaces and surrounding whitespace is stripped;
            matching is case-insensitive. ``None`` or ``""`` disables
            filtering. Defaults to None.
        exact (bool, optional): If True a term must equal ``filters``;
            if False it only has to contain ``filters``. Defaults to False.

    Returns:
        list: All terms when no filter is given (``None`` terms included),
        otherwise the non-``None`` terms that match.
    """
    rows = self._api_response.result

    if filters is None or filters == "":
        return [list(row.values())[0] for row in rows]

    needle = filters.replace("\n", " ").strip().lower()
    terms = [
        list(row.values())[0] for row in rows if list(row.values())[0] is not None
    ]

    if exact:
        return [term for term in terms if str(term).lower() == needle]
    return [term for term in terms if needle in str(term).lower()]
6 changes: 3 additions & 3 deletions notebooks/BuildingACohort.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@
}
],
"source": [
"columns().to_list(search_value=\"diagnosis\")"
"columns().to_list(filters=\"diagnosis\")"
]
},
{
Expand Down Expand Up @@ -1117,7 +1117,7 @@
"metadata": {},
"outputs": [],
"source": [
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(search_value=\"uter\")"
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"uter\")"
]
},
{
Expand All @@ -1138,7 +1138,7 @@
"metadata": {},
"outputs": [],
"source": [
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(search_value=\"cerv\")"
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"cerv\")"
]
},
{
Expand Down
16 changes: 2 additions & 14 deletions tests/colmdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,11 @@
print(
columns(
host="https://cancerdata.dsde-dev.broadinstitute.org/", description=False
).to_list(search_fields=["fieldName"], search_value="ma")
).to_list(filters="ma")
)

print(
columns(
host="https://cancerdata.dsde-dev.broadinstitute.org/", description=True
).to_list(
search_fields=["fieldName", "endpoint"],
search_value="tumor",
allow_substring=True,
)
)

print(
columns(
host="https://cancerdata.dsde-dev.broadinstitute.org/", description=True
).to_dataframe(
search_fields=["description"], search_value="tumor", allow_substring=False
)
).to_list(filters="ma")
)
2 changes: 1 addition & 1 deletion tests/dataframe_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"# df = columns().to_dataframe()\n",
"# df[df[col].str.contains(val, case=False, na=False)]\n",
"\"\"\" to search with to_dataframe function you need the column name and the value use : to search like this Description:GDC \"\"\"\n",
"columns().to_list(search_value=\"AF\") "
"columns().to_list(filters=\"AF\") "
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions tests/rich_file_id_problim.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def test():
columns().to_list()

columns().to_list(search_value="primary_diagnosis_site")
unique_terms("ResearchSubject.primary_diagnosis_site").to_list(search_value="ov")
columns().to_list(filters="primary_diagnosis_site")
unique_terms("ResearchSubject.primary_diagnosis_site").to_list(filters="ov")

r = Q("ResearchSubject.primary_diagnosis_site = 'Ovary'").file.run(host=host)
17 changes: 0 additions & 17 deletions tests/test_C_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,3 @@ def test_unique_terms() -> None:
"sex", "GDC", host=integration_host, table=integration_table
).to_list()
assert "female" in terms


def test_unique_terms_search_partial() -> None:
terms = unique_terms(
"sex", "GDC", host=integration_host, table=integration_table
).to_list(search_value="male")
assert "female" in terms
assert "male" in terms
assert "unknown" not in terms


def test_unique_terms_search() -> None:
terms = unique_terms(
"sex", "GDC", host=integration_host, table=integration_table
).to_list(search_value="male", allow_substring=False)
assert "male" in terms
assert "female" not in terms
1 change: 0 additions & 1 deletion tests/test_unique_terms_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def test_unique_terms_convert() -> None:
show_counts=True,
async_req=True,
).to_dataframe()
print(d)


test_unique_terms_convert()

0 comments on commit f1229da

Please sign in to comment.