Skip to content
This repository has been archived by the owner on Apr 17, 2024. It is now read-only.

revert the to_list and to_dataframe changes #196

Merged
merged 1 commit into from
Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion cdapython/results/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def to_dataframe(
max_level: Union[int, None] = None,
search_fields: Union[List[str], str, None] = None,
search_value: str = "",
allow_substring: bool = True,
) -> DataFrame:
"""[summary]
Creates a pandas DataFrame for the Results
Expand Down
142 changes: 88 additions & 54 deletions cdapython/results/columns_result.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
from typing import Any, Dict, List, Optional, Union

from pandas import DataFrame, Index, json_normalize, concat
from pandas import DataFrame, Index, json_normalize, merge
from typing_extensions import Literal, TypedDict

from cdapython.results.base import BaseResult


class _Column_Types(TypedDict):
    """
    Typed dictionary describing the keys of a single column record.

    This exists for type checking dictionaries that carry these keys;
    every value is declared as a string.
    """

    fieldName: str
    endpoint: str
    description: str
    mode: str


# Union of the literal key names declared on _Column_Types, for annotating
# a value that must be one of those four key strings.
_Column_str = Union[
    Literal["fieldName"], Literal["endpoint"], Literal["description"], Literal["mode"]
]


class ColumnsResult(BaseResult):
def __init__(
self,
Expand Down Expand Up @@ -33,29 +52,65 @@ def __str__(self) -> str:
return self._repr_value(show_value=self.show_sql)

def to_list(
self,
search_fields: Union[str, List[str], None] = None,
search_value: Union[str, None] = None,
allow_substring: bool = True,
self, filters: Optional[str] = None, exact: bool = False, endpoint: str = ""
) -> List[Any]:
"""_summary_

Args:
allow_substring (bool, optional): Whether the seach_value should match if it is only part of a word. Defaults to True.
search_fields (Union[str, List[str], None]): _description_. Defaults to None.
search_value (Optional[str], optional): _description_. Defaults to None.

Returns:
List[Any]: _description_
"""
result = self.to_dataframe(
search_fields=search_fields,
search_value=search_value,
allow_substring=allow_substring,
)
if filters is not None and filters != "":
values: Union[List[_Column_Types], List[Any]] = []
filters = filters.replace("\n", " ").strip()
if self.description is False:
values.extend(
[i["fieldName"] for i in self._result if i["fieldName"] is not None]
)
if self.description:
values.extend([i for i in self._result if list(i) is not None])
# values = list(filter(None, values))
if exact:
if self.description is False:
return list(
filter(
lambda items: (
str(items["fieldName"]).lower() == filters.lower()
),
values,
)
)
return list(
filter(
lambda items: (
str(items["description"]).lower() == filters.lower()
or str(items["endpoint"]).lower() == filters.lower()
or str(items["fieldName"]).lower() == filters.lower()
),
values,
)
)
else:
if self.description is False:
return list(
filter(
lambda items: (
str(items).lower().find(str(filters.lower())) != -1
),
values,
)
)
return list(
filter(
lambda items: (
str(items["description"]).lower().find(filters.lower())
!= -1
or str(items["endpoint"]).lower().find(filters.lower())
!= -1
or str(items["fieldName"]).lower().find(filters.lower())
!= -1
),
values,
)
)
if self.description is False:
return result["fieldName"].values.tolist()
return list(result.to_dict("records"))
return [i["fieldName"] for i in self._result]

return list(self._result)

def to_dataframe(
self,
Expand All @@ -64,8 +119,7 @@ def to_dataframe(
meta_prefix: Union[str, None] = None,
max_level: Union[int, None] = None,
search_fields: Union[List[str], str, None] = None,
search_value: Optional[str] = None,
allow_substring: bool = True,
search_value: str = "",
) -> DataFrame:
"""[summary]
Creates a pandas DataFrame for the Results
Expand All @@ -82,36 +136,16 @@ def to_dataframe(
value = DataFrame(columns=column_names, index=Index([], dtype="int"))
if isinstance(search_fields, str):
search_fields = [search_fields]
if allow_substring:
for i in search_fields:
value = (
concat(
[
value,
data_frame[
data_frame[i].str.contains(
search_value, case=False, na=False
)
],
]
)
.drop_duplicates()
.reset_index(drop=True)
)
else:
for i in search_fields:
value = (
concat(
[
value,
data_frame[
data_frame[i].str.lower() == search_value.lower()
],
]
)
.drop_duplicates()
.reset_index(drop=True)
)
for i in search_fields:
value = merge(
value,
data_frame[
data_frame[i].str.contains(search_value, case=False, na=False)
],
how="right",
right_on=column_names,
left_on=column_names,
)
return value
if self.format_type == "tsv":
return self._df
Expand Down
40 changes: 16 additions & 24 deletions cdapython/results/string_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This class inherits from the result class; it is made for the unique terms function
in the utility class, just to provide a different to_list
"""
from typing import List, Optional, Any
from typing import List, Optional

from cda_client.api.query_api import QueryApi
from cda_client.model.query_response_data import QueryResponseData
Expand Down Expand Up @@ -38,43 +38,35 @@ def __init__(
format_type,
)

def to_list(
self,
search_value: Optional[str] = None,
allow_substring: bool = True,
) -> List[Any]:
def to_list(self, filters: Optional[str] = None, exact: bool = False) -> list:
"""_summary_
This overrides the base Result to_list function.
Args:
allow_substring (bool, optional): Whether the seach_value should match if it is only part of a word. Defaults to True.
search_fields (Union[str, List[str], None]): _description_. Defaults to None.
search_value (Optional[str], optional): _description_. Defaults to None.
filters (Optional[str], optional): _description_. Defaults to None.
exact (bool, optional): _description_. Defaults to False.

Returns:
List[Any]: _description_
list: _description_
"""
if search_value is not None:
values: list["StringResult"] = [
if filters is not None and filters != "":
filters = filters.replace("\n", " ").strip()
values: List["StringResult"] = [
list(i.values())[0]
for i in self._api_response.result
if list(i.values())[0] is not None
]

if allow_substring:
# concatenate all search values
# values = list(filter(None, values))
if exact:
return list(
filter(
lambda term: (
str(term).lower().find(str(search_value.lower())) != -1
),
lambda items: (str(items).lower() == filters.lower()),
values,
)
)
else:
return list(
filter(
lambda term: (term.lower() in search_value.lower()),
values,
)
return list(
filter(
lambda items: (str(items).lower().find(str(filters.lower())) != -1),
values,
)
)
return [list(i.values())[0] for i in self._api_response.result]
6 changes: 3 additions & 3 deletions notebooks/BuildingACohort.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@
}
],
"source": [
"columns().to_list(search_value=\"diagnosis\")"
"columns().to_list(filters=\"diagnosis\")"
]
},
{
Expand Down Expand Up @@ -1117,7 +1117,7 @@
"metadata": {},
"outputs": [],
"source": [
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(search_value=\"uter\")"
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"uter\")"
]
},
{
Expand All @@ -1138,7 +1138,7 @@
"metadata": {},
"outputs": [],
"source": [
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(search_value=\"cerv\")"
"unique_terms(\"ResearchSubject.primary_diagnosis_site\").to_list(filters=\"cerv\")"
]
},
{
Expand Down
16 changes: 2 additions & 14 deletions tests/colmdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,11 @@
print(
columns(
host="https://cancerdata.dsde-dev.broadinstitute.org/", description=False
).to_list(search_fields=["fieldName"], search_value="ma")
).to_list(filters="ma")
)

print(
columns(
host="https://cancerdata.dsde-dev.broadinstitute.org/", description=True
).to_list(
search_fields=["fieldName", "endpoint"],
search_value="tumor",
allow_substring=True,
)
)

print(
columns(
host="https://cancerdata.dsde-dev.broadinstitute.org/", description=True
).to_dataframe(
search_fields=["description"], search_value="tumor", allow_substring=False
)
).to_list(filters="ma")
)
2 changes: 1 addition & 1 deletion tests/dataframe_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"# df = columns().to_dataframe()\n",
"# df[df[col].str.contains(val, case=False, na=False)]\n",
"\"\"\" to search with to_dataframe function you need the column name and the value use : to search like this Description:GDC \"\"\"\n",
"columns().to_list(search_value=\"AF\") "
"columns().to_list(filters=\"AF\") "
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions tests/rich_file_id_problim.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def test():
columns().to_list()

columns().to_list(search_value="primary_diagnosis_site")
unique_terms("ResearchSubject.primary_diagnosis_site").to_list(search_value="ov")
columns().to_list(filters="primary_diagnosis_site")
unique_terms("ResearchSubject.primary_diagnosis_site").to_list(filters="ov")

r = Q("ResearchSubject.primary_diagnosis_site = 'Ovary'").file.run(host=host)
17 changes: 0 additions & 17 deletions tests/test_C_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,3 @@ def test_unique_terms() -> None:
"sex", "GDC", host=integration_host, table=integration_table
).to_list()
assert "female" in terms


def test_unique_terms_search_partial() -> None:
terms = unique_terms(
"sex", "GDC", host=integration_host, table=integration_table
).to_list(search_value="male")
assert "female" in terms
assert "male" in terms
assert "unknown" not in terms


def test_unique_terms_search() -> None:
terms = unique_terms(
"sex", "GDC", host=integration_host, table=integration_table
).to_list(search_value="male", allow_substring=False)
assert "male" in terms
assert "female" not in terms
1 change: 0 additions & 1 deletion tests/test_unique_terms_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def test_unique_terms_convert() -> None:
show_counts=True,
async_req=True,
).to_dataframe()
print(d)


test_unique_terms_convert()