Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions coderdata/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ def format(
values='transcriptomics',
index='entrez_id',
columns='improve_sample_id'
- )
+ ).transpose()

elif data_type == "mutations":
if data.mutations is None:
Expand All @@ -516,7 +516,7 @@ def format(
columns='improve_sample_id',
values='exists',
fill_value=0,
- )
+ ).transpose()

elif data_type == "copy_number":
if data.copy_number is None:
Expand All @@ -531,7 +531,7 @@ def format(
columns='improve_sample_id',
values='copy_number',
aggfunc='mean',
- )
+ ).transpose()
if copy_call:
ret = ret.apply(
pd.cut,
Expand All @@ -552,7 +552,7 @@ def format(
values='proteomics',
index='entrez_id',
columns='improve_sample_id'
- )
+ ).transpose()

elif data_type == "experiments":
if data.experiments is None:
Expand Down Expand Up @@ -936,13 +936,16 @@ def train_test_validate(
sss_1.split(X=df_full, y=df_full['split_class'])
)
df_train = df_full.iloc[idx_train]
+ df_train = df_train.drop(labels=['split_class'], axis=1)
df_other = df_full.iloc[idx_other]
# Splitting 'other' further into test and validate
idx_test, idx_val = next(
sss_2.split(X=df_other, y=df_other['split_class'])
)
df_test = df_other.iloc[idx_test]
+ df_test = df_test.drop(labels=['split_class'], axis=1)
df_val = df_other.iloc[idx_val]
+ df_val = df_val.drop(labels=['split_class'], axis=1)

# using StratifiedGroupKSplit for the stratified drug-/sample-
# blind splits.
Expand Down
2 changes: 1 addition & 1 deletion coderdata/download/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def download(
file
for file
in data['files']
- if file['name'].startswith(name)
+ if file['name'].startswith(name) or 'genes' in file['name']
]
else:
filtered_files = data['files']
Expand Down