diff --git a/coderdata/dataset/dataset.py b/coderdata/dataset/dataset.py index c00a8661..e3ca6377 100644 --- a/coderdata/dataset/dataset.py +++ b/coderdata/dataset/dataset.py @@ -494,7 +494,7 @@ def format( values='transcriptomics', index='entrez_id', columns='improve_sample_id' - ) + ).transpose() elif data_type == "mutations": if data.mutations is None: @@ -516,7 +516,7 @@ def format( columns='improve_sample_id', values='exists', fill_value=0, - ) + ).transpose() elif data_type == "copy_number": if data.copy_number is None: @@ -531,7 +531,7 @@ def format( columns='improve_sample_id', values='copy_number', aggfunc='mean', - ) + ).transpose() if copy_call: ret = ret.apply( pd.cut, @@ -552,7 +552,7 @@ def format( values='proteomics', index='entrez_id', columns='improve_sample_id' - ) + ).transpose() elif data_type == "experiments": if data.experiments is None: @@ -936,13 +936,16 @@ def train_test_validate( sss_1.split(X=df_full, y=df_full['split_class']) ) df_train = df_full.iloc[idx_train] + df_train = df_train.drop(labels=['split_class'], axis=1) df_other = df_full.iloc[idx_other] # Splitting 'other' further into test and validate idx_test, idx_val = next( sss_2.split(X=df_other, y=df_other['split_class']) ) df_test = df_other.iloc[idx_test] + df_test = df_test.drop(labels=['split_class'], axis=1) df_val = df_other.iloc[idx_val] + df_val = df_val.drop(labels=['split_class'], axis=1) # using StratifiedGroupKSplit for the stratified drug-/sample- # blind splits. diff --git a/coderdata/download/downloader.py b/coderdata/download/downloader.py index a513963a..88587806 100644 --- a/coderdata/download/downloader.py +++ b/coderdata/download/downloader.py @@ -68,7 +68,7 @@ def download( file for file in data['files'] - if file['name'].startswith(name) + if file['name'].startswith(name) or 'genes' in file['name'] ] else: filtered_files = data['files']