From 26d0f04be6233604269baf960530af7c1db201e5 Mon Sep 17 00:00:00 2001 From: Joe Robertson Date: Tue, 15 Aug 2023 15:49:58 -0700 Subject: [PATCH] Wrap pandas read_csv method to handle new version. --- workflow/tests/compare.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/workflow/tests/compare.py b/workflow/tests/compare.py index f6526e6dc9..99f13a027b 100644 --- a/workflow/tests/compare.py +++ b/workflow/tests/compare.py @@ -45,8 +45,8 @@ def results(self, aggregate_column=None, aggregate_function=None, excludes=[], e print("Warning: %s not found. Skipping..." % feature_file) continue - base_df = pd.read_csv(base_file, index_col=0) - feature_df = pd.read_csv(feature_file, index_col=0) + base_df = read_csv(base_file, index_col=0) + feature_df = read_csv(feature_file, index_col=0) base_df = self.intersect_rows(base_df, feature_df) feature_df = self.intersect_rows(feature_df, base_df) @@ -151,14 +151,14 @@ def visualize(self, aggregate_column=None, aggregate_function=None, display_colu files.append(file) if display_columns or aggregate_columns: - base_characteristics_df = pd.read_csv( + base_characteristics_df = read_csv( os.path.join( self.base_folder, 'results_characteristics.csv'), index_col=0)[ display_columns + aggregate_columns] - feature_characteristics_df = pd.read_csv( + feature_characteristics_df = read_csv( os.path.join( self.feature_folder, 'results_characteristics.csv'), @@ -209,8 +209,8 @@ def remove_columns(cols): print("Warning: %s not found. Skipping..." 
def read_csv(csv_file_path, **kwargs) -> pd.DataFrame:
    """Read a CSV with pandas, keeping the literal string 'None' as data.

    By default the pandas NA-string set is used with 'None' removed, and
    ``keep_default_na`` is disabled so pandas does not add 'None' back in.
    Cells containing the text ``None`` therefore stay strings instead of
    being converted to NaN, while the other default markers ('NA', 'NaN',
    '', ...) are still parsed as missing.

    Args:
        csv_file_path: Path or file-like object, passed straight through to
            ``pandas.read_csv``.
        **kwargs: Any additional ``pandas.read_csv`` keyword arguments
            (e.g. ``index_col``). Explicit ``na_values`` or
            ``keep_default_na`` values override the defaults chosen here.

    Returns:
        The parsed DataFrame.
    """
    # NOTE: STR_NA_VALUES is a private pandas attribute; it is the only place
    # the default NA-string set is exposed, so it may move between releases.
    default_na_values = set(pd._libs.parsers.STR_NA_VALUES)

    # setdefault (rather than hard-coded keyword arguments) lets callers pass
    # their own na_values / keep_default_na without triggering
    # "got multiple values for keyword argument".
    kwargs.setdefault('na_values', sorted(default_na_values - {'None'}))
    kwargs.setdefault('keep_default_na', False)
    return pd.read_csv(csv_file_path, **kwargs)