Fixed pandas diagnostics for pandas>=2.0.0 (#3209)
Co-authored-by: Rémi Kazeroni <remi.kazeroni@dlr.de>
2 people authored and Javier Vegas-Regidor committed Jan 14, 2024
1 parent febb3a1 commit 42df8f4
Showing 4 changed files with 52 additions and 37 deletions.
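The commit addresses two pandas 2.0 incompatibilities: `DataFrame.append` was removed, so rows are now collected and combined with `pd.concat`, and `display.max_colwidth` no longer accepts `-1`, which is replaced by `None`. A minimal sketch of the append-to-concat pattern (illustration only, not taken from the changed files):

```python
import pandas as pd

df = pd.DataFrame({"tas": [287.5, 288.1]}, index=["ModelA", "ModelB"])
row = pd.Series({"tas": 287.8}, name="--MEAN--")

# pandas < 2.0 allowed: df = df.append(row)
# pandas >= 2.0: turn the Series into a one-row frame and concatenate instead.
df = pd.concat([df, row.to_frame().T])
print(df)
```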
23 changes: 14 additions & 9 deletions esmvaltool/diag_scripts/climate_metrics/create_table.py
@@ -49,7 +49,10 @@
 logger = logging.getLogger(os.path.basename(__file__))

 EXCLUDE_VAL = 0
-PANDAS_PRINT_OPTIONS = ['display.max_rows', None, 'display.max_colwidth', -1]
+PANDAS_PRINT_OPTIONS = [
+    'display.max_rows', None,
+    'display.max_colwidth', None,
+]


 def _add_numerical_index(data_frame, exclude_datasets):
@@ -72,7 +75,7 @@ def _add_numerical_index(data_frame, exclude_datasets):
 def _calculate_statistic(data_frame, stat_func, exclude_datasets):
     """Calculate statistic."""
     projects = data_frame.index.get_level_values('project')
-    series_to_append = []
+    dfs_to_append = []
     for project in list(set(projects)):
         sub_data_frame = data_frame.loc[projects == project]
         datasets = sub_data_frame.index.get_level_values('dataset')
@@ -84,10 +87,10 @@ def _calculate_statistic(data_frame, stat_func, exclude_datasets):
             index=data_frame.columns,
             name=(project, f'--{stat_func.__name__.upper()}--', EXCLUDE_VAL),
         )
-        series_to_append.append(series)
-    for series in series_to_append:
-        data_frame = data_frame.append(series)
-    data_frame = data_frame.sort_index()
+        df_to_append = series.to_frame().T
+        df_to_append.index.names = data_frame.index.names
+        dfs_to_append.append(df_to_append)
+    data_frame = pd.concat([data_frame] + dfs_to_append).sort_index()
     return data_frame


@@ -131,9 +134,11 @@ def create_data_frame(input_files, exclude_datasets):
         series = pd.Series(data=cube.data, index=index)

         # Expand index
-        for row in series.index.difference(data_frame.index):
-            data_frame = data_frame.append(pd.Series(name=row,
-                                                     dtype=cube.dtype))
+        rows_to_add = [
+            pd.Series(name=row, dtype=cube.dtype).to_frame().T for row in
+            series.index.difference(data_frame.index)
+        ]
+        data_frame = pd.concat([data_frame] + rows_to_add)

         # Add new data
         if cube.var_name in data_frame.columns:
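In `_calculate_statistic` above, the per-project statistic row carries a tuple name that becomes a one-row MultiIndex after `to_frame().T`, so the index names have to be copied from the target frame before the single `pd.concat`. A rough stand-alone illustration of that step, with made-up column and index names (placeholders, not from the recipe output):

```python
import pandas as pd

data_frame = pd.DataFrame(
    {"ecs": [3.0, 4.0]},
    index=pd.MultiIndex.from_tuples(
        [("CMIP6", "ModelA", 1), ("CMIP6", "ModelB", 2)],
        names=["project", "dataset", "idx"],
    ),
)

# One statistic row for the project, named by a (project, label, idx) tuple.
stats = data_frame.mean(axis=0)
stats.name = ("CMIP6", "--MEAN--", 0)

df_to_append = stats.to_frame().T              # one-row frame with a MultiIndex
df_to_append.index.names = data_frame.index.names
data_frame = pd.concat([data_frame, df_to_append]).sort_index()
print(data_frame)
```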
40 changes: 23 additions & 17 deletions esmvaltool/diag_scripts/emergent_constraints/__init__.py
@@ -35,7 +35,10 @@
     'bbox_to_anchor': [1.05, 0.5],
     'borderaxespad': 0.0,
 }
-PANDAS_PRINT_OPTIONS = ['display.max_rows', None, 'display.max_colwidth', -1]
+PANDAS_PRINT_OPTIONS = [
+    'display.max_rows', None,
+    'display.max_colwidth', None,
+]


 def _check_x_y_arrays(x_array, y_array):
@@ -68,11 +71,11 @@ def _check_x_y_arrays(x_array, y_array):

 def _add_column(data_frame, series, column_name):
     """Add column to :class:`pandas.DataFrame` (expands index if necessary)."""
-    for row in series.index.difference(data_frame.index):
-        data_frame = pd.concat([
-            data_frame,
-            pd.Series(name=row, dtype=np.float64).to_frame().T,
-        ])
+    rows_to_add = [
+        pd.Series(name=row, dtype=np.float64).to_frame().T for row in
+        series.index.difference(data_frame.index)
+    ]
+    data_frame = pd.concat([data_frame] + rows_to_add)
     if column_name in data_frame.columns:
         for row in series.index:
             if np.isnan(data_frame.loc[row, column_name]):
@@ -808,14 +811,16 @@ def get_input_data(cfg):
                                   label_all_data, group_by)

     # Unify indices of features and label
-    for row in features.index.difference(label.index):
-        label = pd.concat(
-            [label, pd.Series(name=row, dtype=np.float64).to_frame().T]
-        )
-    for row in label.index.difference(features.index):
-        features = pd.concat(
-            [features, pd.Series(name=row, dtype=np.float64).to_frame().T]
-        )
+    rows_to_add_to_label = [
+        pd.Series(name=row, dtype=np.float64).to_frame().T for row in
+        features.index.difference(label.index)
+    ]
+    label = pd.concat([label] + rows_to_add_to_label)
+    rows_to_add_to_features = [
+        pd.Series(name=row, dtype=np.float64).to_frame().T for row in
+        label.index.difference(features.index)
+    ]
+    features = pd.concat([features] + rows_to_add_to_features)

     # Sort data frames
     for data_frame in (features, label, pred_input, pred_input_err):
@@ -1394,7 +1399,7 @@ def plot_target_distributions(training_data, pred_input_data, attributes,
                          add_combined_group=cfg['combine_groups'])
     summary_columns = pd.MultiIndex.from_product(
         [groups, ['best estimate', 'range', 'min', 'max']])
-    summary = pd.DataFrame(columns=summary_columns, dtype=np.float64)
+    summaries = []

     # Iterate over features
     for feature in training_data.x.columns:
@@ -1455,8 +1460,8 @@ def plot_target_distributions(training_data, pred_input_data, attributes,
             summary_for_feature[(group, 'min')] = y_min
             summary_for_feature[(group, 'max')] = y_max

-        # Save results to feature
-        summary = pd.concat([summary, summary_for_feature.to_frame().T])
+        # Save results for feature
+        summaries.append(summary_for_feature.to_frame().T)

         # Plot appearance
         set_plot_appearance(axes, attributes, plot_title=feature)
@@ -1485,6 +1490,7 @@ def plot_target_distributions(training_data, pred_input_data, attributes,
         provenance_logger.log(plot_path, provenance_record)

     # Print mean results
+    summary = pd.concat(summaries)
     with pd.option_context(*PANDAS_PRINT_OPTIONS):
         logger.info("Constrained ranges:\n%s", summary)
     summary = summary.mean(axis=0)
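Both `_add_column` and `get_input_data` expand a frame's index by building empty (all-NaN) one-row frames for the missing labels and concatenating once, rather than appending row by row. A small self-contained sketch of that expansion, with invented model names:

```python
import numpy as np
import pandas as pd

features = pd.DataFrame({"x": [1.0, 2.0]}, index=["ModelA", "ModelB"])
label = pd.DataFrame({"y": [0.5]}, index=["ModelA"])

# Rows present in `features` but missing from `label` become empty float rows.
rows_to_add_to_label = [
    pd.Series(name=row, dtype=np.float64).to_frame().T
    for row in features.index.difference(label.index)
]
label = pd.concat([label] + rows_to_add_to_label)
print(label.sort_index())   # ModelB now exists in `label`, filled with NaN
```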
24 changes: 14 additions & 10 deletions esmvaltool/diag_scripts/mlr/models/__init__.py
@@ -2098,13 +2098,12 @@ def _extract_x_data(self, datasets, var_type):
             raise ValueError(
                 f"Excepted one of '{allowed_types}' for 'var_type', got "
                 f"'{var_type}'")
-        x_data = pd.DataFrame(columns=self.features, dtype=self._cfg['dtype'])
+        x_data_for_groups = []
         x_cube = None
         if self._cfg['weighted_samples'] and var_type == 'feature':
-            sample_weights = pd.DataFrame(columns=['sample_weight'],
-                                          dtype=self._cfg['dtype'])
+            sample_weights_for_groups = []
         else:
-            sample_weights = None
+            sample_weights_for_groups = None

         # Iterate over datasets
         datasets = select_metadata(datasets, var_type=var_type)
@@ -2123,14 +2122,15 @@ def _extract_x_data(self, datasets, var_type):
             (group_data, x_cube,
              weights) = self._get_x_data_for_group(group_datasets, var_type,
                                                    group_attr)
-            x_data = pd.concat([x_data, group_data])
+            x_data_for_groups.append(group_data)

             # Append weights if desired
-            if sample_weights is not None:
-                sample_weights = pd.concat([sample_weights, weights])
+            if sample_weights_for_groups is not None:
+                sample_weights_for_groups.append(weights)

         # Adapt sample_weights if necessary
-        if sample_weights is not None:
+        if sample_weights_for_groups is not None:
+            sample_weights = pd.concat(sample_weights_for_groups)
             sample_weights.index = pd.MultiIndex.from_tuples(
                 sample_weights.index, names=self._get_multiindex_names())
             logger.info(
@@ -2145,8 +2145,11 @@ def _extract_x_data(self, datasets, var_type):
                 "cubes",
                 sample_weights.min().values[0],
                 sample_weights.max().values[0])
+        else:
+            sample_weights = None

         # Convert index back to MultiIndex
+        x_data = pd.concat(x_data_for_groups)
         x_data.index = pd.MultiIndex.from_tuples(
             x_data.index, names=self._get_multiindex_names())

@@ -2159,7 +2162,7 @@ def _extract_y_data(self, datasets, var_type):
             raise ValueError(
                 f"Excepted one of '{allowed_types}' for 'var_type', got "
                 f"'{var_type}'")
-        y_data = pd.DataFrame(columns=[self.label], dtype=self._cfg['dtype'])
+        y_data_for_groups = []

         # Iterate over datasets
         datasets = select_metadata(datasets, var_type=var_type)
@@ -2186,9 +2189,10 @@ def _extract_y_data(self, datasets, var_type):
                 index=self._get_multiindex(cube, group_attr=group_attr),
                 dtype=self._cfg['dtype'],
             )
-            y_data = pd.concat([y_data, cube_data])
+            y_data_for_groups.append(cube_data)

         # Convert index back to MultiIndex
+        y_data = pd.concat(y_data_for_groups)
         y_data.index = pd.MultiIndex.from_tuples(
             y_data.index, names=self._get_multiindex_names())

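In the MLR model class the per-group frames are now gathered in plain Python lists (`x_data_for_groups`, `y_data_for_groups`, `sample_weights_for_groups`) and concatenated once after the loop; this also avoids seeding the result with an empty, typed DataFrame. A simplified sketch of the loop structure, with placeholder group data standing in for the `_get_x_data_for_group` output (the flat tuple index here only mimics the diagnostic's sample index):

```python
import pandas as pd

# Placeholder per-group frames with a flat index of tuples.
group_frames = [
    pd.DataFrame(
        {"tas": [287.1, 288.3]},
        index=pd.Index([("historical", 0), ("historical", 1)], tupleize_cols=False),
    ),
    pd.DataFrame(
        {"tas": [289.0, 290.2]},
        index=pd.Index([("ssp585", 0), ("ssp585", 1)], tupleize_cols=False),
    ),
]

# Collect frames in a list and concatenate once after the loop, instead of
# growing a DataFrame with pd.concat (formerly DataFrame.append) per iteration.
x_data_for_groups = []
for group_data in group_frames:
    x_data_for_groups.append(group_data)
x_data = pd.concat(x_data_for_groups)

# Convert the tuple index back to a MultiIndex, as done after the loop above.
x_data.index = pd.MultiIndex.from_tuples(x_data.index, names=["group", "sample"])
print(x_data)
```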
2 changes: 1 addition & 1 deletion esmvaltool/diag_scripts/mlr/plot.py
@@ -835,7 +835,7 @@ def main(cfg):
     if cfg['print_corr']:
         pandas_print_options = [
             'display.max_rows', None,
-            'display.max_colwidth', -1,
+            'display.max_colwidth', None,
         ]
         corr = ALL_CUBES.corr()
         with pd.option_context(*pandas_print_options):
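The only change in `plot.py` is the print option: pandas 2.0 rejects `-1` for `display.max_colwidth`, and `None` is the documented way to disable truncation. A quick check of the new options (illustration only):

```python
import pandas as pd

corr = pd.DataFrame(
    [[1.0, 0.3], [0.3, 1.0]], index=["tas", "pr"], columns=["tas", "pr"]
)

# 'display.max_colwidth' must be a non-negative integer or None in pandas 2.x;
# None prints full cell contents, matching the old behaviour of -1.
with pd.option_context("display.max_rows", None, "display.max_colwidth", None):
    print(corr)
```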
