From c598287da93c1a0e606903ba6a78ecdbdbff2bea Mon Sep 17 00:00:00 2001 From: caichac-dhi <97288080+caichac-dhi@users.noreply.github.com> Date: Thu, 27 Apr 2023 09:37:36 +0200 Subject: [PATCH 1/6] fix small density-bug and added test --- fmskill/plot.py | 8 ++++++-- tests/test_multivariable_compare.py | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fmskill/plot.py b/fmskill/plot.py index c083665b8..98fe92cc8 100644 --- a/fmskill/plot.py +++ b/fmskill/plot.py @@ -536,12 +536,16 @@ def _scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): """ # Make linear-grid for interpolation - minxy = min(min(x), min(y))-binsize/2 - maxxy = max(max(x), max(y))+binsize/2 + minxy = min(min(x), min(y))-binsize + maxxy = max(max(x), max(y))+binsize # Center points of the bins cxy = np.arange(minxy, maxxy, binsize) # Edges of the bins exy = np.arange(minxy - binsize * 0.5, maxxy + binsize * 0.5, binsize) + if exy[-1]<=cxy[-1]: + #sometimes, given the bin size, the edges array ended before (left side) of the bins-center array + # in such case, and extra half-bin is added at the end + exy = np.arange(minxy - binsize * 0.5, maxxy + binsize, binsize) # Calculate 2D histogram histodata, exh, eyh = np.histogram2d(x, y, [exy, exy]) diff --git a/tests/test_multivariable_compare.py b/tests/test_multivariable_compare.py index 8813c75e8..d0cdf33d8 100644 --- a/tests/test_multivariable_compare.py +++ b/tests/test_multivariable_compare.py @@ -134,6 +134,8 @@ def test_mv_mm_scatter(cc): cc.scatter( model="SW_1", variable="Wind_speed", observation="F16_wind", skill_table=True ) + cc.scatter(model="SW_1", variable="Wind_speed", show_density=True,bins=19) + cc.scatter(model="SW_1", variable="Wind_speed", show_density=True,bins=21) assert True plt.close("all") From 2af629b315ca078e81045e2f91e377c539ee607d Mon Sep 17 00:00:00 2001 From: caichac-dhi <97288080+caichac-dhi@users.noreply.github.com> Date: Thu, 27 Apr 2023 10:50:42 +0200 Subject: [PATCH 2/6] histogram as private function --- fmskill/plot.py | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/fmskill/plot.py b/fmskill/plot.py index 98fe92cc8..a1581617a 100644 --- a/fmskill/plot.py +++ b/fmskill/plot.py @@ -256,7 +256,7 @@ def scatter( "if `show_density=True` then `show_hist` must be either `False` or `None`" ) # calculate density data - z = _scatter_density(x_sample, y_sample, binsize=binsize) + z = __scatter_density(x_sample, y_sample, binsize=binsize) idx = z.argsort() # Sort data by colormaps x_sample, y_sample, z = x_sample[idx], y_sample[idx], z[idx] @@ -515,8 +515,8 @@ def taylor_diagram( fig.suptitle(title, size="x-large") -def _scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): - """Interpolates scatter data on a 2D histogram (gridded) based on data density. +def __hist2d(x, y, binsize): + """Calculates 2D histogram (gridded) of data. Parameters ---------- @@ -525,16 +525,17 @@ def _scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): y: np.array Y values e.g observation values, must be same length as x binsize: float, optional - 2D grid resolution, by default = 0.1 - method: str, optional - Scipy griddata interpolation method, by default 'linear' + 2D histogram (bin) resolution, by default = 0.1 Returns ---------- - Z_grid: np.array - Array with the colors based on histogram density + histodata: np.array + 2D-histogram data + cxy: np.array + Center points of the histogram bins + exy: np.array + Edges of the histogram bins """ - # Make linear-grid for interpolation minxy = min(min(x), min(y))-binsize maxxy = max(max(x), max(y))+binsize @@ -550,6 +551,30 @@ def _scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): # Calculate 2D histogram histodata, exh, eyh = np.histogram2d(x, y, [exy, exy]) + return histodata,cxy,exy + +def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): + """Interpolates scatter data on a 2D histogram (gridded) based on data density. + + Parameters + ---------- + x: np.array + X values e.g model values, must be same length as y + y: np.array + Y values e.g observation values, must be same length as x + binsize: float, optional + 2D histogram (bin) resolution, by default = 0.1 + method: str, optional + Scipy griddata interpolation method, by default 'linear' + + Returns + ---------- + Z_grid: np.array + Array with the colors based on histogram density + """ + + histodata,cxy,exy=__hist2d(x, y, binsize) + # Histogram values hist = [] for j in range(len(cxy)): From 3c98edaefdc0137d0ff480560f5abb432a625df6 Mon Sep 17 00:00:00 2001 From: caichac-dhi <97288080+caichac-dhi@users.noreply.github.com> Date: Thu, 27 Apr 2023 10:54:14 +0200 Subject: [PATCH 3/6] all of hist as private --- fmskill/plot.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fmskill/plot.py b/fmskill/plot.py index a1581617a..3f668a285 100644 --- a/fmskill/plot.py +++ b/fmskill/plot.py @@ -551,7 +551,13 @@ def __hist2d(x, y, binsize): # Calculate 2D histogram histodata, exh, eyh = np.histogram2d(x, y, [exy, exy]) - return histodata,cxy,exy + # Histogram values + hist = [] + for j in range(len(cxy)): + for i in range(len(cxy)): + hist.append(histodata[i, j]) + + return hist,cxy,exy def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): """Interpolates scatter data on a 2D histogram (gridded) based on data density. @@ -573,13 +579,7 @@ def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): Array with the colors based on histogram density """ - histodata,cxy,exy=__hist2d(x, y, binsize) - - # Histogram values - hist = [] - for j in range(len(cxy)): - for i in range(len(cxy)): - hist.append(histodata[i, j]) + hist,cxy,exy=__hist2d(x, y, binsize) # Grid-data xg, yg = np.meshgrid(cxy, cxy) From 3fe95d5d13c2aa3b50f729471a20c5d9434801c8 Mon Sep 17 00:00:00 2001 From: caichac-dhi <97288080+caichac-dhi@users.noreply.github.com> Date: Mon, 8 May 2023 14:27:58 +0200 Subject: [PATCH 4/6] remove unused values --- fmskill/plot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fmskill/plot.py b/fmskill/plot.py index 3f668a285..68fbf74ca 100644 --- a/fmskill/plot.py +++ b/fmskill/plot.py @@ -549,7 +549,7 @@ def __hist2d(x, y, binsize): exy = np.arange(minxy - binsize * 0.5, maxxy + binsize, binsize) # Calculate 2D histogram - histodata, exh, eyh = np.histogram2d(x, y, [exy, exy]) + histodata, exh, _ = np.histogram2d(x, y, [exy, exy]) # Histogram values hist = [] @@ -557,7 +557,7 @@ def __hist2d(x, y, binsize): for i in range(len(cxy)): hist.append(histodata[i, j]) - return hist,cxy,exy + return hist,cxy def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): """Interpolates scatter data on a 2D histogram (gridded) based on data density. @@ -579,7 +579,7 @@ def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): Array with the colors based on histogram density """ - hist,cxy,exy=__hist2d(x, y, binsize) + hist,cxy=__hist2d(x, y, binsize) # Grid-data xg, yg = np.meshgrid(cxy, cxy) From 9e0fab009f6a2cc3282b82d7f38ea3dc2e51e813 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 10 May 2023 16:36:30 +0200 Subject: [PATCH 5/6] format with black --- fmskill/plot.py | 52 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/fmskill/plot.py b/fmskill/plot.py index 68fbf74ca..368f69c6f 100644 --- a/fmskill/plot.py +++ b/fmskill/plot.py @@ -56,6 +56,7 @@ # register_option("plot.scatter.table.show", False, validator=settings.is_bool) register_option("plot.scatter.legend.fontsize", 12, validator=settings.is_positive) + def scatter( x, y, @@ -152,14 +153,14 @@ def scatter( x_sample = x y_sample = y - sample_warning=False + sample_warning = False if show_points is None: # If nothing given, and more than 50k points, 50k sample will be shown if len(x) < 5e4: show_points = True else: show_points = 50000 - sample_warning=True + sample_warning = True if type(show_points) == float: if show_points < 0 or show_points > 1: raise ValueError(" `show_points` fraction must be in [0,1]") @@ -170,31 +171,30 @@ def scatter( ) x_sample = x[ran_index] y_sample = y[ran_index] - if len(x_sample)= 3000: quantiles = 1000 @@ -224,10 +224,10 @@ def scatter( # Remove previous piece of code when nbins and bin_size are deprecated. if xlim is None: - xlim = [xymin - binsize, xymax+ binsize] + xlim = [xymin - binsize, xymax + binsize] if ylim is None: - ylim = [xymin - binsize, xymax+ binsize] + ylim = [xymin - binsize, xymax + binsize] if type(quantiles) == int: xq = np.quantile(x, q=np.linspace(0, 1, num=quantiles)) @@ -236,8 +236,8 @@ def scatter( # if not an int nor None, it must be a squence of floats xq = np.quantile(x, q=quantiles) yq = np.quantile(y, q=quantiles) - x_trend= np.array([xlim[0],xlim[1]]) - + x_trend = np.array([xlim[0], xlim[1]]) + if show_hist: # if histogram is wanted (explicit non-default flag) then density is off if show_density == True: @@ -259,10 +259,9 @@ def scatter( z = __scatter_density(x_sample, y_sample, binsize=binsize) idx = z.argsort() # Sort data by colormaps - x_sample, y_sample, z = x_sample[idx], y_sample[idx], z[idx] + x_sample, y_sample, z = x_sample[idx], y_sample[idx], z[idx] # scale Z by sample size - z = z * len(x) / len(x_sample) - + z = z * len(x) / len(x_sample) # linear fit slope, intercept = _linear_regression(obs=x, model=y, reg_method=reg_method) @@ -274,8 +273,8 @@ def scatter( reglabel = f"Fit: y={slope:.2f}x{sign}{intercept:.2f}" if backend == "matplotlib": - _,ax=plt.subplots(figsize=figsize) - #plt.figure(figsize=figsize) + _, ax = plt.subplots(figsize=figsize) + # plt.figure(figsize=figsize) plt.plot( [xlim[0], xlim[1]], [xlim[0], xlim[1]], @@ -328,7 +327,7 @@ def scatter( plt.xlim([xlim[0], xlim[1]]) plt.ylim([ylim[0], ylim[1]]) plt.minorticks_on() - plt.grid(which="both", axis="both", linewidth="0.2", color="k",alpha=0.6) + plt.grid(which="both", axis="both", linewidth="0.2", color="k", alpha=0.6) max_cbar = None if show_hist or (show_density and show_points): cbar = plt.colorbar(fraction=0.046, pad=0.04) @@ -537,14 +536,14 @@ def __hist2d(x, y, binsize): Edges of the histogram bins """ # Make linear-grid for interpolation - minxy = min(min(x), min(y))-binsize - maxxy = max(max(x), max(y))+binsize + minxy = min(min(x), min(y)) - binsize + maxxy = max(max(x), max(y)) + binsize # Center points of the bins cxy = np.arange(minxy, maxxy, binsize) # Edges of the bins exy = np.arange(minxy - binsize * 0.5, maxxy + binsize * 0.5, binsize) - if exy[-1]<=cxy[-1]: - #sometimes, given the bin size, the edges array ended before (left side) of the bins-center array + if exy[-1] <= cxy[-1]: + # sometimes, given the bin size, the edges array ended before (left side) of the bins-center array # in such case, and extra half-bin is added at the end exy = np.arange(minxy - binsize * 0.5, maxxy + binsize, binsize) @@ -557,7 +556,8 @@ def __hist2d(x, y, binsize): for i in range(len(cxy)): hist.append(histodata[i, j]) - return hist,cxy + return hist, cxy + def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): """Interpolates scatter data on a 2D histogram (gridded) based on data density. @@ -579,7 +579,7 @@ def __scatter_density(x, y, binsize: float = 0.1, method: str = "linear"): Array with the colors based on histogram density """ - hist,cxy=__hist2d(x, y, binsize) + hist, cxy = __hist2d(x, y, binsize) # Grid-data xg, yg = np.meshgrid(cxy, cxy) From 06215d2e75426012ea9224af92d8b9d7851a6967 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 10 May 2023 16:53:01 +0200 Subject: [PATCH 6/6] Fixes suggested by ruff --- fmskill/plot.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fmskill/plot.py b/fmskill/plot.py index 368f69c6f..3ba24aeb7 100644 --- a/fmskill/plot.py +++ b/fmskill/plot.py @@ -134,7 +134,7 @@ def scatter( user default units to override default units, eg 'metre', by default None kwargs """ - if show_hist == None and show_density == None: + if show_hist is None and show_density is None: # Default: points density show_density = True @@ -240,7 +240,7 @@ def scatter( if show_hist: # if histogram is wanted (explicit non-default flag) then density is off - if show_density == True: + if show_density is True: raise TypeError( "if `show_hist=True` then `show_density` must be either `False` or `None`" ) @@ -251,7 +251,7 @@ def scatter( "if `show_density=True` then bins must be either float or int" ) # if point density is wanted, then 2D histogram is not shown - if show_hist == True: + if show_hist is True: raise TypeError( "if `show_density=True` then `show_hist` must be either `False` or `None`" ) @@ -337,7 +337,7 @@ def scatter( plt.title(title) # Add skill table - if skill_df != None: + if skill_df is not None: _plot_summary_table(skill_df, units, max_cbar=max_cbar) return ax @@ -548,7 +548,7 @@ def __hist2d(x, y, binsize): exy = np.arange(minxy - binsize * 0.5, maxxy + binsize, binsize) # Calculate 2D histogram - histodata, exh, _ = np.histogram2d(x, y, [exy, exy]) + histodata, _, _ = np.histogram2d(x, y, [exy, exy]) # Histogram values hist = [] @@ -620,7 +620,7 @@ def _plot_summary_table(skill_df, units, max_cbar): text_ = "\n".join(lines) - if max_cbar == None: + if max_cbar is None: x = 0.93 elif max_cbar < 1e3: x = 0.99