# Savitzky-golay filter

In [8]:
import plotly.express as px
from scipy.signal import savgol_filter
import numpy as np
import polars as pl

In [29]:
x = np.linspace(start=-5, stop=5, num=100)
y = np.exp(-(x**2))
df = pl.DataFrame({"x": x, "y": y})

In [100]:
df

x,datapoints
f64,i32
-4.26,1
-3.77,1
-3.69,1
-3.59,1
-3.46,1
…,…
3.4,1
3.51,1
3.61,1
3.86,1


In [105]:
x = np.random.normal(
    loc=0,
    scale=1,
    size=10000,
)

rounding = 3
df = pl.DataFrame({"x": x})
df = (
    df
    #
    # .with_columns(pl.col("x").cast(pl.Decimal(scale=10)))
    .with_columns(pl.col("x").round(rounding), pl.lit(1).alias("one"))
    .group_by("x")
    .agg(pl.col("one").sum().alias("datapoints"))
    .sort("x")
)

px.line(
    df,
    x="x",
    y="datapoints",
    orientation="v",
)

In [130]:
# Compute the minimal step
step = round(
    df
    #
    .sort("x")
    .with_columns(pl.col("x").shift(1).alias("x_shift"))
    .with_columns((pl.col("x") - pl.col("x_shift")).alias("delta"))
    .select(pl.col("delta").median())
    .to_dicts()[0]["delta"],
    5,
)
print(f"Minimal step: {step}")


# Produce 0 when we don't have data
df = (
    df
    #
    .join(
        pl.DataFrame(
            {
                "x": np.arange(
                    df.select(pl.col("x").min()).to_numpy()[0],
                    df.select(pl.col("x").max()).to_numpy()[0],
                    step,
                )
            }
        ),
        on="x",
        how="full",
    )
    .with_columns(
        pl.coalesce("x", "x_right"),
        pl.col("datapoints").fill_null(0),
    )
    .with_columns(pl.col("x").round(rounding))
    .group_by("x")
    .agg(pl.col("datapoints").sum())
    # .drop("x_right")
    .sort("x")
)


df_plot = (
    df
    #
    .sort("x")
)
import itertools

for window_length, polyorder in itertools.product(
    range(1, 97, 16),
    range(1, 10, 2),
):
    if polyorder >= window_length:
        continue
    df_plot = df_plot.with_columns(
        pl.col("datapoints")
        .map_batches(
            lambda x: savgol_filter(
                x,
                window_length=window_length,
                polyorder=polyorder,
            )
        )
        .clip(lower_bound=0)
        .alias(f"savgol_w{window_length}_p{polyorder}")
    )


px.line(
    df_plot.unpivot(index="x").sort("x", "variable"),
    x="x",
    y="value",
    # color="variable",
    facet_col="variable",
    facet_col_wrap=5,
    height=800,
).update_yaxes(matches=None).update_layout(showlegend=False)

Minimal step: 0.001



Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)

