In [3]:
import polars as pl

In [10]:
# Load the raw product catalogue (products_macro.csv).
products = pl.read_csv("../data/raw/products_macro.csv")

# Turn the textual price column into a numeric one:
#   1. strip EVERY character that is not a digit, comma, dot or minus sign.
#      This covers the € symbol, surrounding spaces and any other stray
#      text. `str.replace` would only substitute the first match, so
#      `str.replace_all` is required here.
#   2. swap the decimal comma for a decimal point (first occurrence only, so a
#      value with several commas still fails loudly at the cast instead of
#      being silently mangled).
#   3. cast to Float32.
products = products.with_columns(
    pl.col("price")
    .str.replace_all(r"[^\d,.-]", "")  # keep only digits, ',', '.' and '-'
    .str.replace(",", ".", literal=True)  # decimal comma -> decimal point
    .cast(pl.Float32)  # convert the cleaned text to float
    .alias("price")
)

# Check the first 5 rows after cleaning
print(products.head(5))


shape: (5, 7)
┌──────────┬───────────────┬───────────────┬───────┬───────────────┬───────────────┬───────────────┐
│ Category ┆ name          ┆ subtitle      ┆ price ┆ discount_pric ┆ main_image_ur ┆ secondary_ima │
│ ---      ┆ ---           ┆ ---           ┆ ---   ┆ e             ┆ l             ┆ ge_url        │
│ str      ┆ str           ┆ str           ┆ f32   ┆ ---           ┆ ---           ┆ ---           │
│          ┆               ┆               ┆       ┆ str           ┆ str           ┆ str           │
╞══════════╪═══════════════╪═══════════════╪═══════╪═══════════════╪═══════════════╪═══════════════╡
│ Fruta    ┆ Plátano de    ┆ Pieza 170 g   ┆ 0.36  ┆ null          ┆ https://prod- ┆ https://prod- │
│          ┆ Canarias IGP  ┆ aprox.        ┆       ┆               ┆ mercadona.img ┆ mercadona.img │
│          ┆               ┆               ┆       ┆               ┆ ix.n…         ┆ ix.n…         │
│ Fruta    ┆ Banana        ┆ Pieza 170 g   ┆ 0.25  ┆ null          ┆ https://

In [13]:
# List products from most to least expensive. polars places nulls first by
# default, even in a descending sort, so nulls_last=True keeps any rows with a
# missing price from being shown above the genuinely most expensive items.
print(products.sort("price", descending=True, nulls_last=True))

shape: (4_771, 7)
┌──────────┬───────────────┬───────────────┬───────┬───────────────┬───────────────┬───────────────┐
│ Category ┆ name          ┆ subtitle      ┆ price ┆ discount_pric ┆ main_image_ur ┆ secondary_ima │
│ ---      ┆ ---           ┆ ---           ┆ ---   ┆ e             ┆ l             ┆ ge_url        │
│ str      ┆ str           ┆ str           ┆ f32   ┆ ---           ┆ ---           ┆ ---           │
│          ┆               ┆               ┆       ┆ str           ┆ str           ┆ str           │
╞══════════╪═══════════════╪═══════════════╪═══════╪═══════════════╪═══════════════╪═══════════════╡
│ Jamón    ┆ Jamón de      ┆ Pieza 9 kg    ┆ 391.5 ┆ null          ┆ https://prod- ┆ https://prod- │
│ serrano  ┆ bellota       ┆ aprox.        ┆       ┆               ┆ mercadona.img ┆ mercadona.img │
│          ┆ ibérico 50%   ┆               ┆       ┆               ┆ ix.n…         ┆ ix.n…         │
│          ┆ L…            ┆               ┆       ┆               ┆     