# Filtering and Sorting Rows

In [88]:
import polars as pl

In [89]:
# Load the tool catalogue that every later cell filters and sorts.
tools = pl.read_csv("data/tools.csv")
# print() renders the plain-text table, which includes the shape and column dtypes.
print(tools)

shape: (10, 6)
┌───────────────────────┬──────────────┬────────┬──────────┬───────┬───────┐
│ tool                  ┆ product      ┆ brand  ┆ cordless ┆ price ┆ rpm   │
│ ---                   ┆ ---          ┆ ---    ┆ ---      ┆ ---   ┆ ---   │
│ str                   ┆ str          ┆ str    ┆ bool     ┆ i64   ┆ i64   │
╞═══════════════════════╪══════════════╪════════╪══════════╪═══════╪═══════╡
│ Rotary Hammer         ┆ HR2230       ┆ Makita ┆ false    ┆ 199   ┆ 1050  │
│ Miter Saw             ┆ GCM 8 SJL    ┆ Bosch  ┆ false    ┆ 391   ┆ 5500  │
│ Plunge Cut Saw        ┆ DSP600ZJ     ┆ Makita ┆ true     ┆ 459   ┆ 6300  │
│ Impact Driver         ┆ DTD157Z      ┆ Makita ┆ true     ┆ 156   ┆ 3000  │
│ Jigsaw                ┆ PST 900 PEL  ┆ Bosch  ┆ false    ┆ 79    ┆ 3100  │
│ Angle Grinder         ┆ DGA504ZJ     ┆ Makita ┆ true     ┆ 229   ┆ 8500  │
│ Nail Gun              ┆ DPSB2IN1-XJ  ┆ DeWalt ┆ true     ┆ 129   ┆ null  │
│ Router                ┆ POF 1400 ACE ┆ Bosch  ┆ false    ┆ 

## Filtering Rows

### Filtering Based on Expressions

Ambas expresiones de filtrado en Polars son equivalentes:

- `tools.filter(pl.col("cordless") & (pl.col("brand") == "Makita"))`
- `tools.filter(pl.col("cordless"), pl.col("brand") == "Makita")`

La primera utiliza el operador `&` para combinar dos condiciones en una sola expresión booleana. La segunda pasa cada condición como argumento separado a `filter()`, que internamente los combina con `AND`. En ambos casos, el resultado es un DataFrame con las filas donde `cordless` es `True` y `brand` es `"Makita"`.

In [90]:
# Both conditions are merged into one boolean expression with `&`;
# the parentheses are required because `&` binds tighter than `==`.
tools.filter(pl.col("cordless") & (pl.col("brand") == "Makita"))

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


In [91]:
# Each condition is a separate positional argument; filter() combines them with AND.
tools.filter(pl.col("cordless"), pl.col("brand") == "Makita")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


### Filtering Based on Column Names

In [92]:
# A bare column name works as a predicate for a boolean column:
# only rows where `cordless` is true are kept.
tools.filter("cordless")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0


### Filtering Based on Constraints

In [93]:
# Keyword constraints: each `column=value` is an equality check, and all must hold.
tools.filter(cordless=True, brand="Makita")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


## Sorting Rows

### Sorting Based on a Single Column

In [94]:
# Sort on a single column; with no `descending` flag the order is ascending.
tools.sort("price")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0


| Argument         | Descripción                                                                                   |
|------------------|----------------------------------------------------------------------------------------------|
| by               | Nombre(s) de columna(s) o expresión(es) por las que ordenar. Puede ser un string, lista de strings o expresiones. |
| *more_by         | Columnas o expresiones adicionales para ordenar (argumentos variádicos).                      |
| descending       | Booleano o lista de booleanos que indica el orden para cada columna (`True` para descendente).|
| nulls_last       | Si es `True`, los valores nulos se ordenan al final; si es `False`, los nulos van primero.    |
| multithreaded    | Si es `True`, habilita ordenamiento multihilo para mayor rendimiento.                         |
| maintain_order   | Si es `True`, preserva el orden de elementos iguales (orden estable).                         |

### Sorting in reverse

In [95]:
# Three most expensive tools: sort by price from high to low, then keep the first three rows.
tools.sort("price", descending=True).head(3)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500


### Sorting Based on Multiple Columns

In [96]:
# Sort by brand first, then by price within each brand (both ascending).
tools.sort("brand", "price")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0


In [97]:
# One `descending` flag per sort column: brand ascending, price descending within each brand.
tools.sort("brand", "price", descending=[False, True])

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0


### Sorting Based on Expressions

In [76]:
# Sort by a computed expression (rpm per unit of price). The same ratio is also
# added as a column so the sort key is visible in the output. The Nail Gun has
# no rpm, so its ratio is null and it appears first.
(
    tools
    .with_columns(rpm_per_price=pl.col("rpm") / pl.col("price"))
    .sort(pl.col("rpm") / pl.col("price"))
)

tool,product,brand,cordless,price,rpm,rpm_per_price
str,str,str,bool,i64,i64,f64
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,,
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0,5.276382
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0,11.24031
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0,13.72549
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0,14.066496
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0,19.230769
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0,37.117904
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0,39.240506
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0,55.276382
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0,151.351351


## Sorting Nested Data Types

In [77]:
# Lists are compared element by element, so [1] < [2, 1, 3] < [2, 2].
lists = pl.DataFrame({"lists": [[2, 2], [2, 1, 3], [1]]})
lists.sort("lists")

lists
list[i64]
[1]
"[2, 1, 3]"
"[2, 2]"


In [78]:
# Every struct has the same `a`, so the sorted output ends up ordered by `b`
# (1, 2, 3): fields are compared one after another.
structs = pl.DataFrame(
    {
        "structs": [
            {"a": 1, "b": 2, "c": 3},
            {"a": 1, "b": 3, "c": 1},
            {"a": 1, "b": 1, "c": 2},
        ]
    }
)
structs.sort("structs")

structs
struct[3]
"{1,1,2}"
"{1,2,3}"
"{1,3,1}"


In [79]:
# pl.struct(pl.all()) packs each row's remaining columns (everything except the
# `brand` key) into a struct; agg() then collects one list of structs per brand.
# group_by() does not promise any particular group order, so sort by the key
# to get the same brand order on every run.
tools_collection = (
    tools
    .group_by("brand")
    .agg(collection=pl.struct(pl.all()))
    .sort("brand")
)
tools_collection

brand,collection
str,list[struct[5]]
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",false,79,3100}, {""Router"",""POF 1400 ACE"",false,185,28000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",true,129,null}, {""Table Saw"",""DWE7485"",false,516,5800}]"
"""Makita""","[{""Rotary Hammer"",""HR2230"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",true,199,11000}]"


In [80]:
# Order brands by how many tools each collection holds, largest first.
tools_collection.sort(pl.col("collection").list.len(), descending=True)

brand,collection
str,list[struct[5]]
"""Makita""","[{""Rotary Hammer"",""HR2230"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",true,199,11000}]"
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",false,79,3100}, {""Router"",""POF 1400 ACE"",false,185,28000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",true,129,null}, {""Table Saw"",""DWE7485"",false,516,5800}]"


In [81]:
# Sort brands by the mean price of their tools: pull `price` out of every
# struct in the list, then average each list.
tools_collection.sort(
    pl.col("collection")
    .list.eval(pl.element().struct.field("price"))
    .list.mean()
)

brand,collection
str,list[struct[5]]
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",false,79,3100}, {""Router"",""POF 1400 ACE"",false,185,28000}]"
"""Makita""","[{""Rotary Hammer"",""HR2230"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",true,199,11000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",true,129,null}, {""Table Saw"",""DWE7485"",false,516,5800}]"


In [82]:
# Materialise the per-brand mean price as a column so the values behind the
# mean-price ordering can be inspected directly.
tools_collection.with_columns(
    mean_price=pl.col("collection")
    .list.eval(pl.element().struct.field("price"))
    .list.mean()
)

brand,collection,mean_price
str,list[struct[5]],f64
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",false,79,3100}, {""Router"",""POF 1400 ACE"",false,185,28000}]",218.333333
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",true,129,null}, {""Table Saw"",""DWE7485"",false,516,5800}]",322.5
"""Makita""","[{""Rotary Hammer"",""HR2230"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",true,199,11000}]",248.4


## Related Row Operations

### Filtering Missing Values

In [83]:
# Count the rows that remain after dropping those with a null `rpm`.
tools.drop_nulls("rpm").height

9

In [84]:
# Keep only rows in which every column is non-null (this checks all columns,
# not just `rpm`); with this data that also leaves 9 rows.
tools.filter(pl.all_horizontal(pl.all().is_not_null())).height

9

### Slicing

In [85]:
# Add a row index, take every second row, and show the first three (indices 0, 2, 4).
tools.with_row_index().gather_every(2).head(3)

index,tool,product,brand,cordless,price,rpm
u32,str,str,str,bool,i64,i64
0,"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050
2,"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
4,"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100


### Top and Bottom Rows

In [86]:
# Select the three rows with the highest price. top_k() does not guarantee the
# order of the returned rows; chain .sort() afterwards if a sorted result is needed.
tools.top_k(3, by="price")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500


### Sampling

In [87]:
# fraction=0.2 of the 10 rows draws 2 rows; the fixed seed makes the draw
# repeatable across kernel restarts (without it every run picks different rows).
tools.sample(fraction=0.2, seed=42)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800


### Semi-Joins

In [98]:
# Lookup frame of saw names; several of them do not exist in `tools`.
saws = pl.DataFrame(
    {
        "tool": [
            "Table Saw",
            "Plunge Cut Saw",
            "Miter Saw",
            "Jigsaw",
            "Bandsaw",
            "Chainsaw",
            "Seesaw",
        ]
    }
)
# A semi-join keeps the rows of `tools` whose `tool` value appears in `saws`
# and adds no columns from `saws`; names without a match are simply ignored.
tools.join(saws, how="semi", on="tool")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800


## Takeaways
- Filtrado de filas con expresiones y argumentos directos.
- Ordenamiento por una o varias columnas, incluyendo expresiones y tipos anidados (listas, structs).
- Agrupación y agregación avanzada, con resultados como structs o listas.
- Eliminación y filtrado de valores nulos.
- Slicing, sampling y selección de filas superiores/inferiores.
- Semi-joins para filtrar filas presentes en otro DataFrame.
- Polars destaca por su sintaxis clara, eficiencia y soporte para operaciones complejas sobre grandes volúmenes de datos.