# Combining Expression

In [32]:
import polars as pl

In [33]:
fruit = pl.read_csv("data/fruit.csv")
fruit

name,weight,color,is_round,origin
str,i64,str,bool,str
"""Avocado""",200,"""green""",False,"""South America"""
"""Banana""",120,"""yellow""",False,"""Asia"""
"""Blueberry""",1,"""blue""",False,"""North America"""
"""Cantaloupe""",2500,"""orange""",True,"""Africa"""
"""Cranberry""",2,"""red""",False,"""North America"""
"""Elderberry""",1,"""black""",False,"""Europe"""
"""Orange""",130,"""orange""",True,"""Asia"""
"""Papaya""",1000,"""orange""",False,"""South America"""
"""Peach""",150,"""orange""",True,"""Asia"""
"""Watermelon""",5000,"""green""",True,"""Africa"""


In [34]:
fruit.filter(pl.col("is_round") & (pl.col("weight") > 1000))

name,weight,color,is_round,origin
str,i64,str,bool,str
"""Cantaloupe""",2500,"""orange""",True,"""Africa"""
"""Watermelon""",5000,"""green""",True,"""Africa"""


## Inline Operators Versus Methods

In [35]:
(
    pl.DataFrame({"i": [6.0, 0, 2, 2.5], "j": [7.0, 1, 2, 3]}).with_columns(
        (pl.col("i") * pl.col("j")).alias("*"),
        pl.col("i").mul(pl.col("j")).alias("Expr.mul()")
    )
)

i,j,*,Expr.mul()
f64,f64,f64,f64
6.0,7.0,42.0,42.0
0.0,1.0,0.0,0.0
2.0,2.0,4.0,4.0
2.5,3.0,7.5,7.5


## Arthmetic Operations

In [36]:
fruit.select(pl.col("name"), (pl.col("weight") / 1000))

name,weight
str,f64
"""Avocado""",0.2
"""Banana""",0.12
"""Blueberry""",0.001
"""Cantaloupe""",2.5
"""Cranberry""",0.002
"""Elderberry""",0.001
"""Orange""",0.13
"""Papaya""",1.0
"""Peach""",0.15
"""Watermelon""",5.0


| Operador en línea | Método            | Descripción                           |
|-------------------|------------------|---------------------------------------|
| `+`               | `expr.add()`     | Suma                                  |
| `-`               | `expr.sub()`     | Resta                                 |
| `*`               | `expr.mul()`     | Multiplicación                        |
| `/`               | `expr.truediv()` | División real (punto flotante)         |
| `//`              | `expr.floordiv()`| División entera (redondeo hacia abajo) |
| `**`              | `expr.pow()`     | Exponenciación                        |
| `%`               | `expr.mod()`     | Módulo (resto)                        |
| N/A               | `expr.dot()`     | Producto punto (multiplicación vectorial)|

Utiliza operadores en línea para expresiones rápidas y métodos para mayor claridad o composición en Polars.

In [37]:
with pl.Config(float_precision=2, tbl_cell_numeric_alignment="RIGHT"):
    print((
        pl.DataFrame({"i": [0.0, 2, 2, -2, -2], "j": [1, 2, 3, 4, -5]})).with_columns(
            (pl.col("i") + pl.col("j")).alias("i + j"),
            (pl.col("i") - pl.col("j")).alias("i - j"),
            (pl.col("i") * pl.col("j")).alias("i * j"),
            (pl.col("i") / pl.col("j")).alias("i / j"),
            (pl.col("i") // pl.col("j")).alias("i // j"),
            (pl.col("i") % pl.col("j")).alias("i % j"),
            (pl.col("i") ** pl.col("j")).alias("i ** j"),  
            (pl.col("i").dot(pl.col("j"))).alias("i . j"),
    ))

shape: (5, 10)
┌───────┬─────┬───────┬───────┬───┬────────┬───────┬────────┬───────┐
│     i ┆   j ┆ i + j ┆ i - j ┆ … ┆ i // j ┆ i % j ┆ i ** j ┆ i . j │
│   --- ┆ --- ┆   --- ┆   --- ┆   ┆    --- ┆   --- ┆    --- ┆   --- │
│   f64 ┆ i64 ┆   f64 ┆   f64 ┆   ┆    f64 ┆   f64 ┆    f64 ┆   f64 │
╞═══════╪═════╪═══════╪═══════╪═══╪════════╪═══════╪════════╪═══════╡
│  0.00 ┆   1 ┆  1.00 ┆ -1.00 ┆ … ┆   0.00 ┆  0.00 ┆   0.00 ┆ 12.00 │
│  2.00 ┆   2 ┆  4.00 ┆  0.00 ┆ … ┆   1.00 ┆  0.00 ┆   4.00 ┆ 12.00 │
│  2.00 ┆   3 ┆  5.00 ┆ -1.00 ┆ … ┆   0.00 ┆  2.00 ┆   8.00 ┆ 12.00 │
│ -2.00 ┆   4 ┆  2.00 ┆ -6.00 ┆ … ┆  -1.00 ┆  2.00 ┆  16.00 ┆ 12.00 │
│ -2.00 ┆  -5 ┆ -7.00 ┆  3.00 ┆ … ┆   0.00 ┆ -2.00 ┆  -0.03 ┆ 12.00 │
└───────┴─────┴───────┴───────┴───┴────────┴───────┴────────┴───────┘


## Comparison Operations

In [38]:
(
    fruit.select(
        pl.col("name"),
        pl.col("weight"),
    ).filter(pl.col("weight") >= 1000)
)

name,weight
str,i64
"""Cantaloupe""",2500
"""Papaya""",1000
"""Watermelon""",5000


| Operador en línea | Método         | Descripción                          |
|-------------------|---------------|--------------------------------------|
| `==`              | `expr.eq()`   | Igualdad                             |
| `!=`              | `expr.ne()`  | Desigualdad                          |
| `<`               | `expr.lt()`   | Menor que                            |
| `<=`              | `expr.le()`   | Menor o igual que                    |
| `>`               | `expr.gt()`   | Mayor que                            |
| `>=`              | `expr.ge()`   | Mayor o igual que                    |

Utiliza operadores en línea para expresiones rápidas y métodos para mayor claridad o composición en Polars.

In [39]:
pl.select(pl.lit(3).is_between(3, 5)).item()

True

In [43]:
with pl.Config(float_precision=2, tbl_cell_numeric_alignment="RIGHT", tbl_cols=-1):
    print(
        pl.DataFrame(
            {"a": [-273.15, 0, 42, 100], "b": [1.4142, 2.7183, 42, 3.1415]}
        ).with_columns(
            (pl.col("a") == pl.col("b")).alias("a == b"),
            (pl.col("a") <= pl.col("b")).alias("a <= b"),
            (pl.all() > 0).name.suffix(" > 0"),
            ((pl.col("b") - pl.lit(2).sqrt()).abs() < 1e-3).alias("b ≈ √2"), 
            (pl.col("a") >= pl.col("b")).alias("a >= b"),
            ((1 < pl.col("b")) & (pl.col("b") < 3)).alias("1 < b < 3"),
        )
    )

shape: (4, 9)
┌─────────┬───────┬────────┬────────┬───────┬───────┬────────┬────────┬───────────┐
│       a ┆     b ┆ a == b ┆ a <= b ┆ a > 0 ┆ b > 0 ┆ b ≈ √2 ┆ a >= b ┆ 1 < b < 3 │
│     --- ┆   --- ┆ ---    ┆ ---    ┆ ---   ┆ ---   ┆ ---    ┆ ---    ┆ ---       │
│     f64 ┆   f64 ┆ bool   ┆ bool   ┆ bool  ┆ bool  ┆ bool   ┆ bool   ┆ bool      │
╞═════════╪═══════╪════════╪════════╪═══════╪═══════╪════════╪════════╪═══════════╡
│ -273.15 ┆  1.41 ┆ false  ┆ true   ┆ false ┆ true  ┆ true   ┆ false  ┆ true      │
│    0.00 ┆  2.72 ┆ false  ┆ true   ┆ false ┆ true  ┆ false  ┆ false  ┆ true      │
│   42.00 ┆ 42.00 ┆ true   ┆ true   ┆ true  ┆ true  ┆ false  ┆ true   ┆ false     │
│  100.00 ┆  3.14 ┆ false  ┆ false  ┆ true  ┆ true  ┆ false  ┆ true   ┆ false     │
└─────────┴───────┴────────┴────────┴───────┴───────┴────────┴────────┴───────────┘


In [44]:
pl.select(
    bool_num=pl.lit(True) > 0,
    time_time=pl.time(23, 58) > pl.time(0, 0),
    datetime_date=pl.datetime(1969, 7, 21, 2, 56) < pl.date(1976, 7, 20),
    str_num=pl.lit("5") < pl.lit(3).cast(pl.String),  
    datetime_time=pl.datetime(1999, 1, 1).dt.time() != pl.time(0, 0),  
    ).transpose(  
    include_header=True, header_name="comparison", column_names=["allowed"]
 )

comparison,allowed
str,bool
"""bool_num""",True
"""time_time""",True
"""datetime_date""",True
"""str_num""",False
"""datetime_time""",False


## Boolean Algebra Operations

In [45]:
x = 7
p = pl.lit(3) < pl.lit(x)  # True
q = pl.lit(x) < pl.lit(5)  # False
pl.select(p & q).item()

False

| Operador en línea | Método         | Descripción         |
|-------------------|---------------|---------------------|
| `&`               | `expr.and_()` | Conjunción lógica (AND) |
| `|`              | `expr.or_()`  | Disyunción lógica (OR)  |
| `~`               | `expr.not_()` | Negación lógica (NOT)   |
| `^`               | `expr.xor()`  | XOR lógico (exclusivo)  |

Utiliza operadores en línea para expresiones rápidas y métodos para mayor claridad o composición en Polars.

In [49]:
(
	pl.DataFrame(
		{"p": [True, True, False, False], "q": [True, False, True, False]}
	).with_columns(
        (pl.col("p") & pl.col("q")).alias("p & q"),
        (pl.col("p") | pl.col("q")).alias("p | q"),
        (~pl.col("p")).alias("~p"), 
        (pl.col("p") ^ pl.col("q")).alias("p ^ q"),
        (~(pl.col("p") & pl.col("q"))).alias("p ↑ q"),
        ((pl.col("p").or_(pl.col("q"))).not_()).alias("p ↓ q"), 
	)
)

p,q,p & q,p | q,~p,p ^ q,p ↑ q,p ↓ q
bool,bool,bool,bool,bool,bool,bool,bool
True,True,True,True,False,False,False,False
True,False,False,True,False,True,True,False
False,True,False,True,True,True,True,False
False,False,False,False,True,False,True,True


## Bitwise Operations

In [51]:
pl.select(pl.lit(10) | pl.lit(34)).item()

42

In [52]:
bits = pl.DataFrame(
	{"x": [1, 1, 0, 0, 7, 10], "y": [1, 0, 1, 0, 2, 34]},
	schema={"x": pl.UInt8, "y": pl.UInt8},
).with_columns(
	(pl.col("x") & pl.col("y")).alias("x & y"),
	(pl.col("x") | pl.col("y")).alias("x | y"),
	(~pl.col("x")).alias("~x"),
	(pl.col("x") ^ pl.col("y")).alias("x ^ y"),
)
bits

x,y,x & y,x | y,~x,x ^ y
u8,u8,u8,u8,u8,u8
1,1,1,1,254,0
1,0,0,1,254,1
0,1,0,1,255,1
0,0,0,0,255,0
7,2,2,7,248,5
10,34,2,42,245,40


In [54]:
print(bits.select(pl.all().map_elements("{0:08b}".format, return_dtype=pl.String)))

shape: (6, 6)
┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
│ x        ┆ y        ┆ x & y    ┆ x | y    ┆ ~x       ┆ x ^ y    │
│ ---      ┆ ---      ┆ ---      ┆ ---      ┆ ---      ┆ ---      │
│ str      ┆ str      ┆ str      ┆ str      ┆ str      ┆ str      │
╞══════════╪══════════╪══════════╪══════════╪══════════╪══════════╡
│ 00000001 ┆ 00000001 ┆ 00000001 ┆ 00000001 ┆ 11111110 ┆ 00000000 │
│ 00000001 ┆ 00000000 ┆ 00000000 ┆ 00000001 ┆ 11111110 ┆ 00000001 │
│ 00000000 ┆ 00000001 ┆ 00000000 ┆ 00000001 ┆ 11111111 ┆ 00000001 │
│ 00000000 ┆ 00000000 ┆ 00000000 ┆ 00000000 ┆ 11111111 ┆ 00000000 │
│ 00000111 ┆ 00000010 ┆ 00000010 ┆ 00000111 ┆ 11111000 ┆ 00000101 │
│ 00001010 ┆ 00100010 ┆ 00000010 ┆ 00101010 ┆ 11110101 ┆ 00101000 │
└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘


## Funciones para combinar expresiones en Polars

| Función                        | Descripción                                                                                   |
|-------------------------------|-----------------------------------------------------------------------------------------------|
| `pl.all_horizontal(…)`        | Calcula el AND bit a bit horizontal entre columnas.                                           |
| `pl.any_horizontal(…)`        | Calcula el OR bit a bit horizontal entre columnas.                                            |
| `pl.arctan2(…)`               | Calcula el arcotangente de dos argumentos en radianes.                                        |
| `pl.arctan2d(…)`              | Calcula el arcotangente de dos argumentos en grados.                                          |
| `pl.arg_sort_by(…)`           | Devuelve los índices de fila que ordenarían las columnas.                                     |
| `pl.arg_where(…)`             | Devuelve los índices donde la condición es verdadera.                                         |
| `pl.coalesce(…)`              | Recorre las columnas de izquierda a derecha, conservando el primer valor no nulo.             |
| `pl.concat_list(…)`           | Concatena horizontalmente columnas en una sola columna de tipo Lista.                         |
| `pl.concat_str(…)`            | Concatena horizontalmente columnas en una sola columna de tipo String.                        |
| `pl.corr(…)`                  | Calcula la correlación de Pearson o Spearman entre dos columnas.                              |
| `pl.cov(…)`                   | Calcula la covarianza entre dos columnas/expresiones.                                         |
| `pl.cum_fold(…)`              | Aplica un fold acumulativo horizontal sobre columnas con un fold por la izquierda.            |
| `pl.cum_reduce(…)`            | Reduce acumulativamente horizontalmente sobre columnas con un fold por la izquierda.          |
| `pl.cum_sum_horizontal(…)`    | Suma acumulativamente todos los valores horizontalmente entre columnas.                       |
| `pl.fold(…)`                  | Acumula sobre varias columnas horizontalmente/fila por fila con un fold por la izquierda.     |
| `pl.format(…)`                | Formatea expresiones como String.                                                             |
| `pl.map_batches(…)`           | Aplica una función personalizada sobre varias columnas/expresiones.                           |
| `pl.max_horizontal(…)`        | Obtiene el valor máximo horizontal entre columnas.                                            |
| `pl.min_horizontal(…)`        | Obtiene el valor mínimo horizontal entre columnas.                                            |
| `pl.reduce(…)`                | Acumula sobre varias columnas horizontalmente/fila por fila con un fold por la izquierda.     |
| `pl.rolling_corr(…)`          | Calcula la correlación móvil entre dos columnas/expresiones.                                  |
| `pl.rolling_cov(…)`           | Calcula la covarianza móvil entre dos columnas/expresiones.                                   |
| `pl.struct(…)`                | Agrupa columnas en una columna de tipo struct.                                                |
| `pl.sum_horizontal(…)`        | Suma todos los valores horizontalmente entre columnas.                                        |
| `pl.when(…)`                  | Inicia una expresión condicional when-then-otherwise.                                         |

In [55]:
scientists = pl.DataFrame(
    {
    "first_name": ["George", "Grace", "John", "Kurt", "Ada"],
    "last_name": ["Boole", "Hopper", "Tukey", "Gödel", "Lovelace"],
    "country": [
    "England",
    "United States",
    "United States",
    "Austria-Hungary",
    "England",
    ],
    }
)
scientists

first_name,last_name,country
str,str,str
"""George""","""Boole""","""England"""
"""Grace""","""Hopper""","""United States"""
"""John""","""Tukey""","""United States"""
"""Kurt""","""Gödel""","""Austria-Hungary"""
"""Ada""","""Lovelace""","""England"""


In [56]:
scientists.select(
    contcat_list=pl.concat_list(pl.col("^*_name$")),
    struct=pl.struct(pl.all()) 
)

contcat_list,struct
list[str],struct[3]
"[""George"", ""Boole""]","{""George"",""Boole"",""England""}"
"[""Grace"", ""Hopper""]","{""Grace"",""Hopper"",""United States""}"
"[""John"", ""Tukey""]","{""John"",""Tukey"",""United States""}"
"[""Kurt"", ""Gödel""]","{""Kurt"",""Gödel"",""Austria-Hungary""}"
"[""Ada"", ""Lovelace""]","{""Ada"",""Lovelace"",""England""}"


In [58]:
scientists.select(
    concat_str=pl.concat_str(pl.all(), separator=" "),
    format=pl.format("{}, {} from {}", "last_name", "first_name", "country"),
)

concat_str,format
str,str
"""George Boole England""","""Boole, George from England"""
"""Grace Hopper United States""","""Hopper, Grace from United Stat…"
"""John Tukey United States""","""Tukey, John from United States"""
"""Kurt Gödel Austria-Hungary""","""Gödel, Kurt from Austria-Hunga…"
"""Ada Lovelace England""","""Lovelace, Ada from England"""


In [59]:
prefs = pl.DataFrame(
    {
    "id": [1, 7, 42, 101, 999],
    "has_pet": [True, False, True, False, True],
    "likes_travel": [False, False, False, False, True],
    "likes_movies": [True, False, True, False, True],
    "likes_books": [False, False, True, True, True],
    }
    ).with_columns(
    all=pl.all_horizontal(pl.exclude("id")),
    any=pl.any_horizontal(pl.exclude("id")),
)
prefs

id,has_pet,likes_travel,likes_movies,likes_books,all,any
i64,bool,bool,bool,bool,bool,bool
1,True,False,True,False,False,True
7,False,False,False,False,False,False
42,True,False,True,True,False,True
101,False,False,False,True,False,True
999,True,True,True,True,True,True


In [60]:
prefs.select(
    sum=pl.sum_horizontal(pl.all()),
    max=pl.max_horizontal(pl.all()),
    min=pl.min_horizontal(pl.all()),
)

sum,max,min
i64,i64,i64
4,1,0
7,7,0
46,42,0
103,101,0
1005,999,1
