# [Expressions: Basic Operations](https://docs.pola.rs/user-guide/expressions/basic-operations/)

In [1]:
import polars as pl
import numpy as np

In [2]:
# Seed NumPy's global RNG so the "random" column is identical on every run.
np.random.seed(42)

# Small demo frame used throughout the notebook; "nrs" contains one null.
df = pl.DataFrame(
    dict(
        nrs=[1, 2, 3, None, 5],
        names=["foo", "ham", "spam", "egg", "spam"],
        random=np.random.rand(5),
        groups=["A", "A", "B", "A", "B"],
    )
)
df

nrs,names,random,groups
i64,str,f64,str
1.0,"""foo""",0.37454,"""A"""
2.0,"""ham""",0.950714,"""A"""
3.0,"""spam""",0.731994,"""B"""
,"""egg""",0.598658,"""A"""
5.0,"""spam""",0.156019,"""B"""


## Basic arithmetic

Instead of the operators, you can also use the equivalent methods `.add(x)`, `.truediv(x)`, `.pow(x)`, etc.

In [3]:
# Arithmetic operators on expressions are applied element-wise; the null in
# "nrs" produces a null in every derived column.
result = df.select(
    (pl.col("nrs") + 5).alias("nrs + 5"),
    (pl.col("nrs") - 5).alias("nrs - 5"),
    (pl.col("nrs") * pl.col("random")).alias("nrs * random"),
    (pl.col("nrs") / pl.col("random")).alias("nrs / random"),
    (pl.col("nrs") ** 2).alias("nrs ** 2"),
    # Modulo: the column label must name the operator actually applied.
    (pl.col("nrs") % 3).alias("nrs % 3"),
)
result

nrs + 5,nrs - 5,nrs * random,nrs / random,nrs ** 2,nrs ** 3
i64,i64,f64,f64,i64,i64
6.0,-4.0,0.37454,2.669941,1.0,1.0
7.0,-3.0,1.901429,2.103681,4.0,2.0
8.0,-2.0,2.195982,4.098395,9.0,0.0
,,,,,
10.0,0.0,0.780093,32.047453,25.0,2.0


## Comparisons

In [4]:
# Each key is the output column name, each value the comparison producing it.
# Comparisons involving the null in "nrs" evaluate to null, not False.
result = df.select(
    **{
        "nrs > 1": pl.col("nrs") > 1,
        "nrs >= 3": pl.col("nrs") >= 3,
        "random < .2": pl.col("random") < 0.2,
        "random <= .5": pl.col("random") <= 0.5,
        "nrs != 1": pl.col("nrs") != 1,
        "nrs == 1": pl.col("nrs") == 1,
    }
)
result

nrs > 1,nrs >= 3,random < .2,random <= .5,nrs != 1,nrs == 1
bool,bool,bool,bool,bool,bool
False,False,False,True,False,True
True,False,False,False,True,False
True,True,False,False,True,False
,,False,False,,
True,True,True,True,True,False


## Boolean and bitwise operations

Instead of the operators `|`, `&`, and `~`, you can also use the methods `.or_`, `.and_`, and `.not_`.

In [7]:
# Each comparison is parenthesised because Python's `&` and `|` bind more
# tightly than `==` and `<`; unary `~` applies to the whole `is_null()` call.
result = df.select(
    (
        ~pl.col("nrs").is_null()
        & (pl.col("groups") == "A")
    ).alias("number not null and group A"),
    (
        (pl.col("random") < 0.5)
        | (pl.col("groups") == "B")
    ).alias("random < 0.5 or group B"),
)
result

number not null and group A,random < 0.5 or group B
bool,bool
True,True
True,False
False,True
False,False
False,True


The same operators (`&`, `|`, `~`) perform bitwise operations when they are applied to integer columns instead of boolean ones.

## Counting unique values

In [12]:
# 100_000 integers drawn from [0, 100_000) with NumPy's global RNG.
long_df = pl.DataFrame(
    {"numbers": np.random.randint(low=0, high=100_000, size=100_000)}
)
result = long_df.select(
    # Exact number of distinct values.
    n_unique=pl.col("numbers").n_unique(),
    # Approximate number of distinct values: inaccurate, but faster on big datasets.
    approx_n_unique=pl.col("numbers").approx_n_unique(),
)
result

n_unique,approx_n_unique
u32,u32
63152,63311


The code below returns the value counts as a struct column.

In [14]:
# One struct per distinct name, pairing the name with its number of occurrences.
result = df.select(pl.col("names").value_counts().alias("value counts"))
result

value counts
struct[2]
"{""spam"",2}"
"{""ham"",1}"
"{""egg"",1}"
"{""foo"",1}"


In [17]:
# Distinct names in first-seen order, next to how often each one occurs;
# the keyword-style entry names the second column "unique counts".
df.select(
    pl.col("names").unique(maintain_order=True),
    **{"unique counts": pl.col("names").unique_counts()},
)

names,unique counts
str,u32
"""foo""",1
"""ham""",1
"""spam""",2
"""egg""",1


## Conditionals

This is very handy: you can chain `when`/`then` branches to build long if-else statements.

In [34]:
# Branches are checked top to bottom; rows matching no `when` condition
# take the `otherwise` value.
df.select(
    "nrs",
    Collatz=(
        pl.when(pl.col("nrs") == 1)
        .then(pl.col("nrs") + 10)
        .when(pl.col("nrs") == 2)
        .then(pl.col("nrs") + 100)
        .otherwise(pl.col("nrs") - 1)
    ),
)

nrs,Collatz
i64,i64
1.0,11.0
2.0,102.0
3.0,2.0
,
5.0,4.0
