# [Expressions: Casting](https://docs.pola.rs/user-guide/expressions/casting/)

In [1]:
import polars as pl

# Sample frame used by the first few cells: two integer columns (one holding
# values far too large for an 8-bit integer) and one float column.
df = pl.DataFrame(
    dict(
        integers=[1, 2, 3],
        big_integers=[10000002, 2, 30000003],
        floats=[4.0, 5.8, -6.3],
    )
)
df

integers,big_integers,floats
i64,i64,f64
1,10000002,4.0
2,2,5.8
3,30000003,-6.3


## Basic example

Polars uses the `cast` function to transform data types. It has a parameter `strict` (default `True`) that determines what happens when a value cannot be converted: if it is `True`, Polars raises an error; if it is set to `False`, values that can't be converted are set to null.

In [2]:
# Cast integers to 32-bit floats and floats to 32-bit integers; each keyword
# names the resulting output column.
df.select(
    integers_as_float=pl.col("integers").cast(pl.Float32),
    floats_as_integers=pl.col("floats").cast(pl.Int32),
)

integers_as_float,floats_as_integers
f32,i32
1.0,4
2.0,5
3.0,-6


## Downcasting numerical data types

In [5]:
# Report the frame's estimated size, narrow two columns to smaller dtypes,
# then report the estimated size again.
print(f"before: {df.estimated_size()}")
narrowed_columns = [
    pl.col("integers").cast(pl.Int16),
    pl.col("floats").cast(pl.Float32),
]
result = df.with_columns(narrowed_columns)
print(f"after: {result.estimated_size()}")

before: 72
after: 42


If you downcast a column to a type that's too small to hold its values, Polars will raise an error.

In [6]:
from polars.exceptions import InvalidOperationError

# Attempt a strict cast to Int8; the values that do not fit make the cast
# raise, and the error message is printed instead of a result.
try:
    result = df.select(pl.col("big_integers").cast(pl.Int8))
except InvalidOperationError as err:
    print(err)
else:
    print(result)

conversion from `i64` to `i8` failed in column 'big_integers' for 2 out of 3 values: [10000002, 30000003]


Setting the `strict` parameter to `False` turns over- and underflowing values into null instead of raising an error.

In [7]:
# Same cast as the previous cell, but non-strict: values that do not fit in
# Int8 come back as null rather than raising.
result = df.select(
    pl.col("big_integers").cast(pl.Int8, strict=False),
)
print(result)

shape: (3, 1)
┌──────────────┐
│ big_integers │
│ ---          │
│ i8           │
╞══════════════╡
│ null         │
│ 2            │
│ null         │
└──────────────┘


## Converting strings to numeric data types

In [10]:
# Numbers stored as text, plus one genuine float column to cast the other way.
df = pl.DataFrame(
    dict(
        integers_as_strings=["1", "2", "3"],
        floats_as_strings=["4.0", "5.8", "-6.3"],
        floats=[4.0, 5.8, -6.3],
    )
)

# Parse the text columns into numbers and render the float column as text.
df.select(
    pl.col("integers_as_strings").cast(pl.Int32),
    pl.col("floats_as_strings").cast(pl.Float64),
    pl.col("floats").cast(pl.String),
)

integers_as_strings,floats_as_strings,floats
i32,f64,str
1,4.0,"""4.0"""
2,5.8,"""5.8"""
3,-6.3,"""-6.3"""


In [11]:
# The last entry contains embedded spaces, so it cannot be parsed as a float.
df = pl.DataFrame(
    {
        "floats": ["4.0", "5.8", "- 6 . 3"],
    }
)
try:
    result = df.select(pl.col("floats").cast(pl.Float64))
    # Show the result if the cast ever succeeds, so the cell never ends
    # silently (mirrors the overflow example earlier in the notebook).
    print(result)
except InvalidOperationError as err:
    # A strict cast raises on the unparsable value; display the message.
    print(err)

conversion from `str` to `f64` failed in column 'floats' for 1 out of 3 values: ["- 6 . 3"]


## Booleans

0 is converted to `False`; every other number is converted to `True`. In the other direction, `True` becomes 1 and `False` becomes 0.

In [13]:
# Numeric columns that include a zero, plus a boolean column.
df = pl.DataFrame(
    dict(
        integers=[-1, 0, 2, 3, 4],
        floats=[0.0, 1.0, 2.0, 3.0, 4.0],
        bools=[True, False, True, False, True],
    )
)

# Numbers -> booleans, and booleans -> 8-bit integers.
df.select(
    pl.col("integers").cast(pl.Boolean),
    pl.col("floats").cast(pl.Boolean),
    pl.col("bools").cast(pl.Int8),
)

integers,floats,bools
bool,bool,i8
True,False,1
False,True,0
True,True,1
True,True,0
True,True,1


## Parsing / formatting temporal data types

In [18]:
from datetime import date, datetime, time

# Two rows per temporal column: a reference value and a small, known offset,
# so the integer representation of each type is easy to read off.
df = pl.DataFrame(
    {
        "date": [
            date(1970, 1, 1),  # epoch
            date(1970, 1, 10),  # 9 days later
        ],
        "datetime": [
            datetime(1970, 1, 1, 0, 0, 0),  # epoch
            datetime(1970, 1, 1, 0, 1, 0),  # 1 minute later
        ],
        "time": [
            time(0, 0, 0),  # reference time
            time(0, 0, 1),  # 1 second later
        ],
    }
)

# Expose the underlying integers; each keyword names the output column.
# NOTE(review): a Time value has no epoch. The 1-second row comes out as
# 1_000_000_000, i.e. nanoseconds counted from 00:00:00, so
# `ns_since_midnight` would be a more accurate name for the last column.
df.select(
    days_since_epoch=pl.col("date").cast(pl.Int64),
    us_since_epoch=pl.col("datetime").cast(pl.Int64),
    ns_since_epoch=pl.col("time").cast(pl.Int64),
)

days_since_epoch,us_since_epoch,ns_since_epoch
i64,i64,i64
0,0,0
9,60000000,1000000000


In [23]:
# The same two days, once as real dates and once as ISO-formatted text.
df = pl.DataFrame(
    dict(
        date=[date(2022, 1, 1), date(2022, 1, 2)],
        string=["2022-01-01", "2022-01-02"],
    )
)

# Format the dates as text and parse the text into datetimes, using the same
# format string in both directions.
df.select(
    pl.col("date").dt.to_string(format="%Y-%m-%d"),
    pl.col("string").str.to_datetime(format="%Y-%m-%d"),
)

date,string
str,datetime[μs]
"""2022-01-01""",2022-01-01 00:00:00
"""2022-01-02""",2022-01-02 00:00:00
