# Jupyter Notebook: [Harnessing the Power of Polars and Delta Lake for Data Processing.](https://blog.devgenius.io/harnessing-the-power-of-polars-and-delta-lake-for-data-processing-2d285ccfbef7)

## Setup and Imports.

In [None]:
import polars as pl
from deltalake import DeltaTable, WriterProperties

## Data Preparation and DataFrame Creation.

In [None]:
# Directory used as the Delta table location by every cell below.
target_path = "./dl_example"

# --- Raw column data -------------------------------------------------------

# Three starting rows (ids 0001-0003) for the first write.
data = {
    "id": ["0001", "0002", "0003"],
    "col1": ["a", "b", "c"],
    "col2": [True, True, False],
    "col3": [1, 2, 3],
}

# One extra row with a new id, used by the append example.
data_for_append = {
    "id": ["0004"],
    "col1": ["d"],
    "col2": [True],
    "col3": [4],
}

# Two rows that both carry id "0004", used by the overwrite example.
data_for_overwrite = {
    "id": ["0004", "0004"],
    "col1": ["dd", "ddd"],
    "col2": [True, True],
    "col3": [4, 40],
}

# One id that is already in `data` ("0003") and one that is new ("0005"),
# used by the merge example.
data_merge = {
    "id": ["0003", "0005"],
    "col1": ["cc", "e"],
    "col2": [True, True],
    "col3": [33, 5],
}

# --- Polars frames built from the dictionaries above -----------------------

df = pl.DataFrame(data=data)
df_for_append = pl.DataFrame(data=data_for_append)
df_for_overwrite = pl.DataFrame(data=data_for_overwrite)
df_merge = pl.DataFrame(data=data_merge)

## Writing Data to Delta Lake.

In [None]:
# First write to target_path in this notebook; the write options ask for the
# data to be partitioned by the "id" column.
df.write_delta(
    target_path,
    mode="append",
    delta_write_options={"partition_by": ["id"]},
)

In [None]:
# Shell out to `tree` to list the files under the table directory after the first write.
!tree ./dl_example

In [None]:
# Append the single-row frame (id "0004") to the same table location.
df_for_append.write_delta(target_path, mode="append")

In [None]:
# List the table directory again to compare its contents after the append.
!tree ./dl_example

## Overwrite.

In [None]:
# Overwrite with a partition filter on id = "0004"; df_for_overwrite only
# contains rows with that id, so the intent is to replace just that partition.
# NOTE(review): newer deltalake releases appear to replace `partition_filters`
# with a `predicate` string - confirm against the installed version.
df_for_overwrite.write_delta(
    target_path,
    mode="overwrite",
    delta_write_options={"partition_filters": [("id", "=", "0004")]},
)

In [None]:
# Read the table back to inspect the rows after the overwrite.
pl.read_delta(target_path)

## Merge.

In [None]:
# Upsert df_merge into the table: rows are matched on id between the incoming
# frame (alias "src") and the existing table (alias "trg").
merge_options = {
    "predicate": "src.id = trg.id",
    "source_alias": "src",
    "target_alias": "trg",
}
table_merger = df_merge.write_delta(
    target_path,
    mode="merge",
    delta_merge_options=merge_options,
)
# Matched rows have all columns updated, unmatched source rows are inserted;
# execute() runs the merge and its return value is shown as the cell output.
table_merger.when_matched_update_all().when_not_matched_insert_all().execute()

In [None]:
# Read the table back to inspect the rows after the merge.
pl.read_delta(target_path)

## Reading Delta Tables.

In [None]:
# Time travel: read the table as of version 0 (presumably the state right
# after the initial write - verify with dt.history()).
pl.read_delta(target_path, version=0)

In [None]:
# Read only the "id" column instead of the full table.
pl.read_delta(target_path, columns=["id"])

In [None]:
# Pass a partition filter (id = "0001") through pyarrow_options so the read is
# restricted to that partition.
# NOTE(review): newer Polars releases may require use_pyarrow=True for
# pyarrow_options to be accepted - confirm against the installed version.
pl.read_delta(target_path, pyarrow_options={"partitions": [("id", "=", "0001")]})


## Metadata and History.

In [None]:
# Open the same table through the deltalake API; `dt` is used by the
# inspection cells that follow.
dt = DeltaTable(target_path)

In [None]:
# Show the table's metadata.
dt.metadata()

In [None]:
# Show the version of the table that `dt` currently points at.
dt.version()

In [None]:
# Show the table schema as returned by deltalake.
dt.schema()

In [None]:
# Convert the table schema to its PyArrow representation.
dt.schema().to_pyarrow()

In [None]:
# Show the table's history of operations (the writes, overwrite and merge above).
dt.history()