# Jupyter Notebook: [Harnessing the Power of Polars and Delta Lake for Data Processing.](https://blog.devgenius.io/harnessing-the-power-of-polars-and-delta-lake-for-data-processing-2d285ccfbef7)

In [None]:
# 📘 Notebook: Polars + Delta Lake Integration

## 1. Setup & Imports
import polars as pl
from deltalake import DeltaTable
# Requires polars[deltalake] or deltalake installed

# Location of the Delta table used by every step of this notebook.
target_path = "./dl_example"

## 2. Sample DataFrames
# Every frame shares the same four columns: string `id`, string `col1`,
# boolean `col2`, and integer `col3`.

# Base rows for the initial write.
data = dict(
    id=["0001", "0002", "0003"],
    col1=["a", "b", "c"],
    col2=[True, True, False],
    col3=[1, 2, 3],
)
df = pl.DataFrame(data)

# One extra row, used by the append step.
df_append = pl.DataFrame(
    dict(
        id=["0004"],
        col1=["d"],
        col2=[True],
        col3=[4],
    )
)

# Two rows sharing id "0004", used by the overwrite step.
df_overwrite = pl.DataFrame(
    dict(
        id=["0004", "0004"],
        col1=["dd", "ddd"],
        col2=[True, True],
        col3=[4, 40],
    )
)

# Source rows for the merge step (ids "0003" and "0005").
df_merge = pl.DataFrame(
    dict(
        id=["0003", "0005"],
        col1=["cc", "e"],
        col2=[True, True],
        col3=[33, 5],
    )
)

## 3. Write Modes: append/overwrite/merge

# Initial write (error if exists).
# A Delta table's partition columns are fixed when the table is created, so
# the partitioning is declared here, on the very first write.
df.write_delta(
    target=target_path,
    mode="error",
    delta_write_options={"partition_by": ["col2"]},
)

# Append new row.
# No partition_by here: the existing table's partitioning is reused, and
# deltalake rejects a write whose partition columns differ from the table's.
df_append.write_delta(target=target_path, mode="append")

# Overwrite mode: replaces the table contents with df_overwrite.
df_overwrite.write_delta(target=target_path, mode="overwrite")

# Merge: source rows (alias "s") are matched to target rows (alias "t") on id.
#   - matched rows are updated only when the source col3 is larger;
#   - unmatched source rows are inserted.
# After the overwrite above the table only holds id "0004", so with this
# sample data both merge rows ("0003" and "0005") take the insert path.
(
    df_merge.write_delta(
        target=target_path,
        mode="merge",
        delta_merge_options={
            "predicate": "s.id = t.id",
            "source_alias": "s",
            "target_alias": "t",
        },
    )
    .when_matched_update_all(predicate="t.col3 < s.col3")
    .when_not_matched_insert_all()
    .execute()
)

## 4. Read Data from Delta

# Eager read: load the whole table into a DataFrame right away.
df_current = pl.read_delta(target_path)
print("Eager read:", df_current, sep="\n")

# Lazy scan: build a LazyFrame first, materialise it only when printing.
ldf = pl.scan_delta(target_path)
print("\nLazy scan result:", ldf.collect(), sep="\n")

## 5. Partitioned Reads (via pyarrow_options)

# Partition filters are only valid on partition columns, so look at the table
# metadata instead of assuming 'col2' is one.
partition_columns = DeltaTable(target_path).metadata().partition_columns

if "col2" in partition_columns:
    # deltalake partition filters compare against the string form of the
    # partition value, hence "true" rather than the Python bool True.
    # NOTE(review): recent Polars releases may additionally require
    # use_pyarrow=True for pyarrow_options to take effect — confirm against
    # the installed version.
    df_filtered = pl.read_delta(
        target_path,
        pyarrow_options={"partitions": [("col2", "=", "true")]},
    )
else:
    # 'col2' is not a partition column: fall back to an ordinary row filter.
    df_filtered = pl.read_delta(target_path).filter(pl.col("col2"))

print("\nFiltered on partitioned col2=True:")
print(df_filtered)

## 6. Use DeltaTable for advanced operations
# Open the table through deltalake directly, pull the rows where col2 is True
# as a PyArrow table, then hand that table to Polars.
dt = DeltaTable(target_path)
pa_tbl = dt.to_pyarrow_table(
    filters=[("col2", "==", True)],
)
df_from_arrow = pl.from_arrow(pa_tbl)
print("\nRead via DeltaTable → Arrow → Polars:", df_from_arrow, sep="\n")

## 7. Summary
# Closing recap of the features exercised in the sections above.
summary = """
✅ Modes supported: error, append, overwrite, merge
✅ Partition-aware reads via pyarrow_options
✅ Support for Eager, Lazy, and Arrow-based ingestion
✅ Full Delta Lake support with Polars & deltalake (rust)
"""
print(summary)
