In [1]:
import polars as pl

## Create DataFrame

Ref:

- https://docs.pola.rs/api/python/stable/reference/dataframe/index.html

Create DataFrame from ``{"col": [values...]}``

In [2]:
# Column-oriented input: each key is a column name and each list holds
# that column's values.
data = {
    "a": [1, 2],
    "b": [3, 4],
}
df = pl.DataFrame(data=data)
df

a,b
i64,i64
1,3
2,4


In [3]:
# Column data types, in column order.
df.dtypes

[Int64, Int64]

In [4]:
# Schema: the ordered mapping of column name to data type.
df.schema

Schema([('a', Int64), ('b', Int64)])

In [5]:
# Input shape: {"col1": [...], "col2": [...]}.
# This is the most efficient input because polars is column-oriented.
# The schema is passed as a {column name: polars dtype} mapping.
data = {
    "col1": [0, 2],
    "col2": [3, 7],
}
df = pl.DataFrame(
    data,
    schema={"col1": pl.Float32, "col2": pl.Int64},
)
df

col1,col2
f32,i64
0.0,3
2.0,7


In [6]:
# The schema can also be a list of (column name, polars dtype) pairs.
data = {
    "col1": [1, 2],
    "col2": [3, 4],
}
df = pl.DataFrame(
    data,
    schema=[
        ("col1", pl.Float32),
        ("col2", pl.Int64),
    ],
)
df

col1,col2
f32,i64
1.0,3
2.0,4


In [7]:
# Python built-in types are accepted in the schema as well; in the output
# below, float is shown as f64 and int as i64.
data = {
    "col1": [0, 2],
    "col2": [3, 7],
}
df = pl.DataFrame(
    data,
    schema={"col1": float, "col2": int},
)
df

col1,col2
f64,i64
0.0,3
2.0,7


In [8]:
# Same idea with a list of (column name, Python type) pairs as the schema.
data = {
    "col1": [1, 2],
    "col2": [3, 4],
}
df = pl.DataFrame(
    data,
    schema=[
        ("col1", float),
        ("col2", int),
    ],
)
df

col1,col2
f64,i64
1.0,3
2.0,4


Create DataFrame from a 2D array (list of lists)

In [9]:
# Row-oriented input: each inner list is one row. orient="row" states this
# explicitly, and the schema supplies the column names.
data = [
    [1, "Alice"],
    [2, "Bob"],
]
df = pl.DataFrame(
    data,
    schema=["id", "name"],
    orient="row",
)
df

id,name
i64,str
1,"""Alice"""
2,"""Bob"""


Create DataFrame from a list of dictionaries (structs)

In [10]:
# Given [{key: value}, ...], polars tries to infer each column's data type.
data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
df = pl.DataFrame(data=data)
df

id,name
i64,str
1,"""Alice"""
2,"""Bob"""


In [11]:
# Given [{key: value}, ...], I suggest defining the schema explicitly
# instead of relying on type inference.
data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
df = pl.DataFrame(
    data,
    schema={"id": int, "name": str},
)
df

id,name
i64,str
1,"""Alice"""
2,"""Bob"""


## Handle Type Mismatch

In [12]:
# No schema is passed here, so polars infers the column types on its own.
# Note that "bank_account" is a str in the first two rows but an int in the
# third; in the output below the column ends up as str.
data = [
    {"id": 1, "name": "Alice", "bank_account": "1111111111"},
    {"id": 2, "name": "Bob", "bank_account": "2222222222"},
    {"id": 3, "name": "Cathy", "bank_account": 3333333333},
]
df = pl.DataFrame(data)
df

id,name,bank_account
i64,str,str
1,"""Alice""","""1111111111"""
2,"""Bob""","""2222222222"""
3,"""Cathy""","""3333333333"""


In [13]:
# The third row stores "bank_account" as an int while the other rows use str.
# The explicit schema declares the column as str, which is what the output
# below shows.
data = [
    {
        "id": 1,
        "name": "Alice",
        "bank_account": "1111111111",
    },
    {
        "id": 2,
        "name": "Bob",
        "bank_account": "2222222222",
    },
    {
        "id": 3,
        "name": "Cathy",
        "bank_account": 3333333333,
    },
]
df = pl.DataFrame(
    data,
    schema={"id": int, "name": str, "bank_account": str},
)
df

id,name,bank_account
i64,str,str
1,"""Alice""","""1111111111"""
2,"""Bob""","""2222222222"""
3,"""Cathy""","""3333333333"""


## Pretty Print a DataFrame

In [15]:
import polars as pl

# Nine rows whose "details" value is a nested dict; polars shows that column
# as a struct with two fields in the output below.
# NOTE(review): "id" is the constant 1 for every row -- presumably `i` was
# intended; confirm before changing, since the recorded outputs show 1.
data = [
    {
        "id": 1,
        "name": f"Name-{i}",
        "details": {
            "Phone": "111-222-3333",
            "SSN": f"SSN-{i}",
        },
    }
    for i in range(1, 10)
]
df = pl.DataFrame(data=data)
df

id,name,details
i64,str,struct[2]
1,"""Name-1""","{""111-222-3333"",""SSN-1""}"
1,"""Name-2""","{""111-222-3333"",""SSN-2""}"
1,"""Name-3""","{""111-222-3333"",""SSN-3""}"
1,"""Name-4""","{""111-222-3333"",""SSN-4""}"
1,"""Name-5""","{""111-222-3333"",""SSN-5""}"
1,"""Name-6""","{""111-222-3333"",""SSN-6""}"
1,"""Name-7""","{""111-222-3333"",""SSN-7""}"
1,"""Name-8""","{""111-222-3333"",""SSN-8""}"
1,"""Name-9""","{""111-222-3333"",""SSN-9""}"


The [tabulate](https://pypi.org/project/tabulate/) library converts your DataFrame into a nicely formatted ASCII table.

In [16]:
from tabulate import tabulate
from pathlib import Path

# Render the DataFrame as an ASCII grid table, save it to "dataframe.txt" in
# the current working directory, and print it.
dir_here = Path.cwd()
path = dir_here / "dataframe.txt"

# tabulate accepts a {column name: values} mapping; df.columns supplies the
# header row in the same column order.
text = tabulate(df.to_dict(), headers=df.columns, tablefmt="grid")
# An explicit encoding keeps the written file independent of the platform default.
path.write_text(text, encoding="utf-8")
# Path.as_uri() builds a well-formed file:// URI (it percent-encodes characters
# such as spaces and handles Windows drive letters), unlike manual "file://" + path.
print(f"See Dataframe at: {path.as_uri()}")
print(text)

See Dataframe at: file:///Users/sanhehu/Documents/GitHub/learn_polars-project/docs/source/02-DataFrame/01-Construct-A-DataFrame/dataframe.txt
+------+--------+-------------------------------------------+
|   id | name   | details                                   |
|    1 | Name-1 | {'Phone': '111-222-3333', 'SSN': 'SSN-1'} |
+------+--------+-------------------------------------------+
|    1 | Name-2 | {'Phone': '111-222-3333', 'SSN': 'SSN-2'} |
+------+--------+-------------------------------------------+
|    1 | Name-3 | {'Phone': '111-222-3333', 'SSN': 'SSN-3'} |
+------+--------+-------------------------------------------+
|    1 | Name-4 | {'Phone': '111-222-3333', 'SSN': 'SSN-4'} |
+------+--------+-------------------------------------------+
|    1 | Name-5 | {'Phone': '111-222-3333', 'SSN': 'SSN-5'} |
+------+--------+-------------------------------------------+
|    1 | Name-6 | {'Phone': '111-222-3333', 'SSN': 'SSN-6'} |
+------+--------+-----------------------------------