# Chapter 2: Reading and Writing Files

## Reading and writing CSV files

### Getting ready

In [29]:
import polars as pl

### How to do it...

In [30]:
df = pl.read_csv('../data/customer_shopping_data.csv')
df.head()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i64,str,i64,f64,str,str,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""","""5/8/2022""","""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""","""12/12/2021""","""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""","""9/11/2021""","""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""","""16/05/2021""","""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""","""24/10/2021""","""Kanyon"""


In [31]:
df = pl.read_csv('../data/customer_shopping_data_no_header.csv')
df.head()

I138884,C241288,Female,28,Clothing,5,1500.4,Credit Card,5/8/2022,Kanyon
str,str,str,i64,str,i64,f64,str,str,str
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""","""12/12/2021""","""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""","""9/11/2021""","""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""","""16/05/2021""","""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""","""24/10/2021""","""Kanyon"""
"""I227836""","""C657758""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""","""24/05/2022""","""Forum Istanbul"""


In [32]:
df = pl.read_csv('../data/customer_shopping_data_no_header.csv', has_header=False)
df.head()

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10
str,str,str,i64,str,i64,f64,str,str,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""","""5/8/2022""","""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""","""12/12/2021""","""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""","""9/11/2021""","""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""","""16/05/2021""","""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""","""24/10/2021""","""Kanyon"""


In [33]:
column_names = ['invoice_no', 'customer_id', 'gender', 'age', 'category', 'quantity', 'price', 'payment_method', 'invoice_date', 'shopping_mall']
df = pl.read_csv('../data/customer_shopping_data_no_header.csv', 
                 has_header=False, 
                 new_columns=column_names)
df.head()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i64,str,i64,f64,str,str,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""","""5/8/2022""","""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""","""12/12/2021""","""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""","""9/11/2021""","""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""","""16/05/2021""","""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""","""24/10/2021""","""Kanyon"""


In [34]:
df = pl.read_csv('../data/customer_shopping_data_no_header.csv', 
                 has_header=False, 
                 new_columns=column_names, 
                 try_parse_dates=True)
df.head()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i64,str,i64,f64,str,date,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""",2022-08-05,"""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""",2021-12-12,"""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""",2021-11-09,"""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""",2021-05-16,"""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""",2021-10-24,"""Kanyon"""


In [35]:
df = pl.read_csv('../data/customer_shopping_data_no_header.csv', 
                 has_header=False, 
                 new_columns=column_names, 
                 try_parse_dates=True, 
                 schema_overrides={'age': pl.Int8, 'quantity': pl.Int32})
df.head()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i8,str,i32,f64,str,date,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""",2022-08-05,"""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""",2021-12-12,"""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""",2021-11-09,"""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""",2021-05-16,"""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""",2021-10-24,"""Kanyon"""


In [36]:
df.write_csv('../data/output/shopping_data_output.csv', 
             include_header=False, 
             separator=',')

### There is more...

In [37]:
lf = pl.scan_csv('../data/customer_shopping_data_no_header.csv', 
                 has_header=False, 
                 new_columns=column_names, 
                 try_parse_dates=True, 
                 schema_overrides={'age': pl.Int8, 'quantity': pl.Int32})
lf.head().collect()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i8,str,i32,f64,str,date,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""",2022-08-05,"""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""",2021-12-12,"""Forum Istanbul"""
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""",2021-11-09,"""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""",2021-05-16,"""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""",2021-10-24,"""Kanyon"""


In [38]:
lf.sink_csv('../data/output/shopping_data_output_sink.csv')

## Reading and writing parquet files

### How to do it...

In [39]:
parquet_input_file_path = '../data/venture_funding_deals.parquet'
df = pl.read_parquet(parquet_input_file_path, 
                     columns=['Company', 'Amount', 'Valuation', 'Industry'], 
                     row_index_name='row_cnt')
df.head()

row_cnt,Company,Amount,Valuation,Industry
u32,str,str,str,str
0,"""OpenAI""","""$10,000,000,000""","""n/a""","""Artificial intelligence"""
1,"""Stripe""","""$6,500,000,000""","""$50,000,000,000""","""Fintech"""
2,"""Inflection AI""","""$1,300,000,000""","""$4,000,000,000""","""Artificial intelligence"""
3,"""Anthropic""","""$1,250,000,000""","""$4,000,000,000""","""Artificial intelligence"""
4,"""Generate Capital""","""$1,030,900,000""","""n/a""","""Energy"""


In [40]:
pl.read_parquet_schema(parquet_input_file_path)

{'Company': String,
 'Amount': String,
 'Lead investors': String,
 'Valuation': String,
 'Industry': String,
 'Date reported': String}

In [41]:
parquet_output_file_path = '../data/output/venture_funding_deals_output.parquet'
df.write_parquet(parquet_output_file_path, compression='lz4', compression_level=10)

In [42]:
lf = pl.scan_parquet(parquet_input_file_path)
lf.head().collect()

Company,Amount,Lead investors,Valuation,Industry,Date reported
str,str,str,str,str,str
"""OpenAI""","""$10,000,000,000""","""Microsoft""","""n/a""","""Artificial intelligence""","""1/23/23"""
"""Stripe""","""$6,500,000,000""","""n/a""","""$50,000,000,000""","""Fintech""","""3/15/23"""
"""Inflection AI""","""$1,300,000,000""","""Microsoft, Reid Hoffman, Bill …","""$4,000,000,000""","""Artificial intelligence""","""6/29/23"""
"""Anthropic""","""$1,250,000,000""","""Amazon""","""$4,000,000,000""","""Artificial intelligence""","""9/25/23"""
"""Generate Capital""","""$1,030,900,000""","""n/a""","""n/a""","""Energy""","""1/6/23"""


In [43]:
lf.sink_parquet(parquet_output_file_path, maintain_order=False)

### There is more...

In [44]:
partitioned_parquet_input_file_path = '../data/venture_funding_deals_partitioned'
df = pl.read_parquet(
    partitioned_parquet_input_file_path, 
    use_pyarrow=True, 
    pyarrow_options={'partitioning': 'hive'}
)
df.head()

Company,Amount,Lead investors,Valuation,Date reported,Industry
str,str,str,str,str,cat
"""Restaurant365""","""$135,000,000""","""KKR, L Catterton""","""$1,000,000,000""","""5/19/23""","""Accounting"""
"""Madhive""","""$300,000,000""","""Goldman Sachs Asset Management""","""$1,000,000,000""","""6/13/23""","""Advertising"""
"""Ursa Major,""","""$100,000,000""","""BlackRock, Space Capital""","""n/a""","""4/26/23""","""Aerospace"""
"""Indigo""","""$250,000,000""","""Flagship Pioneering, State of …","""na""","""9/15/23""","""Agriculture"""
"""Chronosphere""","""$115,000,000""","""GV""","""n/a""","""1/9/23""","""Analytics"""


In [45]:
partitioned_parquet_output_file_path = '../data/output/venture_funding_deals_partitioned_output'
df.write_parquet(
    partitioned_parquet_output_file_path, 
    use_pyarrow=True, 
    pyarrow_options={
        'partition_cols': ['Industry'],
        'existing_data_behavior': 'overwrite_or_ignore'
        }
)

## Reading and writing Delta Lake tables

### How to do it...

In [46]:
delta_input_file_path = '../data/venture_funding_deals_delta'
df = pl.read_delta(delta_input_file_path)
df.head()

Company,Amount,Lead investors,Valuation,Industry,Date reported
str,str,str,str,str,str
"""OpenAI""","""$10,000,000,000""","""Microsoft""","""n/a""","""Artificial intelligence""","""1/23/23"""
"""Stripe""","""$6,500,000,000""","""n/a""","""$50,000,000,000""","""Fintech""","""3/15/23"""
"""Inflection AI""","""$1,300,000,000""","""Microsoft, Reid Hoffman, Bill …","""$4,000,000,000""","""Artificial intelligence""","""6/29/23"""
"""Anthropic""","""$1,250,000,000""","""Amazon""","""$4,000,000,000""","""Artificial intelligence""","""9/25/23"""
"""Generate Capital""","""$1,030,900,000""","""n/a""","""n/a""","""Energy""","""1/6/23"""


In [47]:
lf = pl.scan_delta(delta_input_file_path)
lf.head().collect()

Company,Amount,Lead investors,Valuation,Industry,Date reported
str,str,str,str,str,str
"""OpenAI""","""$10,000,000,000""","""Microsoft""","""n/a""","""Artificial intelligence""","""1/23/23"""
"""Stripe""","""$6,500,000,000""","""n/a""","""$50,000,000,000""","""Fintech""","""3/15/23"""
"""Inflection AI""","""$1,300,000,000""","""Microsoft, Reid Hoffman, Bill …","""$4,000,000,000""","""Artificial intelligence""","""6/29/23"""
"""Anthropic""","""$1,250,000,000""","""Amazon""","""$4,000,000,000""","""Artificial intelligence""","""9/25/23"""
"""Generate Capital""","""$1,030,900,000""","""n/a""","""n/a""","""Energy""","""1/6/23"""


In [48]:
df.write_delta('../data/output/venture_funding_deals_delta_output', mode='overwrite')

In [49]:
delta_partitioned_output_file_path = '../data/output/venture_funding_deals_delta_partitioned_output'
delta_write_options = {'partition_by': 'Industry'}
df.write_delta(
    delta_partitioned_output_file_path, 
    mode='overwrite', 
    delta_write_options=delta_write_options
)

In [50]:
df = pl.read_delta(delta_partitioned_output_file_path)
df.head()

Company,Amount,Lead investors,Valuation,Industry,Date reported
str,str,str,str,str,str
"""Redwood Materials""","""$1,000,000,000""","""Goldman Sachs Asset Management…","""n/a""","""Renewable energy""","""8/29/23"""
"""Silicon Ranch Corp.""","""$375,000,000""","""n/a""","""n/a""","""Renewable energy""","""1/5/23"""
"""Stack AV""","""$1,000,000,000""","""SoftBand Group""","""n/a""","""Autonomous vehicles""","""9/7/23"""
"""SandboxAQ""","""$500,000,000""","""n/a""","""n/a""","""Artifical intelligence""","""2/14/23"""
"""Humane""","""$100,000,000""","""Kindred Ventures""","""n/a""","""Artifical intelligence""","""3/8/23"""


In [51]:
df = pl.read_delta(
    delta_partitioned_output_file_path, 
    pyarrow_options={'partitions': [('Industry', '=', 'Accounting')]}
)
df.head()

Company,Amount,Lead investors,Valuation,Industry,Date reported
str,str,str,str,str,str
"""Restaurant365""","""$135,000,000""","""KKR, L Catterton""","""$1,000,000,000""","""Accounting""","""5/19/23"""


### There is more...

In [52]:
from config import aws_access_key_id, aws_secret_access_key

In [53]:
table_path = 's3://sandbox-data-lake/letters_delta'
storage_options= {
    'aws_access_key_id': aws_access_key_id,
    'aws_secret_access_key': aws_secret_access_key,
    'aws_region': 'us-west-1'
}

df = pl.read_delta(table_path, storage_options=storage_options)  
df.head()

OSError: Generic S3 error: Client error with status 403 Forbidden: <?xml version="1.0" encoding="UTF-8"?>
<Error><Code>InvalidAccessKeyId</Code><Message>The AWS Access Key Id you provided does not exist in our records.</Message><AWSAccessKeyId>YOUR_AWS_ACCESS_KEY_ID</AWSAccessKeyId><RequestId>8BK33PH5TA2DQAKT</RequestId><HostId>3BSvt4OIJNnGMoejX88c+GSxuzHFgg8nsA9ZzW8es0+0y/33Z9WK5YKLua4c5H2zM27R3JrhqAI=</HostId></Error>

## Reading and writing JSON files

### How to do it...

In [14]:
df = pl.read_json('../data/world_population.json')
df.select(df.columns[:10]).head()

place,pop1980,pop2000,pop2010,pop2022,pop2023,pop2030,pop2050,country,area
i64,f64,f64,f64,f64,i64,f64,f64,str,i64
356,696828385.0,1059600000.0,1240600000.0,1417200000.0,1428627663,1515000000.0,1670500000.0,"""India""",3287590
156,982372466.0,1264100000.0,1348200000.0,1425900000.0,1425671352,1415600000.0,1312600000.0,"""China""",9706961
840,223140018.0,282398554.0,311182845.0,338289857.0,339996563,352162301.0,375391963.0,"""United States""",9372610
360,148177096.0,214072421.0,244016173.0,275501339.0,277534122,292150100.0,317225213.0,"""Indonesia""",1904569
586,80624057.0,154369924.0,194454498.0,235824862.0,240485658,274029836.0,367808468.0,"""Pakistan""",881912


In [27]:
df.write_json('../data/output/world_population_output.json')

In [28]:
df = pl.read_ndjson('../data/world_population.jsonl')
df.select(df.columns[:10]).head()

place,pop1980,pop2000,pop2010,pop2022,pop2023,pop2030,pop2050,country,area
i64,f64,f64,f64,f64,i64,f64,f64,str,i64
356,696828385.0,1059600000.0,1240600000.0,1417200000.0,1428627663,1515000000.0,1670500000.0,"""India""",3287590
156,982372466.0,1264100000.0,1348200000.0,1425900000.0,1425671352,1415600000.0,1312600000.0,"""China""",9706961
840,223140018.0,282398554.0,311182845.0,338289857.0,339996563,352162301.0,375391963.0,"""United States""",9372610
360,148177096.0,214072421.0,244016173.0,275501339.0,277534122,292150100.0,317225213.0,"""Indonesia""",1904569
586,80624057.0,154369924.0,194454498.0,235824862.0,240485658,274029836.0,367808468.0,"""Pakistan""",881912


In [29]:
df.write_ndjson('../data/output/world_population_output.jsonl')

### There is more...

In [30]:
lf = pl.scan_ndjson('../data/world_population.jsonl')
lf.select(lf.columns[:10]).head().collect()

place,pop1980,pop2000,pop2010,pop2022,pop2023,pop2030,pop2050,country,area
i64,f64,f64,f64,f64,i64,f64,f64,str,i64
356,696828385.0,1059600000.0,1240600000.0,1417200000.0,1428627663,1515000000.0,1670500000.0,"""India""",3287590
156,982372466.0,1264100000.0,1348200000.0,1425900000.0,1425671352,1415600000.0,1312600000.0,"""China""",9706961
840,223140018.0,282398554.0,311182845.0,338289857.0,339996563,352162301.0,375391963.0,"""United States""",9372610
360,148177096.0,214072421.0,244016173.0,275501339.0,277534122,292150100.0,317225213.0,"""Indonesia""",1904569
586,80624057.0,154369924.0,194454498.0,235824862.0,240485658,274029836.0,367808468.0,"""Pakistan""",881912


## Reading and writing Excel files

### Getting ready

In [1]:
import polars as pl

### How to do it...

In [2]:
output_file_path = '../data/output/customer_shopping_data.xlsx'

df = pl.read_csv('../data/customer_shopping_data.csv')
df.write_excel(
    output_file_path,
    worksheet='Output Sheet1',
    header_format={'bold': True}
)

<xlsxwriter.workbook.Workbook at 0x108046910>

In [16]:
input_file_path = '../data/output/customer_shopping_data.xlsx'
df = pl.read_excel(
    input_file_path, 
    sheet_name='Output Sheet1',
    engine='xlsx2csv',
    read_options={'try_parse_dates': True}
)
df.head()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i64,str,i64,f64,str,date,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""",2022-08-05,"""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""",2021-12-12,"""Forum Istanbul…"
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""",2021-11-09,"""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""",2021-05-16,"""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""",2021-10-24,"""Kanyon"""


## Reading and writing other data file formats

### How to do it...

In [1]:
import polars as pl

In [2]:
csv_input_file_path = '../data/customer_shopping_data.csv'
ipc_file_path = '../data/output/customer_shopping_data.arrow'
df = pl.read_csv(csv_input_file_path)
df.write_ipc(ipc_file_path)

In [3]:
df = pl.read_ipc(ipc_file_path)
df.head()

invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
str,str,str,i64,str,i64,f64,str,str,str
"""I138884""","""C241288""","""Female""",28,"""Clothing""",5,1500.4,"""Credit Card""","""5/8/2022""","""Kanyon"""
"""I317333""","""C111565""","""Male""",21,"""Shoes""",3,1800.51,"""Debit Card""","""12/12/2021""","""Forum Istanbul…"
"""I127801""","""C266599""","""Male""",20,"""Clothing""",1,300.08,"""Cash""","""9/11/2021""","""Metrocity"""
"""I173702""","""C988172""","""Female""",66,"""Shoes""",5,3000.85,"""Credit Card""","""16/05/2021""","""Metropol AVM"""
"""I337046""","""C189076""","""Female""",53,"""Books""",4,60.6,"""Cash""","""24/10/2021""","""Kanyon"""


In [4]:
avro_file_path = '../data/world_population.avro'
df = (
    pl.read_json('../data/world_population.json')
    .select(['country', 'pop2023', 'density'])
)
df.write_avro(avro_file_path)

In [5]:
df = pl.read_avro(avro_file_path)
df.head()

country,pop2023,density
str,i64,f64
"""India""",1428627663,480.5033
"""China""",1425671352,151.2696
"""United States""",339996563,37.1686
"""Indonesia""",277534122,147.8196
"""Pakistan""",240485658,311.9625


In [6]:
lf = pl.scan_iceberg('../data/my_iceberg_catalog/demo.db/my_table/metadata/00001-7ad1e6e8-7a0d-4455-ac6d-bcca5a45b494.metadata.json')
lf.head().collect()

a,b,c
i64,i64,i64
1,4,7
2,5,8
3,6,9


### There is more...

In [5]:
lf = pl.scan_ipc(ipc_file_path)

In [None]:
lf.sink_ipc('../data/output/customer_shopping_data.arrow')

In [19]:
lf.collect().write_ipc('../data/output/customer_shopping_data_lazy.arrow')

In [None]:
csv_input_file_path = '../data/customer_shopping_data.csv'
lf = pl.scan_csv(csv_input_file_path)
lf.sink_ipc('../data/output/customer_shopping_data_from_csv.arrow')

## Reading and writing multiple files

### How to do it...

In [22]:
data = {'Letter': ['A','B','C'], 'Value': [1,2,3]}
df = pl.DataFrame(data)

In [23]:
dfs = df.group_by(['Letter'])
print(dfs)

<polars.dataframe.group_by.GroupBy object at 0x7321188263f0>


In [24]:
for name, df in dfs:
    df.write_csv(f'../data/output/letter_{name[0]}.csv')

In [25]:
df = pl.read_csv('../data/output/letter_*.csv')
df.head()

Letter,Value
str,i64
"""A""",1
"""B""",2
"""C""",3


In [26]:
lf = pl.scan_csv('../data/output/letter_*.csv')
lf.head().collect()

Letter,Value
str,i64
"""A""",1
"""B""",2
"""C""",3


### There is more...

In [27]:
import glob
lfs = [pl.scan_csv(file) for file in glob.glob('../data/output/letter_*.csv')]
dfs = pl.collect_all(lfs)
dfs

[shape: (1, 2)
 ┌────────┬───────┐
 │ Letter ┆ Value │
 │ ---    ┆ ---   │
 │ str    ┆ i64   │
 ╞════════╪═══════╡
 │ A      ┆ 1     │
 └────────┴───────┘,
 shape: (1, 2)
 ┌────────┬───────┐
 │ Letter ┆ Value │
 │ ---    ┆ ---   │
 │ str    ┆ i64   │
 ╞════════╪═══════╡
 │ C      ┆ 3     │
 └────────┴───────┘,
 shape: (1, 2)
 ┌────────┬───────┐
 │ Letter ┆ Value │
 │ ---    ┆ ---   │
 │ str    ┆ i64   │
 ╞════════╪═══════╡
 │ B      ┆ 2     │
 └────────┴───────┘]

## Working with databases

### How to do it...

In [14]:
import polars as pl

In [15]:
from config import postgres_pass, postgres_user

In [28]:
# connectorx is required
uri = f'postgres://{postgres_user}:{postgres_pass}@localhost:5432/postgres' 
query = 'SELECT * FROM sandbox.cars'
df = pl.read_database_uri(query, uri)
df.head()

NameError: name 'postgres_user' is not defined

In [54]:
# pip install adbc-driver-postgresql pyarrow
df = pl.read_database_uri(query, uri, engine='adbc')
df.head()

NameError: name 'query' is not defined

In [19]:
# pip install sqlalchemy pg8000 or psycopg2 (default is psycopg2)
from sqlalchemy import create_engine

con_string = f'postgresql+pg8000://{postgres_user}:{postgres_pass}@localhost:5432/postgres' 
engine = create_engine(con_string)
conn = engine.connect()

df = pl.read_database(query, connection=conn)
df.head()

brand,model,year
str,str,i64
"""Volvo""","""p1800""",1968
"""BMW""","""M1""",1978
"""Toyota""","""Celica""",1975


In [23]:
df.write_database(table_name="sandbox.cars_output", connection=uri, engine="adbc", if_table_exists='append') 

3

In [24]:
df.write_database(table_name="sandbox.cars_output", connection=con_string, engine="sqlalchemy", if_table_exists='replace')

3

In [22]:
df = pl.read_database_uri('select * from sandbox.cars_output', uri, engine='adbc')
df.head()

brand,model,year
str,str,i64
"""Volvo""","""p1800""",1968
"""BMW""","""M1""",1978
"""Toyota""","""Celica""",1975
