forked from pola-rs/polars
/
test_io.py
54 lines (42 loc) · 1.26 KB
/
test_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import io
from utils import get_complete_df
import polars as pl
import pandas as pd
import numpy as np
def test_to_from_buffer():
    """Round-trip a DataFrame through in-memory parquet and csv buffers.

    Writes with the DataFrame's ``to_*`` methods, reads back with the
    module-level ``pl.read_*`` functions, and checks equality (nulls
    compare equal).
    """
    df = get_complete_df()
    # csv round-trip cannot preserve this column faithfully, so drop it.
    df = df.drop("strings_nulls")
    # Writers are bound DataFrame methods; readers are polars module
    # functions (``read_parquet``/``read_csv`` are not DataFrame methods).
    for to_fn, from_fn in zip(
        [df.to_parquet, df.to_csv], [pl.read_parquet, pl.read_csv]
    ):
        f = io.BytesIO()
        to_fn(f)
        f.seek(0)  # rewind so the reader sees the data just written
        df_1 = from_fn(f)
        assert df.frame_equal(df_1, null_equal=True)
def test_read_web_file():
    """Fetch a CSV over HTTP and check the parsed frame's shape."""
    dataset_url = (
        "https://raw.githubusercontent.com/ritchie46/polars/master/examples"
        "/aggregate_multiple_files_in_chunks/datasets/foods1.csv"
    )
    frame = pl.read_csv(dataset_url)
    assert frame.shape == (27, 4)
def test_parquet_chunks():
    """
    This failed in https://github.com/ritchie46/polars/issues/545
    """
    # Row counts just at and just past 2**20 (1048576) — presumably the
    # chunk-size boundary that triggered the linked issue; verify against
    # the issue discussion.
    cases = [
        1048576,
        1048577,
    ]
    for case in cases:
        f = io.BytesIO()
        # repeat until it has case instances
        df = pd.DataFrame(
            np.tile([1.0, pd.to_datetime("2010-10-10")], [case, 1]),
            columns=["floats", "dates"],
        )
        print(df)
        # write as parquet
        df.to_parquet(f)
        print(f"reading {case} dates with polars...", end="")
        f.seek(0)  # rewind buffer before handing it to the reader
        # read it with polars
        polars_df = pl.read_parquet(f)