## Parquet
---

---
### Step 1: BTreeMap -> Arrow RecordBatch.

In [2]:
:dep arrow
:dep parquet

In [3]:
use std::sync::Arc;

In [4]:
use arrow::array::{ArrayRef, StringArray};
use arrow::record_batch::RecordBatch;

In [5]:
// Build the sample key/value map from a literal list of pairs.
let src: std::collections::BTreeMap<String, String> = [("k1", "v1"), ("k2", "v2"), ("k3", "v3")]
    .into_iter()
    .map(|(k, v)| (k.to_string(), v.to_string()))
    .collect();
// Final expression of the cell: the notebook prints the map.
src

{"k1": "v1", "k2": "v2", "k3": "v3"}

In [6]:
// Copy the map's keys and values into two parallel vectors (BTreeMap iterates in key order,
// so position i in `keys` matches position i in `vals`).
let keys: Vec<String> = src.keys().cloned().collect();
let vals: Vec<String> = src.values().cloned().collect();

In [7]:
// Turn each Vec<String> into an Arrow string column; the new bindings shadow the vectors
// they consume.
let keys = Arc::new(StringArray::from(keys)) as ArrayRef;
let vals = Arc::new(StringArray::from(vals)) as ArrayRef;

In [8]:
// Assemble the two columns into one batch, naming them "key" and "val".
let record_batch = RecordBatch::try_from_iter([
    ("key", keys),
    ("val", vals),
])
.unwrap();

In [9]:
// Final expression of the cell: the notebook prints the batch (schema, columns, row count).
record_batch

RecordBatch { schema: Schema { fields: [Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "val", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, columns: [StringArray
[
  "k1",
  "k2",
  "k3",
], StringArray
[
  "v1",
  "v2",
  "v3",
]], row_count: 3 }

---
### Step 2: Arrow RecordBatch -> Parquet `Vec<u8>`

In [10]:
use parquet::arrow::arrow_writer::ArrowWriter;
use parquet::basic::Compression;
use parquet::file::properties::WriterProperties;

In [11]:
// Serialize the record batch into an in-memory, LZ4-compressed Parquet byte buffer.
let mut buffer: Vec<u8> = Vec::new();
let writer_props = WriterProperties::builder()
    .set_compression(Compression::LZ4)
    .build();
let mut writer =
    ArrowWriter::try_new(&mut buffer, record_batch.schema(), Some(writer_props)).unwrap();
writer.write(&record_batch).unwrap();
// close() consumes the writer and finalizes the output held in `buffer`.
writer.close().unwrap();

In [12]:
// Final expression of the cell: the notebook prints the raw Parquet bytes.
buffer

[80, 65, 82, 49, 21, 4, 21, 36, 21, 50, 76, 21, 6, 21, 0, 18, 0, 0, 0, 0, 0, 18, 0, 0, 0, 17, 97, 2, 0, 0, 0, 107, 49, 6, 0, 112, 50, 2, 0, 0, 0, 107, 51, 21, 0, 21, 8, 21, 26, 44, 21, 6, 21, 16, 21, 6, 21, 6, 28, 88, 2, 107, 51, 24, 2, 107, 49, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 5, 64, 2, 3, 36, 0, 21, 12, 25, 53, 0, 6, 16, 25, 24, 3, 107, 101, 121, 21, 10, 22, 6, 22, 126, 22, 158, 1, 38, 78, 38, 0, 28, 88, 2, 107, 51, 24, 2, 107, 49, 0, 0, 21, 4, 21, 36, 21, 50, 76, 21, 6, 21, 0, 18, 0, 0, 0, 0, 0, 18, 0, 0, 0, 17, 97, 2, 0, 0, 0, 118, 49, 6, 0, 112, 50, 2, 0, 0, 0, 118, 51, 21, 0, 21, 8, 21, 26, 44, 21, 6, 21, 16, 21, 6, 21, 6, 28, 88, 2, 118, 51, 24, 2, 118, 49, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 5, 64, 2, 3, 36, 0, 21, 12, 25, 53, 0, 6, 16, 25, 24, 3, 118, 97, 108, 21, 10, 22, 6, 22, 126, 22, 158, 1, 38, 78, 38, 0, 28, 88, 2, 118, 51, 24, 2, 118, 49, 0, 0, 25, 17, 2, 25, 24, 2, 107, 49, 25, 24, 2, 107, 51, 21, 0, 25, 22, 0, 0, 25, 17, 2, 25, 24, 2, 118, 49, 25, 24, 2, 118, 51, 21, 0, 25, 22

---
### Step 3: Parquet `Vec<u8>` -> Parquet File

In [13]:
use std::fs::File;
use std::io::Write;

In [14]:
// Persist the serialized Parquet bytes to disk.
let mut file = File::create("test.parq").unwrap();
// write_all returns an io::Result; unwrap it so a failed write is not silently ignored
// (a later read of "test.parq" would otherwise fail with a confusing error).
file.write_all(&buffer).unwrap();

---
### Step 4: Parquet File -> Polars DataFrame

In [23]:
:dep polars-core = { features = ["fmt"] }
:dep polars-io = { features = ["parquet"] }

In [24]:
use polars_io::parquet::ParquetReader;
use polars_io::SerReader;

In [28]:
// Reopen the Parquet file for reading. No `mut` needed: the handle is only moved into the
// reader in the next cell.
let file = File::open("test.parq").unwrap();

In [29]:
// Wrap the file handle in a Parquet reader, then read it into a DataFrame.
let reader = ParquetReader::new(file);
let df = reader.finish().unwrap();

In [30]:
// Final expression of the cell: the notebook prints the DataFrame as a table.
df

shape: (3, 2)
┌─────┬─────┐
│ key ┆ val │
│ --- ┆ --- │
│ str ┆ str │
╞═════╪═════╡
│ k1  ┆ v1  │
│ k2  ┆ v2  │
│ k3  ┆ v3  │
└─────┴─────┘