# Read and write Parquet - Tutorial

## Imports

In [1]:
import pyarrow as pa

In [2]:
import pyarrow.parquet as pq

## Read a dummy CSV


In [3]:
import pandas as pd
import os

# Load the sample CSV from the "data" folder under the current working directory
# into a pandas DataFrame.
pdf = pd.read_csv(
    os.path.join(
        os.getcwd(),
        "data",
        "sample-data-for-file-reads.csv",
    )
)

In [4]:
# Convert the pandas DataFrame into an Arrow table, the in-memory form that
# the Parquet writer below takes as input.
a_table = pa.Table.from_pandas(df=pdf)

## Write table to Parquet

In [5]:
# Persist the Arrow table as a Parquet file next to the source CSV.
# compression=None means no compression codec is requested for the file.
pq.write_table(
    table=a_table,
    where=os.path.join(os.getcwd(), "data", "a_table_example.parquet"),
    compression=None,
)

## Read from Parquet

In [6]:
# Read the whole Parquet file written above back into a new Arrow table
# (all columns, no row filtering).
a_new_table = pq.read_table(
    source=os.path.join(os.getcwd(), "data", "a_table_example.parquet"),
)

In [7]:
# Display the table read back from Parquet: its schema followed by the column values.
a_new_table

pyarrow.Table
Num: int64
Char: string
Varchar: string
Date_Field: string
Categorical: string
Percentage: string
----
Num: [[1,2,3,4,5,5]]
Char: [["This is a char","Here is a char","This is a char","Here is a char","This is a char","Here is a char"]]
Varchar: [["Random Wilipedia: James Michael Vince (born 14 March 1991) is an English cricketer who plays for Hampshire County Cricket Club in T20 cricket and plays for the England cricket team.","Random Wilipedia He captained Hampshire in all formats until he declared his retirement from red ball cricket in 2025.[1] Vince was part of the England squad that won the 2019 Cricket World Cup.[2] He is a right-handed middle-order batter who is also a right-arm medium pace bowler. He made his international debut for England in May 2015.[3]","Random Wilipedia Tulsidham Ke Laddu Gopal (transl. Tulsidham's Laddu Gopal) is an Indian Hindi-language Socio-Mytho series starring Akshita Mudgal and Heth Makhwana.[1] It premiered on 21 August 2023 and aired

## Read only specific columns and filter rows directly from the Parquet file
#### Selecting columns and filtering rows at read time takes advantage of Parquet's columnar format

In [8]:
# Read only three columns and keep only the rows that satisfy the filters.
# A flat list of filter tuples is combined with AND, so this keeps 3 < Num < 5.
filtered_table = pq.read_table(
    source=os.path.join(os.getcwd(), "data", "a_table_example.parquet"),
    columns=["Date_Field", "Categorical", "Char"],
    filters=[("Num", ">", 3), ("Num", "<", 5)],
)

In [9]:
# Display the filtered result: only the three requested columns, for rows with 3 < Num < 5.
filtered_table

pyarrow.Table
Date_Field: string
Categorical: string
Char: string
----
Date_Field: [["20-Feb-25"]]
Categorical: [["N"]]
Char: [["Here is a char"]]

#### Also note that the column used in the filter (`Num`) does not have to be listed in `columns`: it is used only to select rows and is absent from the result