In [4]:
# What Are I/O Formats in Pandas?
# I/O stands for Input/Output ‚Äì basically, how you read data into pandas and write (save) data from pandas in various formats like:

# ‚úÖ CSV

# ‚úÖ Parquet

# ‚úÖ Gzip

# ‚úÖ Excel

# ‚úÖ JSON Lines

In [5]:
import pandas as pd

# Sample data
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['Delhi', 'Mumbai', 'Bangalore']
}

df = pd.DataFrame(data)
df


Unnamed: 0,Name,Age,City
0,Alice,25,Delhi
1,Bob,30,Mumbai
2,Charlie,35,Bangalore


In [7]:
# Parquet Format (Fast, columnar, good for big data)

df.to_parquet("data.parquet")
df_parquet = pd.read_parquet("data.parquet")
print(df_parquet)


      Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [None]:
#  Why use Parquet?

# It's a binary format optimized for performance.

# Great for large datasets.

# Supported by big data tools like Spark.

# üõ†Ô∏è Requires pyarrow or fastparquet:

In [None]:
# Gzip (Compressed CSV or JSON)
# üì• Write CSV with Gzip compression:

In [8]:
df.to_csv("data.csv.gz", compression='gzip')

# Why use Gzip?
# Reduces file size.
# Useful for large files or storing logs.

In [9]:
#  Read compressed CSV:

df_gzip = pd.read_csv("data.csv.gz", compression='gzip')
print(df_gzip)


   Unnamed: 0     Name  Age       City
0           0    Alice   25      Delhi
1           1      Bob   30     Mumbai
2           2  Charlie   35  Bangalore


In [11]:
df.to_excel("data.xlsx", index=False)


In [12]:
# read excel data
df_excel = pd.read_excel("data.xlsx")
print(df_excel)


      Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [13]:
# JSON Lines Format (Line-delimited JSON objects)

df.to_json("data.jsonl", orient='records', lines=True)


In [14]:
df_jsonl = pd.read_json("data.jsonl", lines=True)
print(df_jsonl)


      Name  Age       City
0    Alice   25      Delhi
1      Bob   30     Mumbai
2  Charlie   35  Bangalore


In [None]:
# Summary Table
# Format	Read	Write	Notes
# Parquet	pd.read_parquet()	df.to_parquet()	Fast, efficient, needs pyarrow
# Gzip (CSV)	pd.read_csv(..., compression='gzip')	df.to_csv(..., compression='gzip')	Small size, good for archiving
# Excel	pd.read_excel()	df.to_excel()	Needs openpyxl
# JSON Lines	pd.read_json(..., lines=True)	df.to_json(..., lines=True)	Used for logs, stream data

