## Import Pandas

In [None]:
import pandas as pd

## 1 `sep` parameter (Delimiter)

sep parameter (Delimiter)
🔹 Purpose

Used when the file is not comma-separated (e.g., tab \t, pipe |, semicolon ;).

Explanation

Pandas expects commas by default

sep="\t" tells Pandas to split columns using tab

In [None]:
# Build a small headerless, tab-separated sample: serial number, title, year.
movie_rows = [
    ("1", "Inception", "2010"),
    ("2", "Interstellar", "2014"),
    ("3", "Tenet", "2020"),
]
data = "\n".join("\t".join(fields) for fields in movie_rows)

# Save it to disk so it can be read back with pd.read_csv.
with open("movies.tsv", "w") as tsv_file:
    tsv_file.write(data)


In [None]:
# No exception is raised here: the file contains no commas, so every line is read
# as one field and the whole first line becomes the single column name.
# The result is one unusable column rather than three.
pd.read_csv('movies.tsv')

In [None]:
# sep='\t' makes pandas split each line on tabs, producing three columns.
# No header option is given, so the first line of the file is used as column names.
pd.read_csv('movies.tsv', sep='\t')

## 2 `names` parameter

names parameter (Custom column names)
🔹 Purpose

Used when:

No header exists

The first data row would otherwise be treated as the header

📝 Explanation

`header=None` → the file has no header row

`names=[...]` → manually define the column names

In [None]:
# The file has no header row, so turn header detection off and supply the labels.
movie_columns = ["serial_no", "movie_name", "release_year"]
pd.read_csv("movies.tsv", sep="\t", header=None, names=movie_columns)

## 3 `index_col` parameter

index_col parameter
🔹 Purpose

Used to make a column act as DataFrame index

Explanation

Removes redundant numeric index

Useful for unique identifiers

In [None]:
# Sample CSV with a header line followed by three student records.
student_lines = [
    "Enrol_ID,Name,Score",
    "101,Aman,85",
    "102,Riya,90",
    "103,Kunal,78",
]
data = "\n".join(student_lines)

with open("students.csv", "w") as csv_file:
    csv_file.write(data)

# Default behavior: no index_col is given, so pandas adds its own 0..n-1 index.
df = pd.read_csv("students.csv")

In [None]:
# Use the Enrol_ID column as the row index instead of the default 0..n-1 index.
pd.read_csv('students.csv', index_col='Enrol_ID')

## 4 `header` parameter

header parameter
🔹 Purpose

Explicitly tells Pandas which row contains headers

Explanation

`header=0` → the first row is the header

Prevents headers being read as data

In [None]:
# Small CSV whose first line holds the column labels "0", "Name" and "Age".
header_line = "0,Name,Age"
record_lines = ["1,Aman,22", "2,Riya,21"]
data = "\n".join([header_line] + record_lines)

with open("test.csv", "w") as csv_file:
    csv_file.write(data)


In [None]:
# header=0 explicitly marks the first line of the file as the header row.
pd.read_csv('test.csv', header=0)

## 5Ô∏è `usecols` parameter

Purpose

Load only required columns

Explanation

Saves memory

Faster loading for large datasets

In [None]:
# Load only the listed columns; any other column in the file is never read in.
wanted_columns = ["Enrol_ID", "Score"]
pd.read_csv("students.csv", usecols=wanted_columns)

## 6 `skiprows` parameter

skiprows parameter
🔹 Purpose

Skip unwanted rows



In [None]:
# Two non-tabular lines come before the real header and records.
junk_lines = ["Comment line", "Another useless line"]
table_lines = ["ID,Name,Marks", "1,Aman,80", "2,Riya,90"]
data = "\n".join(junk_lines + table_lines)

with open("skip.csv", "w") as csv_file:
    csv_file.write(data)


In [None]:
# skiprows=[0,1] drops the first two lines of the file (0-based line numbers),
# so the third line is the one parsed as the header.
pd.read_csv('skip.csv', skiprows=[0,1])

## 7 `nrows` parameter

Purpose

Load only top N rows

Explanation

Useful for sampling large datasets

In [None]:
# Read only the first 2 data rows; the header line is not counted in nrows.
pd.read_csv('students.csv', nrows=2)

## 8 `encoding` parameter 

Special characters used

é → Café

ñ → Niño

é → Beyoncé

When the file was saved in a non-UTF-8 encoding (latin-1 here), these characters raise a `UnicodeDecodeError` under the default UTF-8 decoding unless `encoding` is specified.

Common Encodings

| Encoding | When used |
| --- | --- |
| utf-8 | Default, most modern files |
| latin-1 | Older datasets, Indian datasets |
| ISO-8859-1 | Same encoding as latin-1 (alternative name) |
| cp1252 | Windows-generated CSVs |

In [None]:
# Sample rows containing accented characters (é, ñ) that are not plain ASCII.
# They must be the real characters: mis-decoded text such as "√©" cannot be
# encoded as latin-1, and the write below would raise UnicodeEncodeError.
data = """restaurant,city,rating
Café Coffee Day,Mumbai,4.1
El Niño,Delhi,4.3
Beyoncé Bistro,Pune,4.5
"""

# Write the file as latin-1 on purpose: its bytes are then not valid UTF-8, so
# reading it back requires passing the matching `encoding` to pd.read_csv.
with open("encoding_demo.csv", "w", encoding="latin-1") as f:
    f.write(data)


In [None]:
# Decode the file as latin-1, the encoding it is expected to have been saved in.
pd.read_csv('encoding_demo.csv', encoding='latin-1')

## 9 `on_bad_lines` Parameter 

It controls what happens to rows with an inconsistent number of columns, which by default cause this error:

ParserError: Expected X fields in line Y, saw Z

Behavior

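Rows with too many fields are skipped (rows with too few fields are kept and padded with NaN)

No warning is shown with `on_bad_lines="skip"`; use `on_bad_lines="warn"` to also print a warning for each skipped row

Data still loads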

In [None]:
# Three-column CSV in which two records deviate from the header's field count.
record_lines = [
    "student_id,name,marks",
    "101,Aman,85",
    "102,Riya,90",
    "103,Kunal",               # one field short
    "104,Neha,88,ExtraValue",  # one field too many
    "105,Arjun,92",
]
# Every line, including the last, is terminated by a newline.
data = "".join(line + "\n" for line in record_lines)

with open("bad_lines_demo.csv", "w") as csv_file:
    csv_file.write(data)


In [None]:
# Skip rows that have too many fields instead of raising ParserError.
pd.read_csv("bad_lines_demo.csv", on_bad_lines="skip")

## 10 `dtype` parameter

Purpose

Force data types

Explanation

Saves memory

Ensures type consistency

In [None]:
# One-column CSV whose values are written with a decimal point.
data = "\n".join(["target", "9.0", "0.0", "1.0"])

with open("dtype.csv", "w") as csv_file:
    csv_file.write(data)

# Ask pandas to load the column as int rather than inferring a type itself,
# then show the resulting column dtypes.
typed_frame = pd.read_csv("dtype.csv", dtype={"target": int})
typed_frame.dtypes

## 11 `parse_dates` parameter

Purpose

Convert string → datetime

In [None]:
# Two fixtures, each an ISO-formatted date plus a match label.
fixtures = [("2023-03-15", "MI vs CSK"), ("2023-03-18", "RCB vs KKR")]
data = "date,match\n" + "\n".join(",".join(row) for row in fixtures)

with open("ipl.csv", "w") as csv_file:
    csv_file.write(data)

# Parse the "date" column into datetimes while loading, then show the dtypes.
ipl_frame = pd.read_csv("ipl.csv", parse_dates=["date"])
ipl_frame.dtypes

## 12 `converters` parameter

Purpose

Apply function while loading



In [None]:
def _abbreviate_rcb(match_text):
    """Replace the full franchise name with its short form in one cell value."""
    return match_text.replace("Royal Challengers Bangalore", "RCB")


# The converter is applied to each raw string of the "match" column while parsing.
# NOTE: the sample ipl.csv in this notebook already uses "RCB", so the output is
# unchanged for that data; the converter only matters when the full name appears.
pd.read_csv("ipl.csv", converters={"match": _abbreviate_rcb})

## 13 `na_values` parameter

Purpose

Custom missing values

Explanation

Converts specified values → NaN

In [None]:
# Name/gender pairs written out as a two-column CSV.
people = [("Aman", "Male"), ("Riya", "Female"), ("Kunal", "Male")]
data = "Name,Gender\n" + "\n".join(",".join(person) for person in people)

with open("gender.csv", "w") as csv_file:
    csv_file.write(data)

# Treat the string "Male" as a missing-value marker: those cells load as NaN.
pd.read_csv("gender.csv", na_values=["Male"])

## 14 `chunksize` parameter

Purpose

Process huge files in parts

Explanation

Loads data in manageable blocks

Prevents RAM overflow

In [None]:
# Example (file assumed large)
# With chunksize set, read_csv returns an iterator that yields DataFrames of up
# to 500000 rows each, instead of loading the whole file as one DataFrame.
# chunks = pd.read_csv("large.csv", chunksize=500000)
# for chunk in chunks:
#     print(chunk.shape)