In [1]:
import pandas as pd

# Advanced Reading Options

## 0. Setup a sample CSV for demonstration
- 'sample.csv'
- `;` separator
- Missing values - `?`
- `id` column is suitable for index

## 1. Handling custom separator (`sep`)

In [2]:
pd.read_csv("sample.csv", sep=';')

Unnamed: 0,id,name,age,score
0,1,Onkar,21,88
1,2,Amit,25,?
2,3,Sara,23,92


## 2. Handling headers rows (`header`)

### Case I: CSV has header

In [3]:
pd.read_csv("sample.csv", sep=';', header=0) # header=0 is by default behavior of pandas we can assign any other index for header

Unnamed: 0,id,name,age,score
0,1,Onkar,21,88
1,2,Amit,25,?
2,3,Sara,23,92


### Case II: CSV does not have header
`.read_csv` have `name=` parameter to assign the column names  
Or insted we can do   
`DataFrame` have `columns=` parameter to assign columns names

In [16]:
df = pd.read_csv("sample.csv", sep=';', header=None, names=["id","name","age","score"])
df

Unnamed: 0,id,name,age,score
0,id,name,age,score
1,1,Onkar,21,88
2,2,Amit,25,?
3,3,Sara,23,92


OR

In [17]:
df.columns=["id_","name_","age_","score_"]
df

Unnamed: 0,id_,name_,age_,score_
0,id,name,age,score
1,1,Onkar,21,88
2,2,Amit,25,?
3,3,Sara,23,92


## 3. Handling index column (`index`)

The CSV file have column which can be used as index then we do

In [19]:
pd.read_csv("sample.csv", sep=';', index_col=0) # Can write column name also or index od column

Unnamed: 0_level_0,name,age,score
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Onkar,21,88
2,Amit,25,?
3,Sara,23,92


## 4. Handling missing values

When missing values are represented by something like str or any symbol then we use `na_values=` parameter

In [21]:
pd.read_csv("sample.csv", sep=';', na_values=['?']) # the cell containing '?' will considered as NaN by pandas

Unnamed: 0,id,name,age,score
0,1,Onkar,21,88.0
1,2,Amit,25,
2,3,Sara,23,92.0


## 5. Combining everything

In [22]:
pd.read_csv(
    "sample.csv",
    sep=';',
    header=0, # First row is by default header but we can assign another row as header
    # header=None # If there is no header so mention None and add columns=[...]
    index_col=0,
    na_values=['?']
)

Unnamed: 0_level_0,name,age,score
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Onkar,21,88.0
2,Amit,25,
3,Sara,23,92.0
