In [None]:
ls  # list the files in the current working directory

Boston_housing.csv  CSV_EX_1.csv  CSV_EX_2.csv  CSV_EX_3.csv  [0m[01;34msample_data[0m/


In [None]:
pwd  # print the current working directory

'/content'

In [None]:
%cd ..

/


In [None]:
cat /content/CSV_EX_3.csv  # print the raw file to see which field delimiter it uses

Bedroom; Sq. foot; Locality; Price ($)
2; 1500; Good; 300000
3; 1300; Fair; 240000
3; 1900; Very good; 450000
3; 1850; Bad; 280000
2; 1640; Good; 310000

In [None]:
import pandas as pd

**Reading from a CSV file**

Default case

In [None]:
# Plain read_csv call: every parsing option is left at its default value.
df1 = pd.read_csv(
    "/content/CSV_EX_1.csv",
)

In [None]:
df1  # display the frame parsed with the default settings

Unnamed: 0,Bedroom,Sq. foot,Locality,Price ($)
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000
3,3,1850,Bad,280000
4,2,1640,Good,310000


Missing headers

In [None]:
# By default read_csv uses the first line of the file as the column header, so a
# file that has no header row would lose its first record to the column names.
# Passing header=None keeps that first line as an ordinary data row instead.
df2 = pd.read_csv(
    "/content/CSV_EX_2.csv",
    header=None,
)

In [None]:
df2  # with header=None the columns are labelled by integer position (0, 1, 2, ...)

Unnamed: 0,0,1,2,3
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000
3,3,1850,Bad,280000
4,2,1640,Good,310000


Custom column headers

In [None]:
# The file has no header row, so the column labels are supplied through names=.
df2 = pd.read_csv(
    "/content/CSV_EX_2.csv",
    names=[
        'Bedroom',
        'Sq. foot',
        'Locality',
        'Price ($)',
    ],
)

In [None]:
df2  # the columns now carry the labels passed through names=

Unnamed: 0,Bedroom,Sq. foot,Locality,Price ($)
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000
3,3,1850,Bad,280000
4,2,1640,Good,310000


Non-default separators

In [None]:
# Read a semicolon-delimited file while leaving the delimiter at its default (",")
# to demonstrate how the result looks when the separator does not match.
df3 = pd.read_csv(
    "/content/CSV_EX_3.csv",
)

The default separator is ",". When a CSV file uses a different separator, it will not be read properly: there is a chance that each line is treated as a single column, as demonstrated below.

In [None]:
df3  # every line ends up in one single column because no "," was found to split on

Unnamed: 0,Bedroom; Sq. foot; Locality; Price ($)
0,2; 1500; Good; 300000
1,3; 1300; Fair; 240000
2,3; 1900; Very good; 450000
3,3; 1850; Bad; 280000
4,2; 1640; Good; 310000


In [None]:
# To avoid this, define the separator explicitly.
# The file separates its fields with "; " (a semicolon followed by a space), so
# skipinitialspace=True is needed as well: without it the blank after each
# delimiter stays attached to the column labels and to the text values
# (for example " Locality" and " Good").
df3 = pd.read_csv(
    "/content/CSV_EX_3.csv",
    sep=';',
    skipinitialspace=True,
)

In [None]:
df3  # each field is now parsed into its own column

Unnamed: 0,Bedroom,Sq. foot,Locality,Price ($)
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000
3,3,1850,Bad,280000
4,2,1640,Good,310000


Skip rows

Files can contain ordinary text, such as a description, before the actual data. To keep that text from being read as data, use skiprows, which skips the given number of lines at the top of the file.

In [None]:
# skiprows=2 drops the first two lines of the file before parsing starts,
# so the header is taken from the third line.
df5 = pd.read_csv(
    '/content/CSV_EX_skiprows.csv',
    skiprows=2,
)

In [None]:
df5  # display the frame read after skipping the leading lines

Unnamed: 0,Bedroom,Sq. foot,Locality,Price ($)
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000
3,3,1850,Bad,280000
4,2,1640,Good,310000


Skip footers

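skipfooter skips rows from the bottom of the file. It is not supported by the default C parser, which is why engine='python' is passed in the cell below.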

In [None]:
# Drop the two leading lines and the last line of the file.
# skipfooter is not supported by the default C parser, hence engine='python'.
df6 = pd.read_csv(
    '/content/CSV_EX_skipfooter.csv',
    skiprows=2,
    skipfooter=1,
    engine='python',
)
df6

Unnamed: 0,Bedroom,Sq. foot,Locality,Price ($)
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000
3,3,1850,Bad,280000
4,2,1640,Good,310000



Read only first 'n' rows

In [None]:
# nrows=3 stops parsing after the first three data rows (the header is not counted).
df7 = pd.read_csv(
    '/content/CSV_EX_1.csv',
    nrows=3,
)
df7

Unnamed: 0,Bedroom,Sq. foot,Locality,Price ($)
0,2,1500,Good,300000
1,3,1300,Fair,240000
2,3,1900,Very good,450000


Reading in chunks (skip + nrows)

In [None]:
# Read the first num_chunks * rows_in_a_chunk data rows of the file as a list of
# DataFrames holding rows_in_a_chunk rows each, using skiprows + nrows.
# (pd.read_csv(..., chunksize=...) is the built-in alternative to this manual loop.)
list_of_dataframes = []
rows_in_a_chunk = 10
num_chunks = 5

# A small read is enough to learn the column names. Every chunk after the first
# starts in the middle of the file, so the names must be passed in explicitly.
df_dummy = pd.read_csv('/content/Boston_housing.csv', nrows=2)
col_names = df_dummy.columns

# i is the number of data rows already consumed by the previous chunks,
# so step through the file one chunk at a time rather than one row at a time.
for i in range(0, num_chunks * rows_in_a_chunk, rows_in_a_chunk):
  df = pd.read_csv(
      '/content/Boston_housing.csv',
      header=None,            # the header line is skipped below, nothing is left to parse as one
      names=col_names,        # label the chunk with the real column names
      skiprows=i + 1,         # the header line plus the i data rows read so far
      nrows=rows_in_a_chunk,  # read exactly one chunk
  )
  list_of_dataframes.append(df)

In [None]:
list_of_dataframes[0]  # inspect the first DataFrame collected by the loop

Unnamed: 0,0.02731,0,7.07,0.1,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
0,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
1,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
2,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
3,0.02985,0.0,2.18,0,0.458,6.430,58.7,6.0622,3,222,18.7,394.12,5.21,28.7
4,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.60,12.43,22.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
500,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
501,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
502,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


Skip blank lines

**Reading from a compressed file**

Read from a zip file

**Read from an Excel sheet with sheet name**

**Read from a json file**