In [1]:
import pandas as pd

## We will use the stock data csv file for this demonstration
![Stock Data CSV file to be used in this example](images/stock_data.jpg "stock_data.jpg")

In [3]:
# Load the baseline stock data set; with default options the first CSV row
# supplies the column names.
stock_data_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv"
)

# Bare last expression so the notebook renders the frame.
stock_data_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use Case : The CSV file has an extra header 
![Stock Data CSV file with extra header to be used in this example](images/stock_data_with_extra_header.jpg "stock_data_with_extra_header.jpg")

In [5]:
# Read the file with default options to show the problem: the extra title row
# is taken as the header, so the real column names end up as the first data row.
stock_data_with_extra_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_extra_header.csv"
)

stock_data_with_extra_header_df

Unnamed: 0,stock data,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


## The actual header got shifted down by one row. We need to use the skiprows argument to skip the first row

In [6]:
# skiprows=1 drops the first line of the file (the extra title row), so the
# next line is used as the header.
stock_data_with_extra_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_extra_header.csv",
    skiprows=1,
)

stock_data_with_extra_header_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## We can also achieve the same result by specifying header = 1 argument, which specifies the position of the header row

In [7]:
# header=1 tells pandas that the column names are on the row at zero-based
# position 1; the row before it is ignored.
stock_data_with_extra_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_extra_header.csv",
    header=1,
)

stock_data_with_extra_header_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [None]:
## Use Case : The CSV file does not have a header at all
![Stock Data CSV file with no header to be used in this example](images/stock_data_with_no_header.jpg "stock_data_with_no_header.jpg")

In [11]:
# header=None: the file has no header row, so every line is data and the
# columns receive integer labels (0, 1, 2, ...).
stock_data_with_no_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_no_header.csv",
    header=None,
)

stock_data_with_no_header_df

Unnamed: 0,0,1,2,3,4
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## But it is very difficult to read the columns without names, so we need to pass the names argument to set the column names

In [15]:
# The file still has no header row (header=None); names supplies the column
# labels to use in place of the default integer labels.
stock_data_with_no_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_no_header.csv",
    header=None,
    names=[
        "tickers",
        "eps",
        "revenue",
        "price",
        "people",
    ],
)

stock_data_with_no_header_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use case : If the data set csv file is very big and we want to read only a few rows to see the sample data then pass the argument nrows

In [17]:
# nrows=3 reads only the first 3 data rows; the header row is not counted.
stock_data_sample_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    nrows=3,
)

stock_data_sample_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


## Use case : If the data set csv file has blank rows then use the argument skip_blank_lines = True (this is also the default)

![Stock Data CSV file with blank rows to be used in this example](images/stock_data_with_blank_rows.jpg "stock_data_with_blank_rows.jpg")

In [19]:
# skip_blank_lines=True makes the parser skip over blank lines in the file
# instead of turning them into rows.
stock_data_with_blank_lines_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_blank_rows.csv",
    skip_blank_lines=True,
)

stock_data_with_blank_lines_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use case : To treat custom markers (e.g. "n.a.", "not available", -1) as NA (not available) values, pass them in the na_values argument

In [24]:
# A list passed as na_values applies to every column: any cell matching one of
# these markers is read as NaN.
stock_data_na_values_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values=[
        "n.a.",
        "not available",
        -1,
    ],
)

stock_data_na_values_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


## Use case : To treat different markers as NA (not available) values in different columns, pass na_values as a dict keyed by column name

In [27]:
# A dict passed as na_values scopes the markers per column: here only the
# "eps" column has "not available" and -1 converted to NaN.
stock_data_na_values_df1 = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values={"eps": ["not available", -1.00]},
)

stock_data_na_values_df1

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,,85,64,bill gates
3,RIL,,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata
