In [3]:
import pandas as pd

## We will use the stock data csv file for this demonstration
![Stock Data CSV file to be used in this example](images/stock_data.jpg "stock_data.jpg")

In [4]:
# Load the sample stock data with default options; the first line of the
# file supplies the column names.
stock_data_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv"
)

# A bare expression on the last line renders the frame as the cell output.
stock_data_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use Case : The CSV file has an extra header 
![Stock Data CSV file with extra header to be used in this example](images/stock_data_with_extra_header.jpg "stock_data_with_extra_header.jpg")

In [5]:
# Read the file as-is: the extra title line is taken as the header, so the
# real column names show up as the first data row in the output.
stock_data_with_extra_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_extra_header.csv"
)

stock_data_with_extra_header_df

Unnamed: 0,stock data,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani
5,TATA,5.6,-1,n.a.,ratan tata


## The actual heading got shifted by one row. We need to use skiprows argument to skip the first row

In [6]:
# skiprows=1 discards the first line of the file before parsing, so the
# line holding the real column names becomes the header.
stock_data_with_extra_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_extra_header.csv",
    skiprows=1,
)

stock_data_with_extra_header_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## We can also achieve the same result by specifying header = 1 argument, which specifies the position of the header row

In [7]:
# header=1 names the (zero-based) line that holds the column names; the
# line before it is ignored.
stock_data_with_extra_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_extra_header.csv",
    header=1,
)

stock_data_with_extra_header_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use Case : The CSV file does not have a header at all
![Stock Data CSV file with no header to be used in this example](images/stock_data_with_no_header.jpg "stock_data_with_no_header.jpg")

In [8]:
# header=None tells pandas the file has no header line, so every line is
# data and the columns are labelled 0, 1, 2, ...
stock_data_with_no_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_no_header.csv",
    header=None,
)

stock_data_with_no_header_df

Unnamed: 0,0,1,2,3,4
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## But it is very difficult to read the columns without names, we need to pass the names argument to set the columns

In [9]:
# The file still has no header line (header=None); `names` supplies the
# column labels to use instead of the default integer labels.
stock_data_with_no_header_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_no_header.csv",
    header=None,
    names=["tickers", "eps", "revenue", "price", "people"],
)

stock_data_with_no_header_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use case : If the data set csv file is very big and we want to read only a few rows to see the sample data then pass the argument nrows

In [10]:
# nrows=3 reads only the first three data rows; the header line is not
# counted towards that limit.
stock_data_sample_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    nrows=3,
)

stock_data_sample_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


## Use case : If the data set csv file has blank rows then use the argument skip_blank_lines = True (this is also the default)

![Stock Data CSV file with blank rows to be used in this example](images/stock_data_with_blank_rows.jpg "stock_data_with_blank_rows.jpg")

In [11]:
# skip_blank_lines=True makes the parser ignore empty lines instead of
# turning them into rows of NaN values.
stock_data_with_blank_lines_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_with_blank_rows.csv",
    skip_blank_lines=True,
)

stock_data_with_blank_lines_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## Use case : To interpret the na (not available) values differently

## *In the following example we want to convert the values "n.a.", "not available", -1 to NaN*

In [12]:
# A list passed as na_values applies to every column: any cell equal to one
# of these markers is read as NaN.
stock_data_na_values_df = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values=["n.a.", "not available", -1],
)

stock_data_na_values_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


## Use case : To interpret the na (not available) values differently for different columns

## *Here we want to interpret the values "not available" and -1.00 in the column "eps" and -1 in the column revenue to be converted to NaN*

In [17]:
# A dictionary instead of a list restricts each set of NaN markers to the
# named column, so only `eps` and `revenue` are affected; "n.a." in the
# other columns is kept as text.
stock_data_na_values_df1 = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values={
        'eps': ["not available", -1],
        'revenue': [-1],
    },
)

stock_data_na_values_df1

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,n.a.
2,MSFT,,85.0,64,bill gates
3,RIL,,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,ratan tata


## Use case : to create a data frame with only the specified columns

## *Pass a list with the column names to be used in the data frame*

In [18]:
# usecols limits the resulting frame to the listed columns; the per-column
# NaN markers are the same as in the previous example.
stock_data_na_values_df2 = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values={
        'eps': ["not available", -1],
        'revenue': [-1],
    },
    usecols=["tickers", "eps"],
)

stock_data_na_values_df2

Unnamed: 0,tickers,eps
0,GOOGL,27.82
1,WMT,4.61
2,MSFT,
3,RIL,
4,TATA,5.6


## Use case : To use a specific column as the index
## *Pass the column name to be used as index*


In [26]:
# index_col="tickers" uses that column as the row index in place of the
# default 0..n-1 integer index.
stock_data_na_values_df3 = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values={
        'eps': ["not available", -1],
        'revenue': [-1],
    },
    index_col="tickers",
)

stock_data_na_values_df3

Unnamed: 0_level_0,eps,revenue,price,people
tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GOOGL,27.82,87.0,845,larry page
WMT,4.61,484.0,65,n.a.
MSFT,,85.0,64,bill gates
RIL,,50.0,1023,mukesh ambani
TATA,5.6,,n.a.,ratan tata


## The parameter squeeze = True tries to squeeze the dimension if possible (a result with a single column is returned as a Series)

In [32]:
# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and
# removed in pandas 2.0, where passing it raises a TypeError. The documented
# replacement is to call .squeeze("columns") on the frame that read_csv
# returns; this works on both old and new pandas versions.
stock_data_na_values_df4 = pd.read_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data.csv",
    na_values={
        'eps': ["not available", -1],
        'revenue': [-1],
    },
).squeeze("columns")

# Squeezing only collapses a frame into a Series when it has exactly one
# column. This file has several columns, so the result is still a DataFrame.
type(stock_data_na_values_df4)

pandas.core.frame.DataFrame

## Writing a data frame back to a csv file

In [33]:
# Write the frame with default options; the row index goes out as the
# first column of the file.
stock_data_na_values_df1.to_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_output.csv"
)

## stock_data_output.csv

![stock_data_output.csv](images/stock_data_output.jpg "stock_data_output.jpg")

## By default the index column is written to the csv file. To exclude the index column pass the argument index = False 

In [34]:
# index=False leaves the row index out of the written file.
stock_data_na_values_df1.to_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_output1.csv",
    index=False,
)

## stock_data_output1.csv

![stock_data_output1.csv](images/stock_data_output1.jpg "stock_data_output1.jpg")

## To create the csv with only selected columns (tickers and eps in this case)

In [35]:
# `columns` restricts the output to the listed columns; the row index is
# left out as well.
stock_data_na_values_df1.to_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_output2.csv",
    index=False,
    columns=["tickers", "eps"],
)

## stock_data_output2.csv

![stock_data_output2.csv](images/stock_data_output2.jpg "stock_data_output2.jpg")

## To create the csv without any header pass the argument header = False

In [36]:
# header=False omits the line of column names, so the file holds data rows
# only (and no row index, because of index=False).
stock_data_na_values_df1.to_csv(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_output3.csv",
    index=False,
    header=False,
)

## stock_data_output3.csv

![stock_data_output3.csv](images/stock_data_output3.jpg "stock_data_output3.jpg")

# To demonstrate the options while creating a data frame from an excel file

In [41]:
# The second positional argument selects the sheet: 0 means the first sheet
# of the workbook. A sheet name such as "Sheet1" can be passed instead to
# read that particular sheet.
stock_data_xlsx_df = pd.read_excel(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_xlsx.xlsx",
    0,
)

stock_data_xlsx_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


## To convert the cell values while creating a data frame from an excel file.

## *Here we want to convert the value "n.a." in the column people for Walmart to "Sam Walton"*

In [44]:
def convert_people_cell(cell):
    """Return "Sam Walton" for the placeholder "n.a."; return any other value unchanged."""
    return "Sam Walton" if cell == "n.a." else cell


# Read the first sheet (positional 0; a sheet name such as "Sheet1" works
# too). `converters` maps a column name to a function that is applied to
# each cell of that column while the sheet is being read.
stock_data_xlsx_df = pd.read_excel(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_xlsx.xlsx",
    0,
    converters={
        'people': convert_people_cell,
    },
)

stock_data_xlsx_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,Sam Walton
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


# Writing back a data frame to an excel file

In [48]:
# Write the frame to a new workbook; sheet_name sets the name of the
# worksheet that receives the data.
stock_data_xlsx_df.to_excel(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_xlsx_out.xlsx",
    sheet_name="Stock Ticker",
)

## stock_data_xlsx_out.xlsx

![stock_data_xlsx_out.xlsx](images/stock_data_excel.jpg "stock_data_excel.jpg")

## To exclude the index column and start writing at startrow = 1 and startcol = 1 (zero-based offsets, i.e. the second row and the second column of the sheet)

In [49]:
# index=False drops the row index from the sheet. startrow/startcol give
# the position of the upper-left cell of the written table; both are
# counted from 0, so the table starts one row down and one column in.
stock_data_xlsx_df.to_excel(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_xlsx_out1.xlsx",
    index=False,
    startrow=1,
    startcol=1,
    sheet_name="Stock Ticker",
)

## stock_data_xlsx_out1.xlsx

![stock_data_xlsx_out1.xlsx](images/stock_data_excel1.jpg "stock_data_excel1.jpg")

## To write two data frames into two different sheets of the same excel file

In [57]:
# Build the two column subsets that will become separate worksheets.
stock_data_xlsx_df_ticker = stock_data_xlsx_df[["tickers", "people"]]
stock_data_xlsx_df_rev = stock_data_xlsx_df[["tickers", "eps", "revenue"]]

# A single ExcelWriter lets several frames share one workbook; the `with`
# block saves and closes the file on exit.
with pd.ExcelWriter(
    "C:\\PythonTutorial\\MyPandas_Blog\\data sets\\stock_data_xlsx_out2.xlsx"
) as excel_writer:
    stock_data_xlsx_df_ticker.to_excel(excel_writer, sheet_name="stocks")
    stock_data_xlsx_df_rev.to_excel(excel_writer, sheet_name="revenue")

## stock_data_xlsx_out2.xlsx

![stock_data_xlsx_out2.xlsx](images/stock_data_excel_out_tab1.jpg "stock_data_excel_out_tab1.jpg")
![stock_data_xlsx_out2.xlsx](images/stock_data_excel_out_tab2.jpg "stock_data_excel_out_tab2.jpg")

