# Python Pandas Tutorial 4: Read Write Excel CSV File  
https://www.youtube.com/watch?v=-0NwrcZOKhQ

In [26]:
import pandas as pd

## CSV Files
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html

In [None]:
# Load the CSV with read_csv's default settings and display the resulting frame.
df = pd.read_csv("stock_data.csv")
df

Suppose your file has an extra row above headers:

In [4]:
# With the defaults, the first line of the file is used as the header,
# so the extra row ends up as the column names (see the output below).
df = pd.read_csv("stock_data2.csv")
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,extra header
tickers,eps,revenue,price,people
GOOGL,27.82,87,845,larry page
WMT,4.61,484,65,n.a.
MSFT,-1,85,64,bill gates
RIL,not available,50,1023,mukesh ambani
TATA,5.6,-1,n.a.,ratan tata


In [5]:
# skiprows=1 skips the first line of the file, so the real header row
# is the one used for the column names.
df = pd.read_csv("stock_data2.csv", skiprows=1)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [3]:
# Alternative to skiprows: header=1 says the column names are on row 1
# (0-based). The output below matches the skiprows=1 result.
df = pd.read_csv("stock_data2.csv", header=1)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


Suppose your file doesn't have headers included:

In [7]:
# With the defaults, the first data row (GOOGL) is consumed as the header
# (see the output below).
df = pd.read_csv("stock_data3.csv")
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [8]:
# header=None: the file has no header row, so every line is read as data
# and the columns are numbered 0..4 (see the output below).
df = pd.read_csv("stock_data3.csv", header=None)
df

Unnamed: 0,0,1,2,3,4
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


You can add the header names here as well:

In [9]:
# The file has no header row (header=None), so supply the column names here.
df = pd.read_csv(
    "stock_data3.csv",
    header=None,
    names=['tickers', 'eps', 'revenue', 'price', 'people'],
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


You can choose the number of rows to import (excluding your header):

In [10]:
# nrows=3 reads only the first three data rows; the header line is not counted.
df = pd.read_csv("stock_data.csv", nrows=3)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1.0,85,64,bill gates


Cleaning: treat custom missing-value markers (such as `not available` and `n.a.`) as NaN on import:

In [11]:
# A list passed to na_values applies to every column: these strings are read as NaN.
df = pd.read_csv("stock_data.csv", na_values=['not available', 'n.a.'])
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


Deal with the -1 in the revenue column (revenue can't be negative):  
Pass a dictionary keyed by column name so that -1 is treated as missing only in `revenue`; in `eps`, -1 is a valid value and must be kept.

In [18]:
# Per-column NA markers: the text placeholders count as missing in every listed
# column, while -1 is additionally treated as missing in 'revenue' only.
naVals = ['not available', 'n.a.']
na_by_column = {col: naVals for col in ('eps', 'revenue', 'price', 'people')}
na_by_column['revenue'] = naVals + [-1]
df = pd.read_csv("stock_data.csv", na_values=na_by_column)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


Write back to CSV:

In [20]:
# Default export: the index is written as the first column of new.csv.
df.to_csv('new.csv')

Note: this also writes the *index* to the CSV file.  
If you don't want the index written, pass `index=False`:

In [21]:
# Same target file as before (new.csv is overwritten), this time without the index column.
df.to_csv('new.csv', index=False)

What if you only want to write certain columns?  
Pass `columns=[...]` with the names of the columns to export.

In [22]:
df.columns

Index(['tickers', 'eps', 'revenue', 'price', 'people'], dtype='object')

In [24]:
# Export only the 'tickers' and 'eps' columns (new.csv is overwritten again).
df.to_csv('new.csv', index=False, columns=['tickers', 'eps'])

What if you want to skip exporting the header?  
Pass `header=False`:

In [25]:
# Write data rows only: no index column and no header line (new.csv is overwritten again).
df.to_csv('new.csv', index=False, header=False)

## Excel (XLSX) Files

In [27]:
import pandas as pd

In [29]:
# Read one worksheet, selecting it by name.
df = pd.read_excel('stock_data.xlsx', sheet_name='Sheet1')
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


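Use converters to clean cells while reading: replace `n.a.` in the people column with `sam walton`, and turn `not available` in the eps column into a missing value: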

In [33]:
def convertPeopleCell(cell):
    """Return 'sam walton' for the placeholder 'n.a.'; any other cell value is returned unchanged."""
    return 'sam walton' if cell == 'n.a.' else cell

def convertEpsCell(cell):
    """Return None for the placeholder 'not available'; any other cell value is returned unchanged."""
    return None if cell == 'not available' else cell

In [35]:
# Map column name -> function; each function is applied to the cells of its column.
excel_converters = {'people': convertPeopleCell, 'eps': convertEpsCell}
df = pd.read_excel(
    'stock_data.xlsx',
    sheet_name='Sheet1',
    converters=excel_converters,
)
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,sam walton
2,MSFT,-1.0,85,64,bill gates
3,RIL,,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


Write Excel file:

In [40]:
# Write the frame to a sheet named 'stocks' in new.xlsx, without the index column.
df.to_excel('new.xlsx', index=False, sheet_name='stocks')

Start writing at a given row/column offset (the skipped rows and columns are left empty):  
Pass `startrow=<n>` and `startcol=<n>`:

In [41]:
# startrow / startcol give the upper-left cell of the written table (0-based),
# so the table here begins one row down and two columns in. new.xlsx is overwritten.
df.to_excel('new.xlsx', index=False, sheet_name='stocks', startrow=1, startcol=2)

If you have 2 DFs that you want to write to the same Excel file in different Sheets:  

In [43]:
# First, create the 2 DFs (one tuple per row, column names given separately):

df_stocks = pd.DataFrame(
    [
        ('GOOGL', 845, 30.37, 27.82),
        ('WMT', 65, 14.26, 4.61),
        ('MSFT', 64, 30.97, 2.12),
    ],
    columns=['tickers', 'price', 'pe', 'eps'],
)

df_weather = pd.DataFrame(
    [
        ('1/1/2017', 32, 'Rain'),
        ('1/2/2017', 35, 'Sunny'),
        ('1/3/2017', 28, 'Snow'),
    ],
    columns=['day', 'temperature', 'event'],
)

In [44]:
df_stocks

Unnamed: 0,tickers,price,pe,eps
0,GOOGL,845,30.37,27.82
1,WMT,65,14.26,4.61
2,MSFT,64,30.97,2.12


In [45]:
df_weather

Unnamed: 0,day,temperature,event
0,1/1/2017,32,Rain
1,1/2/2017,35,Sunny
2,1/3/2017,28,Snow


Use the `pd.ExcelWriter` class as a context manager and pass the writer to each `to_excel` call:

In [48]:
# One writer shared by both frames; each frame goes to its own named sheet.
sheets = {'stocks': df_stocks, 'weather': df_weather}
with pd.ExcelWriter('stocks_weather.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)