# Reading Excel

In [6]:
import pandas as pd

#### You should specify a sheet name, or other identifier

In [12]:
# Pick the worksheet by its tab name and keep only the first five rows
# so the displayed preview stays short.
aapl = pd.read_excel('output.xlsx', sheet_name='AAPL').head()
aapl

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2017-01-03,116.330002,114.760002,115.800003,116.150002,28781900,111.286987
1,2017-01-04,116.510002,115.75,115.849998,116.019997,21118100,111.162437
2,2017-01-05,116.860001,115.809998,115.919998,116.610001,22193600,111.727715
3,2017-01-06,118.160004,116.470001,116.779999,117.910004,31751900,112.973305
4,2017-01-09,119.43,117.940002,117.949997,118.989998,33561900,114.00808


#### You can also use a 0-indexed integer to select a sheet by its position

In [13]:
# An integer selects the sheet by 0-based position instead of by name;
# 0 is the first sheet in the workbook.
aapl = pd.read_excel('output.xlsx', sheet_name=0)
aapl.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2017-01-03,116.330002,114.760002,115.800003,116.150002,28781900,111.286987
1,2017-01-04,116.510002,115.75,115.849998,116.019997,21118100,111.162437
2,2017-01-05,116.860001,115.809998,115.919998,116.610001,22193600,111.727715
3,2017-01-06,118.160004,116.470001,116.779999,117.910004,31751900,112.973305
4,2017-01-09,119.43,117.940002,117.949997,118.989998,33561900,114.00808


#### And you can read multiple sheets with list-like syntax

In [15]:
# Passing a list of sheet names reads them all in one call; the result is a
# mapping from each sheet name to its DataFrame rather than a single frame.
stocks = ['FB', 'GOOG']
pd.read_excel('output.xlsx', sheet_name=stocks)

OrderedDict([('FB',
                        Date        High         Low        Open       Close    Volume  \
              0   2017-01-03  117.839996  115.510002  116.029999  116.860001  20663900   
              1   2017-01-04  119.660004  117.290001  117.550003  118.690002  19630900   
              2   2017-01-05  120.949997  118.320000  118.860001  120.669998  19492200   
              3   2017-01-06  123.879997  120.029999  120.980003  123.410004  28545300   
              4   2017-01-09  125.430000  123.040001  123.550003  124.900002  22880400   
              5   2017-01-10  125.500000  124.279999  124.820000  124.349998  17324600   
              6   2017-01-11  126.120003  124.059998  124.349998  126.089996  18356500   
              7   2017-01-12  126.730003  124.800003  125.610001  126.620003  18653900   
              8   2017-01-13  129.270004  127.370003  127.489998  128.339996  24884300   
              9   2017-01-17  128.339996  127.400002  128.039993  127.870003  15

#### If you need only part of a sheet you can skip rows at the top or bottom

In [17]:
# Read the FB sheet without its first row and without treating any row as
# column names, so pandas labels the columns with integers instead.
pd.read_excel(
    'output.xlsx',
    sheet_name='FB',
    skiprows=[0],   # drop the row at position 0 (the top row of the sheet)
    header=None,    # do not promote any row to the header
).head()

Unnamed: 0,0,1,2,3,4,5,6
0,2017-01-03,117.839996,115.510002,116.029999,116.860001,20663900,116.860001
1,2017-01-04,119.660004,117.290001,117.550003,118.690002,19630900,118.690002
2,2017-01-05,120.949997,118.32,118.860001,120.669998,19492200,120.669998
3,2017-01-06,123.879997,120.029999,120.980003,123.410004,28545300,123.410004
4,2017-01-09,125.43,123.040001,123.550003,124.900002,22880400,124.900002


#### Other tips
* By default the first row is going to be used as the header (column names)
* If your data does not have a header, specify header=None
* Pass the `names` argument with a list of column names to supply your own custom header
* You can skip rows at the top of the sheet (`skiprows`), at the bottom (`skipfooter`), or both
* Or simply specify how many rows to parse with the `nrows` argument
* You can pass a list to the `usecols` argument to read only the columns you are interested in
* Other parameters, see documentation: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html#pandas.read_excel