### Intro to the Module + Fetch Panel Dataset from Google Finance

In [117]:
## Series: one dimensional data structure; DataFrame: two-dimensional data structure; Panel: Three-dimensional data structure
## Need three references to pull the data needed from panel 
import pandas as pd
from pandas_datareader import data

In [9]:
## Extract information for multiple companies and results will be a panel data
companies= ['MSFT', 'GOOG', 'AAPL', 'YHOO', 'AMZN']
p = data.DataReader(name = companies, data_source = 'google', start = '2010-01-01', end = '2016-12-31')
p   ## p is a panel data 

In [118]:
stocks = pd.read_csv('Stocks.csv', parse_dates = ['Date']).drop('Adj Close', axis = 'columns')
stocks.head(3)

Unnamed: 0,Company,Date,Open,High,Low,Close,Volume
0,MSFT,2010-01-04,30.620001,31.1,30.59,30.950001,38409100
1,MSFT,2010-01-05,30.85,31.1,30.639999,30.959999,49749600
2,MSFT,2010-01-06,30.879999,31.08,30.52,30.77,58182400


In [119]:
p = pd.Panel(dict(zip(items, [stocks.pivot(index = 'Date', columns = 'Company', values = i) for i in items])))
p

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 1762 (major_axis) x 4 (minor_axis)
Items axis: Open to Volume
Major_axis axis: 2010-01-04 00:00:00 to 2016-12-30 00:00:00
Minor_axis axis: AAPL to MSFT

### The Axes of a Panel Object

In [49]:
## axes of panel objects are the components of three dimensional objects
## Dimensions: tells us how many dataframes are stored, we have columns * rows(each data frame) * number of dataframes
## in stocks, we have 5 dataframes, each dataframe has 1762 rows of record of four companies
p.items  ## The most external layer of panel data. 5 dataframes 

Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')

In [51]:
p.major_axis  ## an attribute gives us a list of major axis represents the row labels 

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
               '2016-12-22', '2016-12-23', '2016-12-27', '2016-12-28',
               '2016-12-29', '2016-12-30'],
              dtype='datetime64[ns]', name='Date', length=1762, freq=None)

In [50]:
p.minor_axis  # an attribute gives us a list of columns names in each dataframe

Index(['AAPL', 'AMZN', 'GOOG', 'MSFT'], dtype='object', name='Company')

In [52]:
p.axes  ## bundle the previous .items, .major_axis, .minor_axis attributes information together

[Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object'),
 DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
                '2016-12-22', '2016-12-23', '2016-12-27', '2016-12-28',
                '2016-12-29', '2016-12-30'],
               dtype='datetime64[ns]', name='Date', length=1762, freq=None),
 Index(['AAPL', 'AMZN', 'GOOG', 'MSFT'], dtype='object', name='Company')]

### Panel Attributes

In [53]:
p.ndim  ## panel data is three-dimensional data

3

In [54]:
p.dtypes

Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object

In [55]:
p.shape   ## tuple gives the measurements of the panel ( # of dataframes, rows, columns)

(5, 1762, 4)

In [56]:
p.size   ## total number of values stored in the panel data = 5 * 1762 * 4

35240

In [57]:
p.values  ## it gives all the rows of values within every single one of our data

array([[[3.04900000e+01, 1.36250000e+02, 3.12304413e+02, 3.06200010e+01],
        [3.06571430e+01, 1.33429993e+02, 3.12418976e+02, 3.08500000e+01],
        [3.06257130e+01, 1.34600006e+02, 3.11761444e+02, 3.08799990e+01],
        ...,
        [1.17519997e+02, 7.76250000e+02, 7.93700012e+02, 6.34000020e+01],
        [1.16449997e+02, 7.72400024e+02, 7.83330017e+02, 6.28600010e+01],
        [1.16650002e+02, 7.66469971e+02, 7.82750000e+02, 6.29599990e+01]],

       [[3.06428570e+01, 1.36610001e+02, 3.13579620e+02, 3.11000000e+01],
        [3.07985710e+01, 1.35479996e+02, 3.12747742e+02, 3.11000000e+01],
        [3.07471430e+01, 1.34729996e+02, 3.11761444e+02, 3.10800000e+01],
        ...,
        [1.18019997e+02, 7.80000000e+02, 7.94229980e+02, 6.34000020e+01],
        [1.17110001e+02, 7.73400024e+02, 7.85929993e+02, 6.32000010e+01],
        [1.17199997e+02, 7.67400024e+02, 7.82780029e+02, 6.29900020e+01]],

       [[3.03400000e+01, 1.33139999e+02, 3.10954468e+02, 3.05900000e+01],
        

### Use Bracket Notation to Extract a DataFrame from a Panel (for Subsetting)

In [58]:
p.items

Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')

In [60]:
p['Open']   ## it extracts the 'Open' information(dataframe) across all stocks and all time
## Usually bracket notation will extract the next smallest object(eg: dataframe use columns for bracket notation)
## panel use items(dataframes) for bracket notation.

Company,AAPL,AMZN,GOOG,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,30.490000,136.250000,312.304413,30.620001
2010-01-05,30.657143,133.429993,312.418976,30.850000
2010-01-06,30.625713,134.600006,311.761444,30.879999
2010-01-07,30.250000,132.009995,303.562164,30.629999
2010-01-08,30.042856,130.559998,294.894653,30.280001
2010-01-11,30.400000,132.619995,301.101410,30.709999
2010-01-12,29.884285,128.990005,297.709106,30.150000
2010-01-13,29.695715,127.900002,287.168610,30.260000
2010-01-14,30.015715,129.139999,290.859772,30.309999
2010-01-15,30.132856,129.179993,295.562164,31.080000


In [74]:
p['Volume']  ## This is a new dataframe object which means all functions can be worked on this dataframe
p['Volume']  ##  which is the same for p.Volume (use this to extract a dataframe), this only works when there is no space
p['Volume'].head(3)

Company,AAPL,AMZN,GOOG,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,123432400,7599900,3927000,38409100
2010-01-05,150476200,8851900,6031900,49749600
2010-01-06,138040000,7178800,7987100,58182400


### Extracting with the .loc, iloc, and .ix Methods

In [113]:
p['Open'].loc['2010-01-01':'2010-01-06' , 'AAPL':'GOOG']

Company,AAPL,AMZN,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-04,30.49,136.25,312.304413
2010-01-05,30.657143,133.429993,312.418976
2010-01-06,30.625713,134.600006,311.761444


In [86]:
## directly call .loc[] function on panel data, always extract the dataframe(item) first 
p.loc['Close'].head(3)

Company,AAPL,AMZN,GOOG,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,30.572857,133.899994,312.204773,30.950001
2010-01-05,30.625713,134.690002,310.829926,30.959999
2010-01-06,30.138571,132.25,302.994293,30.77


In [88]:
## panel.loc[item, major_axis, minor_axis], item, major_axis, minor_axis must be exists
p.loc['Close', '2014-04-08','GOOG']  ## extract value

553.380676

In [91]:
p.iloc[3, 200, 3]  ## Microsoft Close price on 2010-10-20

25.1

In [92]:
p.ix['High', 500, 'GOOG']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':


321.041656

### Convert Panel to a MultiIndex DataFrame (and Vice Versa)

In [111]:
df = p.to_frame()
## when .to_frame() function is called on panel, panel will be converted to a MultiIndex dataframe
df.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Volume
Date,Company,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,AAPL,30.49,30.642857,30.34,30.572857,123432400
2010-01-04,AMZN,136.25,136.610001,133.139999,133.899994,7599900
2010-01-04,GOOG,312.304413,313.57962,310.954468,312.204773,3927000


In [100]:
p2 = df.to_panel()   ## convert a dataframe to panel object
p2

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  if __name__ == '__main__':


<class 'pandas.core.panel.Panel'>
Dimensions: 5 (items) x 1762 (major_axis) x 4 (minor_axis)
Items axis: Open to Volume
Major_axis axis: 2010-01-04 00:00:00 to 2016-12-30 00:00:00
Minor_axis axis: AAPL to MSFT

### The .major_xs() Method

In [124]:
## One way to extract dataframes from panel objects
p.items
p['Volume']
## Another way: using .major_xs() function which compare with .major_axis attribute
## .major_xs() function returns a dataframe based on a major axis key, minor_axis(panel) will become row index label, items become column headers
p.major_xs('2016-09-06')  ## returns a dataframe cut out the major axis

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAPL,107.900002,108.300003,107.510002,107.699997,26880400
AMZN,774.039978,789.47998,770.219971,788.869995,3719800
GOOG,773.450012,782.0,771.0,780.080017,1442800
MSFT,57.779999,57.799999,57.209999,57.610001,16278400


### The .minor_xs() Method

In [121]:
p.minor_xs('AAPL').head(3)  ## must put in valid values, check by .minor_axis or .major)_axis

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.49,30.642857,30.34,30.572857,123432400
2010-01-05,30.657143,30.798571,30.464285,30.625713,150476200
2010-01-06,30.625713,30.747143,30.107143,30.138571,138040000


In [116]:
p['Open']  ## cut out item 
p.items
p.major_xs('2016-09-16')  ## cut out the major axis
p.major_axis
p.minor_xs('GOOG').head(2)  ## cut out the minor axis
p.minor_axis

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,312.304413,313.57962,310.954468,312.204773,3927000
2010-01-05,312.418976,312.747742,309.609497,310.829926,6031900


### Transpose a Panel with the .transpose() Method

In [129]:
## what if we want to sway the items(stock information) with the minor axis(companies)
p.axes  ## items is index 0 position, datetime is index 1 position, companies is index 2 position
## .transpose() function can be used to change the axies position to rebuild the structure of panel object 

[Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object'),
 DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
                '2016-12-22', '2016-12-23', '2016-12-27', '2016-12-28',
                '2016-12-29', '2016-12-30'],
               dtype='datetime64[ns]', name='Date', length=1762, freq=None),
 Index(['AAPL', 'AMZN', 'GOOG', 'MSFT'], dtype='object', name='Company')]

In [131]:
p2 = p.transpose(2, 1, 0)   ## need to be reassign if we want it
## .transpose() args in the sequence of wanted items, major axis and minor axis, lookup the index position of original axis
p2

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  return super(Panel, self).transpose(*axes, **kwargs)


<class 'pandas.core.panel.Panel'>
Dimensions: 4 (items) x 1762 (major_axis) x 5 (minor_axis)
Items axis: AAPL to MSFT
Major_axis axis: 2010-01-04 00:00:00 to 2016-12-30 00:00:00
Minor_axis axis: Open to Volume

In [132]:
p2['AAPL'].head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.49,30.642857,30.34,30.572857,123432400.0
2010-01-05,30.657143,30.798571,30.464285,30.625713,150476200.0
2010-01-06,30.625713,30.747143,30.107143,30.138571,138040000.0


In [133]:
p2.major_xs('2010-01-04')

Company,AAPL,AMZN,GOOG,MSFT
Open,30.49,136.25,312.3044,30.62
High,30.64286,136.61,313.5796,31.1
Low,30.34,133.14,310.9545,30.59
Close,30.57286,133.9,312.2048,30.95
Volume,123432400.0,7599900.0,3927000.0,38409100.0


In [135]:
p2.minor_xs('Volume').head(3)

Company,AAPL,AMZN,GOOG,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,123432400.0,7599900.0,3927000.0,38409100.0
2010-01-05,150476200.0,8851900.0,6031900.0,49749600.0
2010-01-06,138040000.0,7178800.0,7987100.0,58182400.0


### The .swapaxes() Method

In [137]:
## provide the names of the axis to the .swapaxes() function 
p2 = p.swapaxes('minor', 'items')
p2

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  from ipykernel import kernelapp as app


<class 'pandas.core.panel.Panel'>
Dimensions: 4 (items) x 1762 (major_axis) x 5 (minor_axis)
Items axis: AAPL to MSFT
Major_axis axis: 2010-01-04 00:00:00 to 2016-12-30 00:00:00
Minor_axis axis: Open to Volume

In [138]:
p2.axes

[Index(['AAPL', 'AMZN', 'GOOG', 'MSFT'], dtype='object', name='Company'),
 DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2016-12-16', '2016-12-19', '2016-12-20', '2016-12-21',
                '2016-12-22', '2016-12-23', '2016-12-27', '2016-12-28',
                '2016-12-29', '2016-12-30'],
               dtype='datetime64[ns]', name='Date', length=1762, freq=None),
 Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')]

In [140]:
p2['MSFT'].head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.620001,31.1,30.59,30.950001,38409100.0
2010-01-05,30.85,31.1,30.639999,30.959999,49749600.0
2010-01-06,30.879999,31.08,30.52,30.77,58182400.0


In [141]:
p2.major_xs('2016-09-02')

Company,AAPL,AMZN,GOOG,MSFT
Open,107.7,774.11,773.01,57.67
High,108.0,776.0,773.92,58.19
Low,106.82,771.7,768.41,57.42
Close,107.73,772.44,771.46,57.67
Volume,26802500.0,2181800.0,1072700.0,18900500.0


In [143]:
p2.minor_xs('Close').head(3)

Company,AAPL,AMZN,GOOG,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,30.572857,133.899994,312.204773,30.950001
2010-01-05,30.625713,134.690002,310.829926,30.959999
2010-01-06,30.138571,132.25,302.994293,30.77
