In [1]:
import pandas as pd
import numpy as np

In [2]:
tg = pd.read_csv('datasets/tech_giants.csv')

In [3]:
tg.head()

Unnamed: 0,date,month,year,day,name,open,close,high,low,volume,volume_type
0,2014-01-02,1,2014,2,FB,54.86,54.71,55.22,54.19,43257622,medium
1,2014-01-02,1,2014,2,AAPL,79.38,79.02,79.58,78.86,8398851,low
2,2014-01-02,1,2014,2,GOOGL,557.73,556.56,558.88,554.13,1822719,medium
3,2014-01-02,1,2014,2,MSFT,37.35,37.16,37.4,37.1,30643745,medium
4,2014-01-02,1,2014,2,AMZN,398.8,397.97,399.36,394.02,2140246,medium


In [4]:
tg.shape

(7105, 11)

In [5]:
7105 / 250 /5

5.684

In [6]:
# 5 years of data
# 250 trading days, and 5 companies 

In [7]:
tg.year.value_counts()

2016    1260
2014    1260
2015    1260
2017    1255
2018    1255
2019     815
Name: year, dtype: int64

In [8]:
tg.year.value_counts()/5

2016    252.0
2014    252.0
2015    252.0
2017    251.0
2018    251.0
2019    163.0
Name: year, dtype: float64

In [9]:
# 5 full years and 1 half year

In [10]:
tg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7105 entries, 0 to 7104
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         7105 non-null   object 
 1   month        7105 non-null   int64  
 2   year         7105 non-null   int64  
 3   day          7105 non-null   int64  
 4   name         7105 non-null   object 
 5   open         7105 non-null   float64
 6   close        7105 non-null   float64
 7   high         7105 non-null   float64
 8   low          7105 non-null   float64
 9   volume       7105 non-null   int64  
 10  volume_type  7105 non-null   object 
dtypes: float64(4), int64(4), object(3)
memory usage: 610.7+ KB


### Index and RangeIndex

In [11]:
type(tg.index)

# range index is immutable object
# which further inhertis from the index class

pandas.core.indexes.range.RangeIndex

In [12]:
type(tg.columns)

# index is another immutable data structure
# numpy ndarray which is ordered and sliceable

pandas.core.indexes.base.Index

In [13]:
tg.set_index('date')

Unnamed: 0_level_0,month,year,day,name,open,close,high,low,volume,volume_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,1,2014,2,FB,54.86,54.71,55.22,54.19,43257622,medium
2014-01-02,1,2014,2,AAPL,79.38,79.02,79.58,78.86,8398851,low
2014-01-02,1,2014,2,GOOGL,557.73,556.56,558.88,554.13,1822719,medium
2014-01-02,1,2014,2,MSFT,37.35,37.16,37.40,37.10,30643745,medium
2014-01-02,1,2014,2,AMZN,398.80,397.97,399.36,394.02,2140246,medium
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,8,2019,23,MSFT,137.19,133.39,138.35,132.80,38515386,medium
2019-08-23,8,2019,23,AAPL,209.43,202.64,212.05,201.00,46882843,medium
2019-08-23,8,2019,23,GOOGL,1185.17,1153.58,1195.67,1150.00,1813141,medium
2019-08-23,8,2019,23,AMZN,1793.03,1749.62,1804.90,1745.23,5277898,medium


In [14]:
# label based indexing

tg.set_index('date').loc['2019-08-01']

Unnamed: 0_level_0,month,year,day,name,open,close,high,low,volume,volume_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-08-01,8,2019,1,GOOGL,1217.63,1211.78,1236.3,1207.0,1771271,medium
2019-08-01,8,2019,1,FB,194.17,192.73,198.47,190.88,17777013,medium
2019-08-01,8,2019,1,MSFT,137.0,138.06,140.94,136.93,40557502,medium
2019-08-01,8,2019,1,AMZN,1871.72,1855.32,1897.92,1844.01,4713311,medium
2019-08-01,8,2019,1,AAPL,213.9,208.43,218.03,206.74,54017922,medium


### Creating a MultiIndex

In [15]:
# using more than one field as the index for our
# dataframe
# hierarchical index

In [16]:
tg.set_index(['date','name'], inplace=True)

In [17]:
tg.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
2014-01-02,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851,low
2014-01-02,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719,medium
2014-01-02,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745,medium
2014-01-02,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246,medium
2014-01-03,FB,1,2014,3,55.0,54.56,55.65,54.53,38287706,medium
2014-01-03,GOOGL,1,2014,3,557.5,552.5,558.47,552.47,1669229,medium
2014-01-03,MSFT,1,2014,3,37.2,36.91,37.22,36.6,31134795,medium
2014-01-03,AAPL,1,2014,3,79.0,77.28,79.1,77.2,14043410,low
2014-01-03,AMZN,1,2014,3,398.29,396.44,402.71,396.22,2213512,medium


In [18]:
type(tg.index)

pandas.core.indexes.multi.MultiIndex

### MultiIndex from read_csv()

In [19]:
tg = pd.read_csv('datasets/tech_giants.csv',index_col=['date','name'])

In [20]:
tg.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
2014-01-02,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851,low
2014-01-02,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719,medium
2014-01-02,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745,medium
2014-01-02,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246,medium


In [21]:
# way 2
# pd.DataFrame(data,index=MultiIndexObject`)

### Hierarchical DataFrames

In [22]:
tg.index

MultiIndex([('2014-01-02',    'FB'),
            ('2014-01-02',  'AAPL'),
            ('2014-01-02', 'GOOGL'),
            ('2014-01-02',  'MSFT'),
            ('2014-01-02',  'AMZN'),
            ('2014-01-03',    'FB'),
            ('2014-01-03', 'GOOGL'),
            ('2014-01-03',  'MSFT'),
            ('2014-01-03',  'AAPL'),
            ('2014-01-03',  'AMZN'),
            ...
            ('2019-08-22',  'MSFT'),
            ('2019-08-22',    'FB'),
            ('2019-08-22',  'AMZN'),
            ('2019-08-22',  'AAPL'),
            ('2019-08-22', 'GOOGL'),
            ('2019-08-23',  'MSFT'),
            ('2019-08-23',  'AAPL'),
            ('2019-08-23', 'GOOGL'),
            ('2019-08-23',  'AMZN'),
            ('2019-08-23',    'FB')],
           names=['date', 'name'], length=7105)

In [23]:
tg.loc['2014-01-02','GOOGL']

month                1
year              2014
day                  2
open            557.73
close           556.56
high            558.88
low             554.13
volume         1822719
volume_type     medium
Name: (2014-01-02, GOOGL), dtype: object

In [24]:
tg.loc['2014-01-02']
# indexing by first index (date)

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume,volume_type
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851,low
GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719,medium
MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745,medium
AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246,medium


In [25]:
tg.loc['2014-01-02','GOOGL']

month                1
year              2014
day                  2
open            557.73
close           556.56
high            558.88
low             554.13
volume         1822719
volume_type     medium
Name: (2014-01-02, GOOGL), dtype: object

In [26]:
# tg.loc['2014-01-02']['GOOGL']

In [27]:
tg.loc['2014-01-02','GOOGL'].close

556.56

In [28]:
# passing the tuple to capture the label in multi-index

tg.loc[('2014-01-02','GOOGL')]

month                1
year              2014
day                  2
open            557.73
close           556.56
high            558.88
low             554.13
volume         1822719
volume_type     medium
Name: (2014-01-02, GOOGL), dtype: object

In [29]:
tg.loc[('2014-01-02','GOOGL'),'close']

556.56

In [30]:
# agnostic: having a non-commital attitude or belief

In [31]:
# iloc - agnostic to hierarchical dataframes
# loc  - cares

In [32]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
2014-01-02,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851,low
2014-01-02,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719,medium
2014-01-02,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745,medium
2014-01-02,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246,medium


In [33]:
tg.iloc[2,4]

556.56

In [34]:
# lthe label based approach

tg.loc[('2014-01-03','AAPL'), 'open':'close']

open      79.0
close    77.28
Name: (2014-01-03, AAPL), dtype: object

In [35]:
# alternatively
tg.iloc[8,3:5]

open      79.0
close    77.28
Name: (2014-01-03, AAPL), dtype: object

### Indexing Ranges and Slices

In [36]:
# select multiple days

In [37]:
tg.loc[['2015-01-06','2015-01-07']]

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-06,AAPL,1,2015,6,106.54,106.26,107.43,104.63,65797116,medium
2015-01-06,FB,1,2015,6,77.23,76.15,77.59,75.36,27399288,medium
2015-01-06,AMZN,1,2015,6,302.2,295.29,303.0,292.38,3519034,medium
2015-01-06,MSFT,1,2015,6,46.38,45.65,46.75,45.54,36447854,medium
2015-01-06,GOOGL,1,2015,6,520.49,506.64,521.21,505.55,2731813,medium
2015-01-07,FB,1,2015,7,76.76,76.15,77.36,75.82,22045333,medium
2015-01-07,MSFT,1,2015,7,45.98,46.23,46.46,45.49,29114061,medium
2015-01-07,GOOGL,1,2015,7,510.99,505.15,511.49,503.65,2345875,medium
2015-01-07,AMZN,1,2015,7,297.54,298.42,301.28,295.33,2640349,medium
2015-01-07,AAPL,1,2015,7,107.2,107.75,108.2,106.7,40105934,medium


In [38]:
tg.loc[(['2015-01-06','2015-01-07'],['AMZN','FB']),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-06,AMZN,1,2015,6,302.2,295.29,303.0,292.38,3519034,medium
2015-01-06,FB,1,2015,6,77.23,76.15,77.59,75.36,27399288,medium
2015-01-07,AMZN,1,2015,7,297.54,298.42,301.28,295.33,2640349,medium
2015-01-07,FB,1,2015,7,76.76,76.15,77.36,75.82,22045333,medium


In [39]:
# [([index1],[index2]),[column_index]]

In [40]:
# [([index1],[index2]),:]

In [41]:
tg.loc[(['2015-01-06','2015-01-07'],['AMZN','FB']),['open','close','volume']]

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,volume
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-06,AMZN,302.2,295.29,3519034
2015-01-06,FB,77.23,76.15,27399288
2015-01-07,AMZN,297.54,298.42,2640349
2015-01-07,FB,76.76,76.15,22045333


In [42]:
# ablility to slice

In [43]:
# slicing across both dimensions 

tg.loc['2017-01-03':'2017-01-31','open':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,AMZN,757.92,753.67,758.76,747.70
2017-01-03,FB,116.03,116.86,117.84,115.51
2017-01-03,MSFT,62.79,62.58,62.84,62.13
2017-01-03,AAPL,115.80,116.15,116.33,114.76
2017-01-03,GOOGL,800.62,808.01,811.44,796.89
...,...,...,...,...,...
2017-01-31,MSFT,64.86,64.65,65.15,64.26
2017-01-31,AAPL,121.15,121.35,121.39,120.62
2017-01-31,FB,130.17,130.32,130.66,129.52
2017-01-31,GOOGL,819.50,820.19,823.07,813.40


In [44]:
# tg.loc[(['2017-01-03':'2017-01-31'],['GOOGL']),'open':'low']

In [45]:
# inorder to slice hierachical index, we have to slice object

In [46]:
tg.loc[(slice('2017-01-03','2017-01-31'),'GOOGL'),'open':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,GOOGL,800.62,808.01,811.44,796.89
2017-01-04,GOOGL,809.89,807.77,813.43,804.11
2017-01-05,GOOGL,807.5,813.02,813.74,805.92
2017-01-06,GOOGL,814.99,825.21,828.96,811.5
2017-01-09,GOOGL,826.37,827.18,830.43,821.62
2017-01-10,GOOGL,827.07,826.01,829.41,823.14
2017-01-11,GOOGL,826.62,829.86,829.9,821.47
2017-01-12,GOOGL,828.38,829.53,830.38,821.01
2017-01-13,GOOGL,831.0,830.94,834.65,829.52
2017-01-17,GOOGL,830.0,827.46,830.18,823.2


In [47]:
tg.loc[(slice('2017-01-03','2017-01-31'),'GOOGL'),'open':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,GOOGL,800.62,808.01,811.44,796.89
2017-01-04,GOOGL,809.89,807.77,813.43,804.11
2017-01-05,GOOGL,807.5,813.02,813.74,805.92
2017-01-06,GOOGL,814.99,825.21,828.96,811.5
2017-01-09,GOOGL,826.37,827.18,830.43,821.62
2017-01-10,GOOGL,827.07,826.01,829.41,823.14
2017-01-11,GOOGL,826.62,829.86,829.9,821.47
2017-01-12,GOOGL,828.38,829.53,830.38,821.01
2017-01-13,GOOGL,831.0,830.94,834.65,829.52
2017-01-17,GOOGL,830.0,827.46,830.18,823.2


In [48]:
# skipping of index
# to select everything slice(NOne)

tg.loc[(slice(None), ('FB','AMZN')),'open']

date        name
2014-01-02  FB        54.86
2014-01-03  FB        55.00
2014-01-06  FB        54.39
2014-01-07  FB        57.67
2014-01-08  FB        57.59
                     ...   
2019-08-19  AMZN    1818.08
2019-08-20  AMZN    1814.50
2019-08-21  AMZN    1819.39
2019-08-22  AMZN    1828.00
2019-08-23  AMZN    1793.03
Name: open, Length: 2842, dtype: float64

In [49]:
tg.loc[(slice(None), ['FB','AMZN']),'open']

date        name
2014-01-02  FB        54.86
2014-01-03  FB        55.00
2014-01-06  FB        54.39
2014-01-07  FB        57.67
2014-01-08  FB        57.59
                     ...   
2019-08-19  AMZN    1818.08
2019-08-20  AMZN    1814.50
2019-08-21  AMZN    1819.39
2019-08-22  AMZN    1828.00
2019-08-23  AMZN    1793.03
Name: open, Length: 2842, dtype: float64

### IndexSlice (: operator)

In [50]:
# high and low for all days for AMZN and FB

In [51]:
tg.loc[pd.IndexSlice[:,['FB','AMZN']],'high':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-01-02,FB,55.22,54.19
2014-01-03,FB,55.65,54.53
2014-01-06,FB,57.26,54.05
2014-01-07,FB,58.55,57.22
2014-01-08,FB,58.41,57.23
...,...,...,...
2019-08-19,AMZN,1826.00,1812.61
2019-08-20,AMZN,1816.82,1799.88
2019-08-21,AMZN,1829.58,1815.00
2019-08-22,AMZN,1829.41,1800.10


In [52]:
i = pd.IndexSlice 
# using short form 

In [53]:
tg.loc[i[:,['FB','AMZN']],'high':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-01-02,FB,55.22,54.19
2014-01-03,FB,55.65,54.53
2014-01-06,FB,57.26,54.05
2014-01-07,FB,58.55,57.22
2014-01-08,FB,58.41,57.23
...,...,...,...
2019-08-19,AMZN,1826.00,1812.61
2019-08-20,AMZN,1816.82,1799.88
2019-08-21,AMZN,1829.58,1815.00
2019-08-22,AMZN,1829.41,1800.10


In [54]:
# jan 6 to jan 10

In [55]:
tg.loc[i['2014-01-06':'2014-01-10',['FB','AMZN']], 'open':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-06,AMZN,396.13,393.63,397.0,388.42
2014-01-06,FB,54.39,57.2,57.26,54.05
2014-01-07,FB,57.67,57.92,58.55,57.22
2014-01-07,AMZN,395.04,398.03,398.47,394.29
2014-01-08,AMZN,398.47,401.92,403.0,396.04
2014-01-08,FB,57.59,58.23,58.41,57.23
2014-01-09,AMZN,403.75,401.01,406.89,398.44
2014-01-09,FB,58.66,57.22,58.96,56.65
2014-01-10,FB,57.13,57.94,58.3,57.06
2014-01-10,AMZN,402.04,397.66,403.76,393.8


### Cross Sections with xs()

In [56]:
# subset of label based indexing

In [57]:
tg.xs('2019-01-02')

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume,volume_type
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MSFT,1,2019,2,99.55,101.12,101.75,98.94,35329345,medium
FB,1,2019,2,128.99,135.68,137.51,128.56,28146193,medium
GOOGL,1,2019,2,1027.2,1054.68,1060.79,1025.28,1593395,medium
AMZN,1,2019,2,1465.2,1539.13,1553.36,1460.93,7983103,medium
AAPL,1,2019,2,154.89,157.92,158.85,154.23,37039737,medium


In [58]:
tg.loc['2019-01-02']

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume,volume_type
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MSFT,1,2019,2,99.55,101.12,101.75,98.94,35329345,medium
FB,1,2019,2,128.99,135.68,137.51,128.56,28146193,medium
GOOGL,1,2019,2,1027.2,1054.68,1060.79,1025.28,1593395,medium
AMZN,1,2019,2,1465.2,1539.13,1553.36,1460.93,7983103,medium
AAPL,1,2019,2,154.89,157.92,158.85,154.23,37039737,medium


In [59]:
# for FB all dates

tg.loc[(slice(None),('FB')),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
2014-01-03,FB,1,2014,3,55.00,54.56,55.65,54.53,38287706,medium
2014-01-06,FB,1,2014,6,54.39,57.20,57.26,54.05,68974359,high
2014-01-07,FB,1,2014,7,57.67,57.92,58.55,57.22,77329009,high
2014-01-08,FB,1,2014,8,57.59,58.23,58.41,57.23,56800776,high
...,...,...,...,...,...,...,...,...,...,...
2019-08-19,FB,8,2019,19,186.01,186.17,187.50,184.85,9699661,low
2019-08-20,FB,8,2019,20,185.45,183.81,186.00,182.39,10087592,low
2019-08-21,FB,8,2019,21,185.00,183.55,185.90,183.14,8409548,low
2019-08-22,FB,8,2019,22,183.43,182.04,184.11,179.91,10829509,low


In [60]:
tg.xs('FB',level=1)

# drops the column it is selected from

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume,volume_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-01-02,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
2014-01-03,1,2014,3,55.00,54.56,55.65,54.53,38287706,medium
2014-01-06,1,2014,6,54.39,57.20,57.26,54.05,68974359,high
2014-01-07,1,2014,7,57.67,57.92,58.55,57.22,77329009,high
2014-01-08,1,2014,8,57.59,58.23,58.41,57.23,56800776,high
...,...,...,...,...,...,...,...,...,...
2019-08-19,8,2019,19,186.01,186.17,187.50,184.85,9699661,low
2019-08-20,8,2019,20,185.45,183.81,186.00,182.39,10087592,low
2019-08-21,8,2019,21,185.00,183.55,185.90,183.14,8409548,low
2019-08-22,8,2019,22,183.43,182.04,184.11,179.91,10829509,low


In [61]:
tg.xs('2014-01-02',level=0)


Unnamed: 0_level_0,month,year,day,open,close,high,low,volume,volume_type
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851,low
GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719,medium
MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745,medium
AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246,medium


In [62]:
tg.xs('FB',level=1, drop_level=False)

# this does not drop name column

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622,medium
2014-01-03,FB,1,2014,3,55.00,54.56,55.65,54.53,38287706,medium
2014-01-06,FB,1,2014,6,54.39,57.20,57.26,54.05,68974359,high
2014-01-07,FB,1,2014,7,57.67,57.92,58.55,57.22,77329009,high
2014-01-08,FB,1,2014,8,57.59,58.23,58.41,57.23,56800776,high
...,...,...,...,...,...,...,...,...,...,...
2019-08-19,FB,8,2019,19,186.01,186.17,187.50,184.85,9699661,low
2019-08-20,FB,8,2019,20,185.45,183.81,186.00,182.39,10087592,low
2019-08-21,FB,8,2019,21,185.00,183.55,185.90,183.14,8409548,low
2019-08-22,FB,8,2019,22,183.43,182.04,184.11,179.91,10829509,low


In [63]:
## selecting from multiple levels at once

In [64]:
tg.xs(('2019-01-02','FB'),level=(0,1))

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume,volume_type
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-02,FB,1,2019,2,128.99,135.68,137.51,128.56,28146193,medium


In [65]:
tg.xs('month',axis=1)

date        name 
2014-01-02  FB       1
            AAPL     1
            GOOGL    1
            MSFT     1
            AMZN     1
                    ..
2019-08-23  MSFT     8
            AAPL     8
            GOOGL    8
            AMZN     8
            FB       8
Name: month, Length: 7105, dtype: int64

In [67]:
# tg.loc[pd.IndexSlice[['2015-07-13':'2016-08-17']],'open':'low']

In [68]:
tech_df2 = tg.loc[i['2015-07-13':'2016-08-17'],'open':'low']

In [69]:
tech_df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-07-13,MSFT,44.98,45.54,45.62,44.95
2015-07-13,AMZN,448.29,455.57,457.87,447.54
2015-07-13,FB,88.66,90.1,90.22,88.42
2015-07-13,GOOGL,559.51,571.73,572.85,558.7
2015-07-13,AAPL,125.03,125.66,125.76,124.32


In [70]:
tech_df2.loc[i[:,['AAPL']],:].sample(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-05-16,AAPL,92.39,93.88,94.39,91.65
2016-04-26,AAPL,103.91,104.35,105.3,103.91
2015-09-17,AAPL,115.66,113.92,116.49,113.72
2015-12-24,AAPL,109.0,108.03,109.0,107.95
2015-10-09,AAPL,110.0,112.12,112.28,109.49
2015-07-23,AAPL,126.2,125.16,127.09,125.06
2015-11-17,AAPL,114.92,113.69,115.05,113.32
2016-02-12,AAPL,94.19,93.99,94.5,93.01
2016-03-18,AAPL,106.34,105.92,106.5,105.19
2016-01-12,AAPL,100.55,99.96,100.69,98.84


In [71]:
tech_df2.loc[i[:,['AAPL']],:].sample(10) 

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-07-15,AAPL,98.92,98.78,99.3,98.5
2015-08-18,AAPL,116.43,116.5,117.44,116.01
2016-01-06,AAPL,100.56,100.7,102.37,99.87
2015-12-24,AAPL,109.0,108.03,109.0,107.95
2015-08-06,AAPL,115.97,115.13,116.5,114.12
2016-05-06,AAPL,93.37,92.72,93.45,91.85
2015-10-29,AAPL,118.7,120.53,120.69,118.27
2016-04-06,AAPL,110.23,110.96,110.98,109.2
2016-03-08,AAPL,100.78,101.03,101.76,100.4
2016-01-11,AAPL,98.97,98.53,99.06,97.34


In [72]:
tech_df2.loc[i[:,['AAPL','GOOGL']],'high':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,high,low
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-07-13,AAPL,125.76,124.32
2015-07-14,AAPL,126.37,125.04
2015-07-15,AAPL,127.15,125.58
2015-07-16,AAPL,128.57,127.35
2015-07-17,AAPL,129.62,128.31
...,...,...,...
2016-08-11,GOOGL,813.88,806.00
2016-08-12,GOOGL,807.19,803.64
2016-08-15,GOOGL,811.36,804.03
2016-08-16,GOOGL,804.26,797.00


In [73]:
type(tg)

pandas.core.frame.DataFrame

In [74]:
type(tg.index)

pandas.core.indexes.multi.MultiIndex

In [75]:
tg.index

MultiIndex([('2014-01-02',    'FB'),
            ('2014-01-02',  'AAPL'),
            ('2014-01-02', 'GOOGL'),
            ('2014-01-02',  'MSFT'),
            ('2014-01-02',  'AMZN'),
            ('2014-01-03',    'FB'),
            ('2014-01-03', 'GOOGL'),
            ('2014-01-03',  'MSFT'),
            ('2014-01-03',  'AAPL'),
            ('2014-01-03',  'AMZN'),
            ...
            ('2019-08-22',  'MSFT'),
            ('2019-08-22',    'FB'),
            ('2019-08-22',  'AMZN'),
            ('2019-08-22',  'AAPL'),
            ('2019-08-22', 'GOOGL'),
            ('2019-08-23',  'MSFT'),
            ('2019-08-23',  'AAPL'),
            ('2019-08-23', 'GOOGL'),
            ('2019-08-23',  'AMZN'),
            ('2019-08-23',    'FB')],
           names=['date', 'name'], length=7105)

In [79]:
# names

In [78]:
tg.index.names

FrozenList(['date', 'name'])

In [80]:
# levels

In [81]:
tg.index.nlevels

2

In [82]:
len(tg.index.levels)

2

In [83]:
tg.index.levels

FrozenList([['2014-01-02', '2014-01-03', '2014-01-06', '2014-01-07', '2014-01-08', '2014-01-09', '2014-01-10', '2014-01-13', '2014-01-14', '2014-01-15', '2014-01-16', '2014-01-17', '2014-01-21', '2014-01-22', '2014-01-23', '2014-01-24', '2014-01-27', '2014-01-28', '2014-01-29', '2014-01-30', '2014-01-31', '2014-02-03', '2014-02-04', '2014-02-05', '2014-02-06', '2014-02-07', '2014-02-10', '2014-02-11', '2014-02-12', '2014-02-13', '2014-02-14', '2014-02-18', '2014-02-19', '2014-02-20', '2014-02-21', '2014-02-24', '2014-02-25', '2014-02-26', '2014-02-27', '2014-02-28', '2014-03-03', '2014-03-04', '2014-03-05', '2014-03-06', '2014-03-07', '2014-03-10', '2014-03-11', '2014-03-12', '2014-03-13', '2014-03-14', '2014-03-17', '2014-03-18', '2014-03-19', '2014-03-20', '2014-03-21', '2014-03-24', '2014-03-25', '2014-03-26', '2014-03-27', '2014-03-28', '2014-03-31', '2014-04-01', '2014-04-02', '2014-04-03', '2014-04-04', '2014-04-07', '2014-04-08', '2014-04-09', '2014-04-10', '2014-04-11', '2014-0

In [84]:
tg.index.levels[0]

Index(['2014-01-02', '2014-01-03', '2014-01-06', '2014-01-07', '2014-01-08',
       '2014-01-09', '2014-01-10', '2014-01-13', '2014-01-14', '2014-01-15',
       ...
       '2019-08-12', '2019-08-13', '2019-08-14', '2019-08-15', '2019-08-16',
       '2019-08-19', '2019-08-20', '2019-08-21', '2019-08-22', '2019-08-23'],
      dtype='object', name='date', length=1421)

In [85]:
tg.index.levels[1]

Index(['AAPL', 'AMZN', 'FB', 'GOOGL', 'MSFT'], dtype='object', name='name')

In [87]:
# highest level of index is date index (outer level)

In [90]:
tg.index.levshape

# there are 1421 dates and 5 distinct names (tickers)

(1421, 5)

In [91]:
tg.index.values

array([('2014-01-02', 'FB'), ('2014-01-02', 'AAPL'),
       ('2014-01-02', 'GOOGL'), ..., ('2019-08-23', 'GOOGL'),
       ('2019-08-23', 'AMZN'), ('2019-08-23', 'FB')], dtype=object)

### Adding Another Level

In [92]:
tg.set_index('volume_type', append=True)

# append to exsinting index instead of creating new index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,name,volume_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,medium,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,AAPL,low,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,GOOGL,medium,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,MSFT,medium,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,AMZN,medium,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,MSFT,medium,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,AAPL,medium,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,GOOGL,medium,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,AMZN,medium,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [94]:
tg.set_index('volume_type')

# replaces other index and sets volume_type as new index

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume
volume_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
medium,1,2014,2,54.86,54.71,55.22,54.19,43257622
low,1,2014,2,79.38,79.02,79.58,78.86,8398851
medium,1,2014,2,557.73,556.56,558.88,554.13,1822719
medium,1,2014,2,37.35,37.16,37.40,37.10,30643745
medium,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...
medium,8,2019,23,137.19,133.39,138.35,132.80,38515386
medium,8,2019,23,209.43,202.64,212.05,201.00,46882843
medium,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
medium,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [95]:
tg.set_index('volume_type', append=True, inplace=True)


In [96]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,name,volume_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,medium,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,AAPL,low,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,GOOGL,medium,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,MSFT,medium,1,2014,2,37.35,37.16,37.4,37.1,30643745
2014-01-02,AMZN,medium,1,2014,2,398.8,397.97,399.36,394.02,2140246


In [97]:
tg.index.nlevels

3

In [98]:
tg.index.levels[2]

Index(['high', 'low', 'medium'], dtype='object', name='volume_type')

In [100]:
tg.index.levshape

(1421, 5, 3)

In [102]:
tg.index.values

array([('2014-01-02', 'FB', 'medium'), ('2014-01-02', 'AAPL', 'low'),
       ('2014-01-02', 'GOOGL', 'medium'), ...,
       ('2019-08-23', 'GOOGL', 'medium'),
       ('2019-08-23', 'AMZN', 'medium'), ('2019-08-23', 'FB', 'medium')],
      dtype=object)

In [103]:
# jan 2019 high volumne trading days

In [106]:
tg.loc[(slice('2019-01-01','2019-01-31'), slice(None), 'high'),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,name,volume_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-03,AAPL,high,1,2019,3,143.98,142.19,145.72,142.0,91312195
2019-01-04,AMZN,high,1,2019,4,1530.0,1575.39,1594.0,1518.31,9182575
2019-01-08,AMZN,high,1,2019,8,1664.69,1656.58,1676.61,1616.61,8881428
2019-01-31,AMZN,high,1,2019,31,1692.85,1718.73,1736.41,1679.08,10910338
2019-01-31,FB,high,1,2019,31,165.6,166.69,171.68,165.0,77233602


In [107]:
tg.loc[(slice('2019-01-01','2019-01-31'), slice(None), ['high','medium']),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,name,volume_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-02,MSFT,medium,1,2019,2,99.55,101.12,101.75,98.94,35329345
2019-01-02,FB,medium,1,2019,2,128.99,135.68,137.51,128.56,28146193
2019-01-02,GOOGL,medium,1,2019,2,1027.20,1054.68,1060.79,1025.28,1593395
2019-01-02,AMZN,medium,1,2019,2,1465.20,1539.13,1553.36,1460.93,7983103
2019-01-02,AAPL,medium,1,2019,2,154.89,157.92,158.85,154.23,37039737
...,...,...,...,...,...,...,...,...,...,...
2019-01-31,AAPL,medium,1,2019,31,166.11,166.44,169.00,164.56,40739649
2019-01-31,GOOGL,medium,1,2019,31,1112.24,1125.89,1127.67,1105.25,2011572
2019-01-31,AMZN,high,1,2019,31,1692.85,1718.73,1736.41,1679.08,10910338
2019-01-31,FB,high,1,2019,31,165.60,166.69,171.68,165.00,77233602


In [109]:
# all high volume trading days

In [110]:
tg.xs(('high'),level=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-01-06,FB,1,2014,6,54.39,57.20,57.26,54.05,68974359
2014-01-07,FB,1,2014,7,57.67,57.92,58.55,57.22,77329009
2014-01-08,FB,1,2014,8,57.59,58.23,58.41,57.23,56800776
2014-01-09,FB,1,2014,9,58.66,57.22,58.96,56.65,92349222
2014-01-13,FB,1,2014,13,57.89,55.91,58.25,55.38,63106519
...,...,...,...,...,...,...,...,...,...
2019-04-30,GOOGL,4,2019,30,1190.63,1198.96,1200.98,1183.00,6658855
2019-06-03,FB,6,2019,3,175.00,164.15,175.05,161.01,56059609
2019-06-03,AMZN,6,2019,3,1760.01,1692.69,1766.29,1672.00,9098708
2019-06-03,GOOGL,6,2019,3,1066.93,1038.74,1067.00,1027.03,4844480


In [112]:
tg.xs(('FB','high'),level=(1,2))

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-01-06,1,2014,6,54.39,57.20,57.26,54.05,68974359
2014-01-07,1,2014,7,57.67,57.92,58.55,57.22,77329009
2014-01-08,1,2014,8,57.59,58.23,58.41,57.23,56800776
2014-01-09,1,2014,9,58.66,57.22,58.96,56.65,92349222
2014-01-13,1,2014,13,57.89,55.91,58.25,55.38,63106519
...,...,...,...,...,...,...,...,...
2018-10-31,10,2018,31,155.00,151.79,156.40,148.96,60101251
2018-12-19,12,2018,19,141.21,133.24,144.91,132.50,57404894
2018-12-21,12,2018,21,133.39,124.95,134.90,123.42,56901491
2019-01-31,1,2019,31,165.60,166.69,171.68,165.00,77233602


In [113]:
115/375

0.30666666666666664

### Shuffling Levels

In [114]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,name,volume_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,medium,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,AAPL,low,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,GOOGL,medium,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,MSFT,medium,1,2014,2,37.35,37.16,37.4,37.1,30643745
2014-01-02,AMZN,medium,1,2014,2,398.8,397.97,399.36,394.02,2140246


In [116]:
## swaplevel

In [117]:
tg.swaplevel(2,1)

# swapping 2 and 1 

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,medium,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,medium,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,medium,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,medium,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [118]:
tg.swaplevel('volume_type','name')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,medium,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,medium,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,medium,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,medium,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [119]:
tg = tg.swaplevel('volume_type','name')

In [120]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745
2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246


In [121]:
# to reorder more broadly using a single method

In [122]:
tg.reorder_levels([2,0,1])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
name,date,volume_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FB,2014-01-02,medium,1,2014,2,54.86,54.71,55.22,54.19,43257622
AAPL,2014-01-02,low,1,2014,2,79.38,79.02,79.58,78.86,8398851
GOOGL,2014-01-02,medium,1,2014,2,557.73,556.56,558.88,554.13,1822719
MSFT,2014-01-02,medium,1,2014,2,37.35,37.16,37.40,37.10,30643745
AMZN,2014-01-02,medium,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...
MSFT,2019-08-23,medium,8,2019,23,137.19,133.39,138.35,132.80,38515386
AAPL,2019-08-23,medium,8,2019,23,209.43,202.64,212.05,201.00,46882843
GOOGL,2019-08-23,medium,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
AMZN,2019-08-23,medium,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [123]:
tg.index.reorder_levels([2,0,1])

MultiIndex([(   'FB', '2014-01-02', 'medium'),
            ( 'AAPL', '2014-01-02',    'low'),
            ('GOOGL', '2014-01-02', 'medium'),
            ( 'MSFT', '2014-01-02', 'medium'),
            ( 'AMZN', '2014-01-02', 'medium'),
            (   'FB', '2014-01-03', 'medium'),
            ('GOOGL', '2014-01-03', 'medium'),
            ( 'MSFT', '2014-01-03', 'medium'),
            ( 'AAPL', '2014-01-03',    'low'),
            ( 'AMZN', '2014-01-03', 'medium'),
            ...
            ( 'MSFT', '2019-08-22', 'medium'),
            (   'FB', '2019-08-22',    'low'),
            ( 'AMZN', '2019-08-22', 'medium'),
            ( 'AAPL', '2019-08-22', 'medium'),
            ('GOOGL', '2019-08-22',    'low'),
            ( 'MSFT', '2019-08-23', 'medium'),
            ( 'AAPL', '2019-08-23', 'medium'),
            ('GOOGL', '2019-08-23', 'medium'),
            ( 'AMZN', '2019-08-23', 'medium'),
            (   'FB', '2019-08-23', 'medium')],
           names=['name', 'date', 'volume_t

### Removing MultiIndex Levels

In [125]:
tg.droplevel(1)
# the second column has been dropped

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...
2019-08-23,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [127]:
# tg.head()

In [130]:
# to restore it back to the dataframe
# volume_type has been placed back to the data

tg.reset_index(level=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,volume_type,month,year,day,open,close,high,low,volume
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,FB,medium,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,AAPL,low,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,GOOGL,medium,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,MSFT,medium,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,AMZN,medium,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,MSFT,medium,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,AAPL,medium,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,GOOGL,medium,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,AMZN,medium,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [132]:
tg.reset_index(level=1,drop=True)

# discarded just like with droplevel

Unnamed: 0_level_0,Unnamed: 1_level_0,month,year,day,open,close,high,low,volume
date,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-01-02,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...
2019-08-23,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [133]:
# removing several levels at once

In [134]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745
2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246


In [136]:
tg.droplevel(['volume_type','name'])

# removes both levels

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-01-02,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...
2019-08-23,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [138]:
tg.droplevel([1,2]) # passing integer

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-01-02,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...
2019-08-23,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [139]:
tg.reset_index(level=['volume_type','name'],drop=True)

Unnamed: 0_level_0,month,year,day,open,close,high,low,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-01-02,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...
2019-08-23,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [140]:
tg.reset_index(level=['volume_type','name'])

Unnamed: 0_level_0,volume_type,name,month,year,day,open,close,high,low,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,medium,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,medium,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,medium,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,medium,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


In [141]:
# removing all index and getting back to dfault to basic range index

In [142]:
tg.reset_index()

Unnamed: 0,date,volume_type,name,month,year,day,open,close,high,low,volume
0,2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
1,2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2,2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
3,2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
4,2014-01-02,medium,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
...,...,...,...,...,...,...,...,...,...,...,...
7100,2019-08-23,medium,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
7101,2019-08-23,medium,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
7102,2019-08-23,medium,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
7103,2019-08-23,medium,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


### MultiIndex sort_index()

In [143]:
tg.loc[(slice('2014-01-02','2014-04-02'),slice(None), 'AAPL'),'open':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,close,high,low
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,low,AAPL,79.38,79.02,79.58,78.86
2014-01-03,low,AAPL,79.00,77.28,79.10,77.20
2014-01-06,low,AAPL,76.78,77.70,78.11,76.23
2014-01-07,low,AAPL,77.76,77.15,77.99,76.85
2014-01-08,low,AAPL,76.97,77.64,77.94,76.96
...,...,...,...,...,...,...
2014-03-27,low,AAPL,77.11,76.78,77.36,76.45
2014-03-28,low,AAPL,76.82,76.69,76.99,76.32
2014-03-31,low,AAPL,77.03,76.68,77.26,76.56
2014-04-01,low,AAPL,76.84,77.38,77.41,76.68


In [151]:
# tg.loc[(slice('2014-01-02','2014-04-02'),slice(None), 'AAPL'),'open':'low']


## unsorted index error

In [147]:
tg.sort_index(inplace=True)

In [150]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745


In [149]:
tg.loc[(slice('2014-01-02','2014-04-02'),slice(None), 'AAPL'),'open':'low']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,open,close,high,low
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,low,AAPL,79.38,79.02,79.58,78.86
2014-01-03,low,AAPL,79.00,77.28,79.10,77.20
2014-01-06,low,AAPL,76.78,77.70,78.11,76.23
2014-01-07,low,AAPL,77.76,77.15,77.99,76.85
2014-01-08,low,AAPL,76.97,77.64,77.94,76.96
...,...,...,...,...,...,...
2014-03-27,low,AAPL,77.11,76.78,77.36,76.45
2014-03-28,low,AAPL,76.82,76.69,76.99,76.32
2014-03-31,low,AAPL,77.03,76.68,77.26,76.56
2014-04-01,low,AAPL,76.84,77.38,77.41,76.68


It is always good to sort the index


- improves retrieval performance, which becomes significant
-- for large dataframes, or
-- frequent retrieval

- enables slicing syntax

- overall a good practice when working with tabular data representation, incluing pandas, excel, sql etc

In [154]:
tg.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745
2014-01-03,low,AAPL,1,2014,3,79.0,77.28,79.1,77.2,14043410
2014-01-03,medium,AMZN,1,2014,3,398.29,396.44,402.71,396.22,2213512
2014-01-03,medium,FB,1,2014,3,55.0,54.56,55.65,54.53,38287706
2014-01-03,medium,GOOGL,1,2014,3,557.5,552.5,558.47,552.47,1669229
2014-01-03,medium,MSFT,1,2014,3,37.2,36.91,37.22,36.6,31134795


In [155]:
# sort one level of multi index in descending order
# sort name in ascending order

### Sorting different levels in different order

In [156]:
tg.sort_index(level=(0,2), ascending=(True, False))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
date,volume_type,name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,medium,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,MSFT,8,2019,23,137.19,133.39,138.35,132.80,38515386
2019-08-23,medium,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141
2019-08-23,medium,FB,8,2019,23,180.84,177.75,183.13,176.66,17331221
2019-08-23,medium,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898


### More Multiindex Methods

In [219]:
tidx = tg.index

In [220]:
tidx

MultiIndex([('2014-01-02',    'low',  'AAPL'),
            ('2014-01-02', 'medium',  'AMZN'),
            ('2014-01-02', 'medium',    'FB'),
            ('2014-01-02', 'medium', 'GOOGL'),
            ('2014-01-02', 'medium',  'MSFT'),
            ('2014-01-03',    'low',  'AAPL'),
            ('2014-01-03', 'medium',  'AMZN'),
            ('2014-01-03', 'medium',    'FB'),
            ('2014-01-03', 'medium', 'GOOGL'),
            ('2014-01-03', 'medium',  'MSFT'),
            ...
            ('2019-08-22',    'low',    'FB'),
            ('2019-08-22',    'low', 'GOOGL'),
            ('2019-08-22', 'medium',  'AAPL'),
            ('2019-08-22', 'medium',  'AMZN'),
            ('2019-08-22', 'medium',  'MSFT'),
            ('2019-08-23', 'medium',  'AAPL'),
            ('2019-08-23', 'medium',  'AMZN'),
            ('2019-08-23', 'medium',    'FB'),
            ('2019-08-23', 'medium', 'GOOGL'),
            ('2019-08-23', 'medium',  'MSFT')],
           names=['Trading Date', 'Volume C

In [221]:
type(tidx)

pandas.core.indexes.multi.MultiIndex

In [222]:
tidx.is_lexsorted()

# lexical graphics sort - alphabetical order
# in lexsort 10 comes before 7
# numbers are treated as individaul letter

## 1 comes before 7

True

In [223]:
tidx.sortlevel(0, ascending=False, sort_remaining=True)

(MultiIndex([('2019-08-23', 'medium',  'MSFT'),
             ('2019-08-23', 'medium', 'GOOGL'),
             ('2019-08-23', 'medium',    'FB'),
             ('2019-08-23', 'medium',  'AMZN'),
             ('2019-08-23', 'medium',  'AAPL'),
             ('2019-08-22', 'medium',  'MSFT'),
             ('2019-08-22', 'medium',  'AMZN'),
             ('2019-08-22', 'medium',  'AAPL'),
             ('2019-08-22',    'low', 'GOOGL'),
             ('2019-08-22',    'low',    'FB'),
             ...
             ('2014-01-03', 'medium',  'MSFT'),
             ('2014-01-03', 'medium', 'GOOGL'),
             ('2014-01-03', 'medium',    'FB'),
             ('2014-01-03', 'medium',  'AMZN'),
             ('2014-01-03',    'low',  'AAPL'),
             ('2014-01-02', 'medium',  'MSFT'),
             ('2014-01-02', 'medium', 'GOOGL'),
             ('2014-01-02', 'medium',    'FB'),
             ('2014-01-02', 'medium',  'AMZN'),
             ('2014-01-02',    'low',  'AAPL')],
            names=['Tr

In [234]:
tidx.sortlevel((0,1,2), ascending=(False,True,True), sort_remaining=True)

## ??

(MultiIndex([('2014-01-02',    'low',  'AAPL'),
             ('2014-01-02', 'medium',  'AMZN'),
             ('2014-01-02', 'medium',    'FB'),
             ('2014-01-02', 'medium', 'GOOGL'),
             ('2014-01-02', 'medium',  'MSFT'),
             ('2014-01-03',    'low',  'AAPL'),
             ('2014-01-03', 'medium',  'AMZN'),
             ('2014-01-03', 'medium',    'FB'),
             ('2014-01-03', 'medium', 'GOOGL'),
             ('2014-01-03', 'medium',  'MSFT'),
             ...
             ('2019-08-22',    'low',    'FB'),
             ('2019-08-22',    'low', 'GOOGL'),
             ('2019-08-22', 'medium',  'AAPL'),
             ('2019-08-22', 'medium',  'AMZN'),
             ('2019-08-22', 'medium',  'MSFT'),
             ('2019-08-23', 'medium',  'AAPL'),
             ('2019-08-23', 'medium',  'AMZN'),
             ('2019-08-23', 'medium',    'FB'),
             ('2019-08-23', 'medium', 'GOOGL'),
             ('2019-08-23', 'medium',  'MSFT')],
            names=['Tr

In [225]:
tidx.set_names(names=['Trading Date','Volume Category','Ticker'],inplace=True)

In [211]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
Trading Date,Volume Category,Ticker,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745


In [212]:
tidx.sortlevel((0,1,2), ascending=(False,False,False))

(MultiIndex([('2014-01-02',    'low',  'AAPL'),
             ('2014-01-02', 'medium',  'AMZN'),
             ('2014-01-02', 'medium',    'FB'),
             ('2014-01-02', 'medium', 'GOOGL'),
             ('2014-01-02', 'medium',  'MSFT'),
             ('2014-01-03',    'low',  'AAPL'),
             ('2014-01-03', 'medium',  'AMZN'),
             ('2014-01-03', 'medium',    'FB'),
             ('2014-01-03', 'medium', 'GOOGL'),
             ('2014-01-03', 'medium',  'MSFT'),
             ...
             ('2019-08-22',    'low',    'FB'),
             ('2019-08-22',    'low', 'GOOGL'),
             ('2019-08-22', 'medium',  'AAPL'),
             ('2019-08-22', 'medium',  'AMZN'),
             ('2019-08-22', 'medium',  'MSFT'),
             ('2019-08-23', 'medium',  'AAPL'),
             ('2019-08-23', 'medium',  'AMZN'),
             ('2019-08-23', 'medium',    'FB'),
             ('2019-08-23', 'medium', 'GOOGL'),
             ('2019-08-23', 'medium',  'MSFT')],
            names=['Tr

In [217]:
# tidx = tidx.sortlevel(0, ascending=False, sort_remaining=True)

In [218]:
# tidx

In [215]:
tg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,month,year,day,open,close,high,low,volume
Trading Date,Volume Category,Ticker,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745


In [228]:
tidx.to_flat_index()

# shows how pandas think of our multilevel index
# ('2014-01-02', 'low', 'AAPL')

Index([    ('2014-01-02', 'low', 'AAPL'),  ('2014-01-02', 'medium', 'AMZN'),
          ('2014-01-02', 'medium', 'FB'), ('2014-01-02', 'medium', 'GOOGL'),
        ('2014-01-02', 'medium', 'MSFT'),     ('2014-01-03', 'low', 'AAPL'),
        ('2014-01-03', 'medium', 'AMZN'),    ('2014-01-03', 'medium', 'FB'),
       ('2014-01-03', 'medium', 'GOOGL'),  ('2014-01-03', 'medium', 'MSFT'),
       ...
             ('2019-08-22', 'low', 'FB'),    ('2019-08-22', 'low', 'GOOGL'),
        ('2019-08-22', 'medium', 'AAPL'),  ('2019-08-22', 'medium', 'AMZN'),
        ('2019-08-22', 'medium', 'MSFT'),  ('2019-08-23', 'medium', 'AAPL'),
        ('2019-08-23', 'medium', 'AMZN'),    ('2019-08-23', 'medium', 'FB'),
       ('2019-08-23', 'medium', 'GOOGL'),  ('2019-08-23', 'medium', 'MSFT')],
      dtype='object', length=7105)

In [227]:
tidx.to_flat_index().shape

(7105,)

In [230]:
tg.close

# like multi index series

Trading Date  Volume Category  Ticker
2014-01-02    low              AAPL        79.02
              medium           AMZN       397.97
                               FB          54.71
                               GOOGL      556.56
                               MSFT        37.16
                                          ...   
2019-08-23    medium           AAPL       202.64
                               AMZN      1749.62
                               FB         177.75
                               GOOGL     1153.58
                               MSFT       133.39
Name: close, Length: 7105, dtype: float64

In [232]:
# multi index represent tight coupling of lables
# hierarchy between labels and asscoicated values

### Reshaping with stack()

In [235]:
 tg.stack()

Trading Date  Volume Category  Ticker        
2014-01-02    low              AAPL    month            1.00
                                       year          2014.00
                                       day              2.00
                                       open            79.38
                                       close           79.02
                                                    ...     
2019-08-23    medium           MSFT    open           137.19
                                       close          133.39
                                       high           138.35
                                       low            132.80
                                       volume    38515386.00
Length: 56840, dtype: float64

In [236]:
stacked =  tg.stack()

In [240]:
stacked.head()

# transfer of the dim from col axis to wider multi index

Trading Date  Volume Category  Ticker       
2014-01-02    low              AAPL    month       1.00
                                       year     2014.00
                                       day         2.00
                                       open       79.38
                                       close      79.02
dtype: float64

In [239]:
type(stacked)

pandas.core.series.Series

In [242]:
# from 3 level multi-index df with 1 level col axi  to 4 level multi  index series
# containig single value

In [243]:
stacked.index.nlevels

4

In [246]:
stacked.index.names
# immutable

FrozenList(['Trading Date', 'Volume Category', 'Ticker', None])

In [247]:
# create a new instance

names = stacked.index.names

In [248]:
stacked.index.set_names(['Trading Date', 'Volume Category', 'Ticker', 'Previously a Column Axis'], inplace=True)

In [249]:
stacked.head()

Trading Date  Volume Category  Ticker  Previously a Column Axis
2014-01-02    low              AAPL    month                          1.00
                                       year                        2014.00
                                       day                            2.00
                                       open                          79.38
                                       close                         79.02
dtype: float64

### unstack(), The flipside

In [250]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Previously a Column Axis,month,year,day,open,close,high,low,volume
Trading Date,Volume Category,Ticker,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1.0,2014.0,2.0,79.38,79.02,79.58,78.86,8398851.0
2014-01-02,medium,AMZN,1.0,2014.0,2.0,398.80,397.97,399.36,394.02,2140246.0
2014-01-02,medium,FB,1.0,2014.0,2.0,54.86,54.71,55.22,54.19,43257622.0
2014-01-02,medium,GOOGL,1.0,2014.0,2.0,557.73,556.56,558.88,554.13,1822719.0
2014-01-02,medium,MSFT,1.0,2014.0,2.0,37.35,37.16,37.40,37.10,30643745.0
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,AAPL,8.0,2019.0,23.0,209.43,202.64,212.05,201.00,46882843.0
2019-08-23,medium,AMZN,8.0,2019.0,23.0,1793.03,1749.62,1804.90,1745.23,5277898.0
2019-08-23,medium,FB,8.0,2019.0,23.0,180.84,177.75,183.13,176.66,17331221.0
2019-08-23,medium,GOOGL,8.0,2019.0,23.0,1185.17,1153.58,1195.67,1150.00,1813141.0


In [252]:
stacked.unstack().unstack()

# second innermost will be column

Unnamed: 0_level_0,Previously a Column Axis,month,month,month,month,month,year,year,year,year,year,...,low,low,low,low,low,volume,volume,volume,volume,volume
Unnamed: 0_level_1,Ticker,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT,...,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT
Trading Date,Volume Category,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
2014-01-02,low,1.0,,,,,2014.0,,,,,...,78.86,,,,,8398851.0,,,,
2014-01-02,medium,,1.0,1.0,1.0,1.0,,2014.0,2014.0,2014.0,2014.0,...,,394.02,54.19,554.13,37.10,,2140246.0,43257622.0,1822719.0,30643745.0
2014-01-03,low,1.0,,,,,2014.0,,,,,...,77.20,,,,,14043410.0,,,,
2014-01-03,medium,,1.0,1.0,1.0,1.0,,2014.0,2014.0,2014.0,2014.0,...,,396.22,54.53,552.47,36.60,,2213512.0,38287706.0,1669229.0,31134795.0
2014-01-06,high,,,1.0,,,,,2014.0,,,...,,,54.05,,,,,68974359.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-21,low,,8.0,8.0,8.0,8.0,,2019.0,2019.0,2019.0,2019.0,...,,1815.00,183.14,1187.92,138.00,,2039231.0,8409548.0,708272.0,14982314.0
2019-08-21,medium,8.0,,,,,2019.0,,,,,...,211.60,,,,,21564747.0,,,,
2019-08-22,low,,,8.0,8.0,,,,2019.0,2019.0,,...,,,179.91,1178.91,,,,10829509.0,867915.0,
2019-08-22,medium,8.0,8.0,,,8.0,2019.0,2019.0,,,2019.0,...,210.75,1800.10,,,136.29,22267819.0,2658388.0,,,18559088.0


In [253]:
stacked.unstack().unstack().unstack()

Previously a Column Axis,month,month,month,month,month,month,month,month,month,month,...,volume,volume,volume,volume,volume,volume,volume,volume,volume,volume
Ticker,AAPL,AAPL,AAPL,AMZN,AMZN,AMZN,FB,FB,FB,GOOGL,...,AMZN,FB,FB,FB,GOOGL,GOOGL,GOOGL,MSFT,MSFT,MSFT
Volume Category,high,low,medium,high,low,medium,high,low,medium,high,...,medium,high,low,medium,high,low,medium,high,low,medium
Trading Date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2014-01-02,,1.0,,,,1.0,,,1.0,,...,2140246.0,,,43257622.0,,,1822719.0,,,30643745.0
2014-01-03,,1.0,,,,1.0,,,1.0,,...,2213512.0,,,38287706.0,,,1669229.0,,,31134795.0
2014-01-06,,1.0,,,,1.0,1.0,,,,...,3172207.0,68974359.0,,,,,1770782.0,,,43615035.0
2014-01-07,,1.0,,,1.0,,1.0,,,,...,,77329009.0,,,,,2553999.0,,,35924726.0
2014-01-08,,1.0,,,,1.0,1.0,,,,...,2316903.0,56800776.0,,,,,2243444.0,,,59979542.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-19,,,8.0,,,8.0,,8.0,,,...,2820303.0,,9699661.0,,,,1222854.0,,,24370543.0
2019-08-20,,,8.0,,8.0,,,8.0,,,...,,,10087592.0,,,,1010566.0,,,21188998.0
2019-08-21,,,8.0,,8.0,,,8.0,,,...,,,8409548.0,,,708272.0,,,14982314.0,
2019-08-22,,,8.0,,,8.0,,8.0,,,...,2658388.0,,10829509.0,,,867915.0,,,,18559088.0


In [254]:
stacked.unstack().unstack(fill_value='-')

Unnamed: 0_level_0,Previously a Column Axis,month,month,month,month,month,year,year,year,year,year,...,low,low,low,low,low,volume,volume,volume,volume,volume
Unnamed: 0_level_1,Ticker,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT,...,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT
Trading Date,Volume Category,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
2014-01-02,low,1.0,-,-,-,-,2014.0,-,-,-,-,...,78.86,-,-,-,-,8398851.0,-,-,-,-
2014-01-02,medium,-,1.0,1.0,1.0,1.0,-,2014.0,2014.0,2014.0,2014.0,...,-,394.02,54.19,554.13,37.1,-,2140246.0,43257622.0,1822719.0,30643745.0
2014-01-03,low,1.0,-,-,-,-,2014.0,-,-,-,-,...,77.2,-,-,-,-,14043410.0,-,-,-,-
2014-01-03,medium,-,1.0,1.0,1.0,1.0,-,2014.0,2014.0,2014.0,2014.0,...,-,396.22,54.53,552.47,36.6,-,2213512.0,38287706.0,1669229.0,31134795.0
2014-01-06,high,-,-,1.0,-,-,-,-,2014.0,-,-,...,-,-,54.05,-,-,-,-,68974359.0,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-21,low,-,8.0,8.0,8.0,8.0,-,2019.0,2019.0,2019.0,2019.0,...,-,1815.0,183.14,1187.92,138.0,-,2039231.0,8409548.0,708272.0,14982314.0
2019-08-21,medium,8.0,-,-,-,-,2019.0,-,-,-,-,...,211.6,-,-,-,-,21564747.0,-,-,-,-
2019-08-22,low,-,-,8.0,8.0,-,-,-,2019.0,2019.0,-,...,-,-,179.91,1178.91,-,-,-,10829509.0,867915.0,-
2019-08-22,medium,8.0,8.0,-,-,8.0,2019.0,2019.0,-,-,2019.0,...,210.75,1800.1,-,-,136.29,22267819.0,2658388.0,-,-,18559088.0


In [255]:
stacked

Trading Date  Volume Category  Ticker  Previously a Column Axis
2014-01-02    low              AAPL    month                              1.00
                                       year                            2014.00
                                       day                                2.00
                                       open                              79.38
                                       close                             79.02
                                                                      ...     
2019-08-23    medium           MSFT    open                             137.19
                                       close                            133.39
                                       high                             138.35
                                       low                              132.80
                                       volume                      38515386.00
Length: 56840, dtype: float64

In [256]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Previously a Column Axis,month,year,day,open,close,high,low,volume
Trading Date,Volume Category,Ticker,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1.0,2014.0,2.0,79.38,79.02,79.58,78.86,8398851.0
2014-01-02,medium,AMZN,1.0,2014.0,2.0,398.80,397.97,399.36,394.02,2140246.0
2014-01-02,medium,FB,1.0,2014.0,2.0,54.86,54.71,55.22,54.19,43257622.0
2014-01-02,medium,GOOGL,1.0,2014.0,2.0,557.73,556.56,558.88,554.13,1822719.0
2014-01-02,medium,MSFT,1.0,2014.0,2.0,37.35,37.16,37.40,37.10,30643745.0
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,AAPL,8.0,2019.0,23.0,209.43,202.64,212.05,201.00,46882843.0
2019-08-23,medium,AMZN,8.0,2019.0,23.0,1793.03,1749.62,1804.90,1745.23,5277898.0
2019-08-23,medium,FB,8.0,2019.0,23.0,180.84,177.75,183.13,176.66,17331221.0
2019-08-23,medium,GOOGL,8.0,2019.0,23.0,1185.17,1153.58,1195.67,1150.00,1813141.0


In [257]:
# if we want volume_category to be acting as columns

In [258]:
stacked.unstack(level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume Category,high,low,medium
Trading Date,Ticker,Previously a Column Axis,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,AAPL,month,,1.00,
2014-01-02,AAPL,year,,2014.00,
2014-01-02,AAPL,day,,2.00,
2014-01-02,AAPL,open,,79.38,
2014-01-02,AAPL,close,,79.02,
...,...,...,...,...,...
2019-08-23,MSFT,open,,,137.19
2019-08-23,MSFT,close,,,133.39
2019-08-23,MSFT,high,,,138.35
2019-08-23,MSFT,low,,,132.80


In [260]:
# using the name

stacked.unstack(level='Ticker')

Unnamed: 0_level_0,Unnamed: 1_level_0,Ticker,AAPL,AMZN,FB,GOOGL,MSFT
Trading Date,Volume Category,Previously a Column Axis,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-02,low,month,1.00,,,,
2014-01-02,low,year,2014.00,,,,
2014-01-02,low,day,2.00,,,,
2014-01-02,low,open,79.38,,,,
2014-01-02,low,close,79.02,,,,
...,...,...,...,...,...,...,...
2019-08-23,medium,open,209.43,1793.03,180.84,1185.17,137.19
2019-08-23,medium,close,202.64,1749.62,177.75,1153.58,133.39
2019-08-23,medium,high,212.05,1804.90,183.13,1195.67,138.35
2019-08-23,medium,low,201.00,1745.23,176.66,1150.00,132.80


### Creating MultiLevel Columns Manually

In [261]:
stacked.unstack().unstack()

Unnamed: 0_level_0,Previously a Column Axis,month,month,month,month,month,year,year,year,year,year,...,low,low,low,low,low,volume,volume,volume,volume,volume
Unnamed: 0_level_1,Ticker,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT,...,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT
Trading Date,Volume Category,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
2014-01-02,low,1.0,,,,,2014.0,,,,,...,78.86,,,,,8398851.0,,,,
2014-01-02,medium,,1.0,1.0,1.0,1.0,,2014.0,2014.0,2014.0,2014.0,...,,394.02,54.19,554.13,37.10,,2140246.0,43257622.0,1822719.0,30643745.0
2014-01-03,low,1.0,,,,,2014.0,,,,,...,77.20,,,,,14043410.0,,,,
2014-01-03,medium,,1.0,1.0,1.0,1.0,,2014.0,2014.0,2014.0,2014.0,...,,396.22,54.53,552.47,36.60,,2213512.0,38287706.0,1669229.0,31134795.0
2014-01-06,high,,,1.0,,,,,2014.0,,,...,,,54.05,,,,,68974359.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-21,low,,8.0,8.0,8.0,8.0,,2019.0,2019.0,2019.0,2019.0,...,,1815.00,183.14,1187.92,138.00,,2039231.0,8409548.0,708272.0,14982314.0
2019-08-21,medium,8.0,,,,,2019.0,,,,,...,211.60,,,,,21564747.0,,,,
2019-08-22,low,,,8.0,8.0,,,,2019.0,2019.0,,...,,,179.91,1178.91,,,,10829509.0,867915.0,
2019-08-22,medium,8.0,8.0,,,8.0,2019.0,2019.0,,,2019.0,...,210.75,1800.10,,,136.29,22267819.0,2658388.0,,,18559088.0


In [262]:
tg.reset_index(inplace=True)

In [263]:
tg.head()

Unnamed: 0,Trading Date,Volume Category,Ticker,month,year,day,open,close,high,low,volume
0,2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
1,2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2,2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
3,2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
4,2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745


In [264]:
## step1: create multi-index

In [267]:
# cartesian-product

cols = pd.MultiIndex.from_product([['low','high'],['MSFT','AMZN']], names=['Volume Category','Ticker'])

In [268]:
cols

MultiIndex([( 'low', 'MSFT'),
            ( 'low', 'AMZN'),
            ('high', 'MSFT'),
            ('high', 'AMZN')],
           names=['Volume Category', 'Ticker'])

In [269]:
# step2: prep the underlying data

In [272]:
low = tg['Volume Category']=='low'
high = tg['Volume Category']=='high'
amzn = tg['Ticker']=='AMZN'
msft = tg['Ticker']=='MSFT'

In [275]:
[
tg[low & msft].close.sample(10).values,
tg[low & amzn].close.sample(10).values,
tg[high & msft].close.sample(10).values,
tg[high & amzn].close.sample(10).values  

]

[array([ 41.27,  73.87,  68.77, 137.46,  72.83,  55.67, 132.85,  77.91,
         75.21,  83.26]),
 array([ 335.78,  780.37,  853.42,  948.95, 1182.26,  767.58, 1581.4 ,
         673.26,  764.63,  770.6 ]),
 array([ 50.14,  55.91,  46.1 ,  42.01,  37.84,  40.47,  53.49, 103.69,
         42.25, 106.16]),
 array([ 327.82, 1692.69,  324.01,  323.  ,  482.18, 1517.96, 1478.02,
         599.03, 1668.4 ,  297.25])]

In [276]:
data = [
tg[low & msft].close.sample(10).values,
tg[low & amzn].close.sample(10).values,
tg[high & msft].close.sample(10).values,
tg[high & amzn].close.sample(10).values  

]

In [278]:
data = [[*i] for i in zip(*data)]

In [277]:
[[*i] for i in zip(*data)]

[[119.93, 335.13, 50.16, 1598.01],
 [85.71, 780.37, 47.87, 324.01],
 [126.24, 772.56, 101.71, 659.59],
 [41.27, 426.57, 49.16, 327.82],
 [72.72, 531.52, 50.99, 1817.27],
 [120.33, 1581.4, 41.19, 313.18],
 [62.99, 430.99, 50.14, 1470.9],
 [120.77, 770.6, 40.4, 599.03],
 [103.07, 306.78, 59.66, 358.69],
 [74.69, 386.04, 47.66, 1642.81]]

In [283]:
[i for i in zip(data)]

[([119.93, 335.13, 50.16, 1598.01],),
 ([85.71, 780.37, 47.87, 324.01],),
 ([126.24, 772.56, 101.71, 659.59],),
 ([41.27, 426.57, 49.16, 327.82],),
 ([72.72, 531.52, 50.99, 1817.27],),
 ([120.33, 1581.4, 41.19, 313.18],),
 ([62.99, 430.99, 50.14, 1470.9],),
 ([120.77, 770.6, 40.4, 599.03],),
 ([103.07, 306.78, 59.66, 358.69],),
 ([74.69, 386.04, 47.66, 1642.81],)]

In [287]:
[[*i] for i in zip(data)]

[[[119.93, 335.13, 50.16, 1598.01]],
 [[85.71, 780.37, 47.87, 324.01]],
 [[126.24, 772.56, 101.71, 659.59]],
 [[41.27, 426.57, 49.16, 327.82]],
 [[72.72, 531.52, 50.99, 1817.27]],
 [[120.33, 1581.4, 41.19, 313.18]],
 [[62.99, 430.99, 50.14, 1470.9]],
 [[120.77, 770.6, 40.4, 599.03]],
 [[103.07, 306.78, 59.66, 358.69]],
 [[74.69, 386.04, 47.66, 1642.81]]]

In [291]:
df= pd.DataFrame(data,columns=cols)

In [292]:
df.columns.nlevels

2

### using Transpose to obtain the same results

In [294]:
## multiindex columns = set_index() + transpose()

# set_index = create multi index
# transpose = swap columns with names

In [295]:
tg.head()

Unnamed: 0,Trading Date,Volume Category,Ticker,month,year,day,open,close,high,low,volume
0,2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
1,2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2,2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
3,2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
4,2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745


In [297]:
tg.set_index(['Trading Date','Volume Category'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Ticker,month,year,day,open,close,high,low,volume
Trading Date,Volume Category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
2014-01-02,medium,AMZN,1,2014,2,398.80,397.97,399.36,394.02,2140246
2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.40,37.10,30643745
...,...,...,...,...,...,...,...,...,...,...
2019-08-23,medium,AAPL,8,2019,23,209.43,202.64,212.05,201.00,46882843
2019-08-23,medium,AMZN,8,2019,23,1793.03,1749.62,1804.90,1745.23,5277898
2019-08-23,medium,FB,8,2019,23,180.84,177.75,183.13,176.66,17331221
2019-08-23,medium,GOOGL,8,2019,23,1185.17,1153.58,1195.67,1150.00,1813141


In [298]:
tg.set_index(['Trading Date','Volume Category']).T

Trading Date,2014-01-02,2014-01-02,2014-01-02,2014-01-02,2014-01-02,2014-01-03,2014-01-03,2014-01-03,2014-01-03,2014-01-03,...,2019-08-22,2019-08-22,2019-08-22,2019-08-22,2019-08-22,2019-08-23,2019-08-23,2019-08-23,2019-08-23,2019-08-23
Volume Category,low,medium,medium.1,medium.2,medium.3,low,medium,medium.1,medium.2,medium.3,...,low,low.1,medium,medium.1,medium.2,medium,medium.1,medium.2,medium.3,medium.4
Ticker,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT,...,FB,GOOGL,AAPL,AMZN,MSFT,AAPL,AMZN,FB,GOOGL,MSFT
month,1,1,1,1,1,1,1,1,1,1,...,8,8,8,8,8,8,8,8,8,8
year,2014,2014,2014,2014,2014,2014,2014,2014,2014,2014,...,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019
day,2,2,2,2,2,3,3,3,3,3,...,22,22,22,22,22,23,23,23,23,23
open,79.38,398.8,54.86,557.73,37.35,79.0,398.29,55.0,557.5,37.2,...,183.43,1193.8,213.19,1828.0,138.66,209.43,1793.03,180.84,1185.17,137.19
close,79.02,397.97,54.71,556.56,37.16,77.28,396.44,54.56,552.5,36.91,...,182.04,1191.52,212.46,1805.6,137.78,202.64,1749.62,177.75,1153.58,133.39
high,79.58,399.36,55.22,558.88,37.4,79.1,402.71,55.65,558.47,37.22,...,184.11,1198.78,214.44,1829.41,139.2,212.05,1804.9,183.13,1195.67,138.35
low,78.86,394.02,54.19,554.13,37.1,77.2,396.22,54.53,552.47,36.6,...,179.91,1178.91,210.75,1800.1,136.29,201.0,1745.23,176.66,1150.0,132.8
volume,8398851,2140246,43257622,1822719,30643745,14043410,2213512,38287706,1669229,31134795,...,10829509,867915,22267819,2658388,18559088,46882843,5277898,17331221,1813141,38515386


In [299]:
tg.set_index(['Trading Date','Volume Category']).transpose()

Trading Date,2014-01-02,2014-01-02,2014-01-02,2014-01-02,2014-01-02,2014-01-03,2014-01-03,2014-01-03,2014-01-03,2014-01-03,...,2019-08-22,2019-08-22,2019-08-22,2019-08-22,2019-08-22,2019-08-23,2019-08-23,2019-08-23,2019-08-23,2019-08-23
Volume Category,low,medium,medium.1,medium.2,medium.3,low,medium,medium.1,medium.2,medium.3,...,low,low.1,medium,medium.1,medium.2,medium,medium.1,medium.2,medium.3,medium.4
Ticker,AAPL,AMZN,FB,GOOGL,MSFT,AAPL,AMZN,FB,GOOGL,MSFT,...,FB,GOOGL,AAPL,AMZN,MSFT,AAPL,AMZN,FB,GOOGL,MSFT
month,1,1,1,1,1,1,1,1,1,1,...,8,8,8,8,8,8,8,8,8,8
year,2014,2014,2014,2014,2014,2014,2014,2014,2014,2014,...,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019
day,2,2,2,2,2,3,3,3,3,3,...,22,22,22,22,22,23,23,23,23,23
open,79.38,398.8,54.86,557.73,37.35,79.0,398.29,55.0,557.5,37.2,...,183.43,1193.8,213.19,1828.0,138.66,209.43,1793.03,180.84,1185.17,137.19
close,79.02,397.97,54.71,556.56,37.16,77.28,396.44,54.56,552.5,36.91,...,182.04,1191.52,212.46,1805.6,137.78,202.64,1749.62,177.75,1153.58,133.39
high,79.58,399.36,55.22,558.88,37.4,79.1,402.71,55.65,558.47,37.22,...,184.11,1198.78,214.44,1829.41,139.2,212.05,1804.9,183.13,1195.67,138.35
low,78.86,394.02,54.19,554.13,37.1,77.2,396.22,54.53,552.47,36.6,...,179.91,1178.91,210.75,1800.1,136.29,201.0,1745.23,176.66,1150.0,132.8
volume,8398851,2140246,43257622,1822719,30643745,14043410,2213512,38287706,1669229,31134795,...,10829509,867915,22267819,2658388,18559088,46882843,5277898,17331221,1813141,38515386


In [300]:
tg.head()

Unnamed: 0,Trading Date,Volume Category,Ticker,month,year,day,open,close,high,low,volume
0,2014-01-02,low,AAPL,1,2014,2,79.38,79.02,79.58,78.86,8398851
1,2014-01-02,medium,AMZN,1,2014,2,398.8,397.97,399.36,394.02,2140246
2,2014-01-02,medium,FB,1,2014,2,54.86,54.71,55.22,54.19,43257622
3,2014-01-02,medium,GOOGL,1,2014,2,557.73,556.56,558.88,554.13,1822719
4,2014-01-02,medium,MSFT,1,2014,2,37.35,37.16,37.4,37.1,30643745


In [302]:
tg_df3 = tg.set_index(['year','month','day'])

In [303]:
tg_df3

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Trading Date,Volume Category,Ticker,open,close,high,low,volume
year,month,day,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014,1,2,2014-01-02,low,AAPL,79.38,79.02,79.58,78.86,8398851
2014,1,2,2014-01-02,medium,AMZN,398.80,397.97,399.36,394.02,2140246
2014,1,2,2014-01-02,medium,FB,54.86,54.71,55.22,54.19,43257622
2014,1,2,2014-01-02,medium,GOOGL,557.73,556.56,558.88,554.13,1822719
2014,1,2,2014-01-02,medium,MSFT,37.35,37.16,37.40,37.10,30643745
...,...,...,...,...,...,...,...,...,...,...
2019,8,23,2019-08-23,medium,AAPL,209.43,202.64,212.05,201.00,46882843
2019,8,23,2019-08-23,medium,AMZN,1793.03,1749.62,1804.90,1745.23,5277898
2019,8,23,2019-08-23,medium,FB,180.84,177.75,183.13,176.66,17331221
2019,8,23,2019-08-23,medium,GOOGL,1185.17,1153.58,1195.67,1150.00,1813141


In [315]:
## select all trading date from year 2019

tg_df3.loc[(slice(2019)),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Trading Date,Volume Category,Ticker,open,close,high,low,volume
year,month,day,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014,1,2,2014-01-02,low,AAPL,79.38,79.02,79.58,78.86,8398851
2014,1,2,2014-01-02,medium,AMZN,398.80,397.97,399.36,394.02,2140246
2014,1,2,2014-01-02,medium,FB,54.86,54.71,55.22,54.19,43257622
2014,1,2,2014-01-02,medium,GOOGL,557.73,556.56,558.88,554.13,1822719
2014,1,2,2014-01-02,medium,MSFT,37.35,37.16,37.40,37.10,30643745
...,...,...,...,...,...,...,...,...,...,...
2019,8,23,2019-08-23,medium,AAPL,209.43,202.64,212.05,201.00,46882843
2019,8,23,2019-08-23,medium,AMZN,1793.03,1749.62,1804.90,1745.23,5277898
2019,8,23,2019-08-23,medium,FB,180.84,177.75,183.13,176.66,17331221
2019,8,23,2019-08-23,medium,GOOGL,1185.17,1153.58,1195.67,1150.00,1813141


In [325]:
tg_df3.loc[(2019,slice(None),slice(None)),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Trading Date,Volume Category,Ticker,open,close,high,low,volume
year,month,day,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019,1,2,2019-01-02,medium,AAPL,154.89,157.92,158.85,154.23,37039737
2019,1,2,2019-01-02,medium,AMZN,1465.20,1539.13,1553.36,1460.93,7983103
2019,1,2,2019-01-02,medium,FB,128.99,135.68,137.51,128.56,28146193
2019,1,2,2019-01-02,medium,GOOGL,1027.20,1054.68,1060.79,1025.28,1593395
2019,1,2,2019-01-02,medium,MSFT,99.55,101.12,101.75,98.94,35329345
2019,...,...,...,...,...,...,...,...,...,...
2019,8,23,2019-08-23,medium,AAPL,209.43,202.64,212.05,201.00,46882843
2019,8,23,2019-08-23,medium,AMZN,1793.03,1749.62,1804.90,1745.23,5277898
2019,8,23,2019-08-23,medium,FB,180.84,177.75,183.13,176.66,17331221
2019,8,23,2019-08-23,medium,GOOGL,1185.17,1153.58,1195.67,1150.00,1813141


In [362]:
tech_series = tg_df3.loc[(2019, slice(None), slice(None)),:].set_index('Trading Date',append=True,drop=False)

In [363]:
tech_series

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Trading Date,Volume Category,Ticker,open,close,high,low,volume
year,month,day,Trading Date,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019,1,2,2019-01-02,2019-01-02,medium,AAPL,154.89,157.92,158.85,154.23,37039737
2019,1,2,2019-01-02,2019-01-02,medium,AMZN,1465.20,1539.13,1553.36,1460.93,7983103
2019,1,2,2019-01-02,2019-01-02,medium,FB,128.99,135.68,137.51,128.56,28146193
2019,1,2,2019-01-02,2019-01-02,medium,GOOGL,1027.20,1054.68,1060.79,1025.28,1593395
2019,1,2,2019-01-02,2019-01-02,medium,MSFT,99.55,101.12,101.75,98.94,35329345
2019,...,...,...,...,...,...,...,...,...,...,...
2019,8,23,2019-08-23,2019-08-23,medium,AAPL,209.43,202.64,212.05,201.00,46882843
2019,8,23,2019-08-23,2019-08-23,medium,AMZN,1793.03,1749.62,1804.90,1745.23,5277898
2019,8,23,2019-08-23,2019-08-23,medium,FB,180.84,177.75,183.13,176.66,17331221
2019,8,23,2019-08-23,2019-08-23,medium,GOOGL,1185.17,1153.58,1195.67,1150.00,1813141


In [319]:
tech_series = tg_df3.loc[(slice(2019)),:].set_index('Trading Date',append=True)

In [320]:
tech_series

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Volume Category,Ticker,open,close,high,low,volume
year,month,day,Trading Date,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014,1,2,2014-01-02,low,AAPL,79.38,79.02,79.58,78.86,8398851
2014,1,2,2014-01-02,medium,AMZN,398.80,397.97,399.36,394.02,2140246
2014,1,2,2014-01-02,medium,FB,54.86,54.71,55.22,54.19,43257622
2014,1,2,2014-01-02,medium,GOOGL,557.73,556.56,558.88,554.13,1822719
2014,1,2,2014-01-02,medium,MSFT,37.35,37.16,37.40,37.10,30643745
...,...,...,...,...,...,...,...,...,...,...
2019,8,23,2019-08-23,medium,AAPL,209.43,202.64,212.05,201.00,46882843
2019,8,23,2019-08-23,medium,AMZN,1793.03,1749.62,1804.90,1745.23,5277898
2019,8,23,2019-08-23,medium,FB,180.84,177.75,183.13,176.66,17331221
2019,8,23,2019-08-23,medium,GOOGL,1185.17,1153.58,1195.67,1150.00,1813141


In [364]:
tech_series.close.values.mean()

686.3976196319018

In [365]:
tech_series.close.values.std()

676.8112458511995

In [329]:
# add the existing column axis as the 4th level of the multiindex

tech_series = tg_df3.loc[(2019,slice(None),slice(None)),:].stack()

In [330]:
tech_series

year  month  day                 
2019  1      2    Trading Date       2019-01-02
                  Volume Category        medium
                  Ticker                   AAPL
                  open                   154.89
                  close                  157.92
                                        ...    
      8      23   open                   137.19
                  close                  133.39
                  high                   138.35
                  low                     132.8
                  volume               38515386
Length: 6520, dtype: object

In [349]:
tech_series.index.names

FrozenList(['year', 'month', 'day', None])

In [348]:
# for i,v in enumerate(tech_series.values):
#     if (i+1)%4==0:
#         print(v)

In [353]:
tech_series.loc[(slice(None),slice(None),slice(None), 'close')]

year  month  day
2019  1      2       157.92
             2      1539.13
             2       135.68
             2      1054.68
             2       101.12
                     ...   
      8      23      202.64
             23     1749.62
             23      177.75
             23     1153.58
             23      133.39
Length: 815, dtype: object

In [354]:
tech_series.loc[(slice(None),slice(None),slice(None), 'close')].values

array([157.92, 1539.13, 135.68, 1054.68, 101.12, 142.19, 1500.28, 131.74,
       1025.47, 97.4, 1575.39, 148.26, 137.95, 1078.07, 101.93, 147.93,
       1629.51, 138.05, 1075.92, 102.06, 1656.58, 150.75, 142.53, 1085.37,
       102.8, 153.31, 1659.42, 144.23, 1081.65, 104.27, 153.8, 1656.22,
       144.2, 1078.83, 103.6, 143.8, 152.29, 1640.56, 1064.47, 102.8,
       150.0, 1617.21, 145.39, 1051.51, 102.05, 153.07, 1674.56, 148.95,
       1086.51, 105.01, 154.94, 1683.78, 147.54, 1089.51, 105.38, 155.86,
       1693.22, 148.3, 1099.12, 106.12, 156.82, 1696.2, 150.04, 1107.3,
       107.71, 153.3, 1632.17, 147.57, 1078.63, 105.68, 153.92, 1640.02,
       144.3, 1084.41, 106.71, 152.7, 1654.93, 145.83, 1084.0, 106.2,
       157.76, 1670.57, 149.01, 1101.51, 107.17, 156.3, 1637.89, 147.47,
       1079.86, 105.08, 154.68, 1593.88, 144.19, 1070.06, 102.94, 165.25,
       1670.43, 150.42, 1097.99, 106.38, 1718.73, 166.69, 166.44, 1125.89,
       104.43, 1626.23, 166.52, 165.71, 1118.62, 102.

In [355]:
tech_series.loc[(slice(None),slice(None),slice(None), 'close')].values.mean()

686.3976196319019

In [356]:
tech_series.loc[(slice(None),slice(None),slice(None), 'close')].values.std()

676.8112458512002

In [357]:
tech_series.loc[(slice(None),slice(None),slice(None), 'close')].apply({'Average Price':'mean', 'Standard Deviation':'std'})

Average Price         686.39762
Standard Deviation    677.22685
dtype: float64