Hierarchical indexing (i.e. multi-indexing) allows multiple index levels within a single index.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Specifying multiple index - bad way: every label is just a plain (state, year) tuple
index = [
    (state, year)
    for state in ('California', 'New York', 'Texas')
    for year in (2000, 2010)
]

# Populations, listed in the same (state, year) order as `index`
populations = [
    33871648, 37253956,   # California: 2000, 2010
    18976457, 19378102,   # New York:   2000, 2010
    20851820, 25145561,   # Texas:      2000, 2010
]
pop = pd.Series(data=populations, index=index)
pop


(California, 2000)    33871648
(California, 2010)    37253956
(New York, 2000)      18976457
(New York, 2010)      19378102
(Texas, 2000)         20851820
(Texas, 2010)         25145561
dtype: int64

In [3]:
# Look up one value by its full (state, year) tuple label.
pop['California',2010]

37253956

Now try slicing by state on this tuple-labelled Series (this fails with a TypeError):

In [4]:
# Slicing by state on the tuple-labelled Series raised a TypeError when this
# notebook was recorded. The failure is the point of the demo, so catch it and
# print it: "Restart & Run All" then continues past this cell while the error
# is still shown to the reader.
try:
    pop['California':'New York']
except (TypeError, KeyError) as err:
    print(f"{type(err).__name__}: {err}")


TypeError: ignored

In [None]:
#Create Multiindex object properly
# Convert the list of (state, year) tuples into a real pd.MultiIndex.
# Note: this rebinds the name `index` from the plain list to the MultiIndex.
index = pd.MultiIndex.from_tuples(index)
index

In [None]:
# Inspect the `codes` attribute of the MultiIndex.
index.codes

In [None]:
# Inspect the `levels` attribute of the MultiIndex.
index.levels

In [14]:
#Better way of indexing; easy for querying data
# Re-label the existing Series with the MultiIndex built from the same tuples;
# the displayed result shows state and year as two separate index levels.
pop = pop.reindex(index)
pop

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [None]:
# Display the MultiIndex again.
index

In [11]:
# `pop` is still a Series after re-indexing; only its index changed.
type(pop)

pandas.core.series.Series

In [12]:
# Same (state, year) lookup as before, now against the MultiIndex.
pop['California',2010]

37253956

In [None]:
# Retry the state slice that raised a TypeError with plain tuple labels.
pop['California':'New York']

In [None]:
# Full slice on the first (state) level, the single label 2010 on the second (year) level.
pop[:, 2010]

In [None]:
# The trailing comma makes the key the 1-tuple ('California',): only the first level is given.
pop['California',]

In [15]:
# Pivot the two-level Series into a DataFrame; in the displayed result the
# states are the rows and the years are the columns.
pop_df = pop.unstack()
pop_df


Unnamed: 0,2000,2010
California,33871648,37253956
New York,18976457,19378102
Texas,20851820,25145561


In [16]:
# stack() turns the states-by-years frame back into the two-level Series.
pop_df.stack()

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [17]:
# Unstack level 0 (the state level) instead: years become the rows and states
# the columns. Note: this rebinds `pop_df` to the transposed layout.
pop_df = pop.unstack(level=0)
pop_df


Unnamed: 0,California,New York,Texas
2000,33871648,18976457,20851820
2010,37253956,19378102,25145561


In [None]:
# Stack the years-by-states frame back into a Series.
pop_df.stack()

- Multi-indexing allows us to represent two-dimensional data within a one-dimensional Series.
- We can also represent three or more dimensions in a Series or DataFrame.
- Each extra level in a multi-index represents an extra dimension of data.

# Multi-Index Assignment - Let's Try
## Multi Indexing.Assignment 5docx.docx

In [None]:
# Load the small stocks sample from a shortened URL (requires network access).
stocks = pd.read_csv("http://bit.ly/smallstocks")
# Shares value by closing time

In [None]:
# Display the frame as loaded.
stocks

In [None]:
# Inspect the index of the freshly loaded frame (before any set_index call).
stocks.index

In [None]:
# Mean closing price per symbol.
stocks.groupby('Symbol')['Close'].mean()

In [None]:
# Mean closing price per (symbol, date) pair; grouping by two keys gives a two-level index.
stocks.groupby(by=['Symbol', 'Date'])['Close'].mean()

In [None]:
# Keep the grouped means in `ser` and look at the index that the two group keys produced.
ser = stocks.groupby(by=['Symbol', 'Date'])['Close'].mean()
ser.index

Creating the same view using `pivot_table`

In [None]:
# Wide view of the closing prices: one row per Symbol, one column per Date.
# pivot_table aggregates with the mean by default, so the cell values match the
# groupby(['Symbol', 'Date']) means above.
# Assumes Symbol and Date are still ordinary columns, i.e. this runs before the
# set_index cell below.
df = stocks.pivot_table(values='Close', index='Symbol', columns='Date')
df

In [None]:
# Move Symbol and Date out of the columns and into a two-level row index.
# Guarded and reassigned (rather than inplace=True) so that re-running this cell
# is a no-op instead of a KeyError on columns that are already in the index.
if list(stocks.index.names) != ['Symbol', 'Date']:
    stocks = stocks.set_index(['Symbol', 'Date'])
stocks

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
CSCO,2016-10-03,31.5,14070500
AAPL,2016-10-03,112.52,21701800
MSFT,2016-10-03,57.42,19189500
AAPL,2016-10-04,113.0,29736800
MSFT,2016-10-04,57.24,20085900
CSCO,2016-10-04,31.35,18460400
MSFT,2016-10-05,57.64,16726400
CSCO,2016-10-05,31.59,11808600
AAPL,2016-10-05,113.05,21453100


In [None]:
# Sort the (Symbol, Date) index; the frame displayed by the previous cell is not
# in sorted order, and label-range slicing on a MultiIndex expects a sorted index.
# Reassign instead of inplace=True so the cell reads as "new frame from old".
stocks = stocks.sort_index()
stocks

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-03,112.52,21701800
AAPL,2016-10-04,113.0,29736800
AAPL,2016-10-05,113.05,21453100
CSCO,2016-10-03,31.5,14070500
CSCO,2016-10-04,31.35,18460400
CSCO,2016-10-05,31.59,11808600
MSFT,2016-10-03,57.42,19189500
MSFT,2016-10-04,57.24,20085900
MSFT,2016-10-05,57.64,16726400


In [None]:
# Row key is a tuple: symbol 'MSFT' on the first level, a list of two dates on
# the second level; ':' keeps all columns.
stocks.loc[('MSFT',['2016-10-04','2016-10-05']),:]

In [None]:
# NOTE(review): identical to the previous cell — likely a leftover duplicate.
stocks.loc[('MSFT',['2016-10-04','2016-10-05']),:]

In [None]:
# All symbols on a single date.
# NOTE(review): this passes three indexers to a 2-D frame and relies on pandas
# treating the first two as the (Symbol, Date) index levels. The tuple form
# stocks.loc[(slice(None), '2016-10-04'), :] spells the row key explicitly —
# confirm that both give the output you expect.
stocks.loc[:,'2016-10-04',:]

In [None]:
# Boolean-mask filter: keep the rows whose Volume equals 11808600.
stocks.loc[stocks['Volume'] == 11808600]

In [None]:
# All rows, with the two columns selected by name.
stocks.loc[:,['Close','Volume']]

In [None]:
# A list of symbols on the first level, a single date on the second level.
stocks.loc[(['AAPL','MSFT'],'2016-10-03'), :]

In [None]:
# One fully specified (symbol, date) key; ':' keeps all columns.
stocks.loc[('AAPL','2016-10-03'), :]

In [None]:
# 1-tuple row key: only the first (Symbol) level is specified.
stocks.loc[('AAPL',), :]  #stocks.loc['AAPL']

In [None]:
# Same (symbol, date) row key, combined with a label slice over the columns
# from 'Close' to 'Volume'.
stocks.loc[('AAPL','2016-10-03'), 'Close':'Volume']

In [None]:
# NOTE(review): same statement as an earlier cell. The "SyntaxError" recorded
# as this cell's output does not match this (syntactically valid) line —
# presumably it is stale output from an earlier edit; re-run to refresh.
stocks.loc[(['AAPL','MSFT'],'2016-10-03'), :]

SyntaxError: ignored

In [None]:
# Symbol range 'AAPL' through 'MSFT' on one date; the recorded output shows the
# AAPL, CSCO and MSFT rows for 2016-10-03.
# NOTE(review): this passes three indexers to a 2-D frame. The tuple form
# stocks.loc[(slice('AAPL', 'MSFT'), '2016-10-03'), :] spells the row key
# explicitly — confirm it gives the same result.
stocks.loc['AAPL':'MSFT','2016-10-03', :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-03,112.52,21701800
CSCO,2016-10-03,31.5,14070500
MSFT,2016-10-03,57.42,19189500


In [None]:
# One symbol on the first level, a list of two dates on the second level.
stocks.loc[('AAPL',['2016-10-03','2016-10-04']), :]

In [None]:
# Every symbol (":" on the Symbol level), restricted to two dates on the Date
# level; all columns are kept.
d = stocks.loc[pd.IndexSlice[:, ['2016-10-03', '2016-10-04']], :]
d

In [None]:
# Keep only the rows of `d` whose Volume exceeds 18,000,000, using a query string.
# (An earlier attempt, d[d.loc('Volume')>18000000], was left commented out.)
d.query('Volume>18000000')