In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [2]:
# Outer-level labels: each of the three groups is repeated three times,
# once per inner label.
outerIndex = [f"Group{g}" for g in (1, 2, 3) for _ in range(3)]

In [3]:
# Inner-level labels: the same three labels, repeated three times.
innerIndex = ["Index1", "Index2", "Index3"] * 3

In [16]:
# zip pairs the two lists element-wise, producing one (outer, inner) tuple per
# position, as the output shows.
list(zip(outerIndex, innerIndex))

[('Group1', 'Index1'),
 ('Group1', 'Index2'),
 ('Group1', 'Index3'),
 ('Group2', 'Index1'),
 ('Group2', 'Index2'),
 ('Group2', 'Index3'),
 ('Group3', 'Index1'),
 ('Group3', 'Index2'),
 ('Group3', 'Index3')]

In [17]:
# Keep the (outer, inner) pairs as a list of tuples.
hierarchy = list(zip(outerIndex, innerIndex))

In [18]:
# Turn the list of tuples into a pandas MultiIndex.
# NOTE(review): this rebinds `hierarchy` from a list of tuples to a MultiIndex.
hierarchy = pd.MultiIndex.from_tuples(hierarchy)

In [19]:
hierarchy  # Display the MultiIndex

MultiIndex([('Group1', 'Index1'),
            ('Group1', 'Index2'),
            ('Group1', 'Index3'),
            ('Group2', 'Index1'),
            ('Group2', 'Index2'),
            ('Group2', 'Index3'),
            ('Group3', 'Index1'),
            ('Group3', 'Index2'),
            ('Group3', 'Index3')],
           )


### Creating multi-index dataframe:

In [20]:
# Seed NumPy's global RNG so the random values are the same on every
# Restart & Run All (the original unseeded call produced new numbers each run).
np.random.seed(42)

# 3 groups x 3 inner labels = 9 rows, with 3 columns of standard-normal samples.
# `hierarchy` supplies the row index.
df = pd.DataFrame(
    randn(9, 3),
    index=hierarchy,
    columns=["Column1", "Column2", "Column3"],
)

In [21]:
df  # The rows are now displayed grouped under the outer index labels

Unnamed: 0,Unnamed: 1,Column1,Column2,Column3
Group1,Index1,-1.121247,-0.513446,1.015014
Group1,Index2,-1.064292,0.210763,-0.658162
Group1,Index3,-0.868284,-0.407767,0.107189
Group2,Index1,0.02184,-1.915961,-0.665936
Group2,Index2,0.086969,0.266706,2.226736
Group2,Index3,1.27068,-1.589957,0.596717
Group3,Index1,-1.088992,0.06447,-1.700142
Group3,Index2,-0.196494,1.215878,-0.764454
Group3,Index3,-0.176253,1.77348,0.987423


### Let's perform some operations on this dataframe:

In [27]:
df["Column1"]  # Select the "Column1" values for every (group, index) row

Group1  Index1   -1.121247
        Index2   -1.064292
        Index3   -0.868284
Group2  Index1    0.021840
        Index2    0.086969
        Index3    1.270680
Group3  Index1   -1.088992
        Index2   -0.196494
        Index3   -0.176253
Name: Column1, dtype: float64

In [30]:
df.loc["Group1"]  # Select only the rows that belong to "Group1"

Unnamed: 0,Column1,Column2,Column3
Index1,-1.121247,-0.513446,1.015014
Index2,-1.064292,0.210763,-0.658162
Index3,-0.868284,-0.407767,0.107189


In [38]:
df.loc[["Group1", "Group2"]]  # Select the rows of both "Group1" and "Group2"

Unnamed: 0,Unnamed: 1,Column1,Column2,Column3
Group1,Index1,-1.121247,-0.513446,1.015014
Group1,Index2,-1.064292,0.210763,-0.658162
Group1,Index3,-0.868284,-0.407767,0.107189
Group2,Index1,0.02184,-1.915961,-0.665936
Group2,Index2,0.086969,0.266706,2.226736
Group2,Index3,1.27068,-1.589957,0.596717


In [41]:
df.loc["Group1"].loc["Index1"]  # Within "Group1", select the row labelled "Index1"

Column1   -1.121247
Column2   -0.513446
Column3    1.015014
Name: Index1, dtype: float64

In [42]:
type(df.loc["Group1"].loc["Index1"])  # As the output shows, a single selected row is a Series

pandas.core.series.Series

In [57]:
# Single value at row ("Group1", "Index1") and column "Column1".
# One .loc call with a row tuple and a column label replaces the three chained
# lookups (.loc[...].loc[...][...]) and returns the same scalar.
df.loc[("Group1", "Index1"), "Column1"]

-1.1212469097016808

#### What if we want to give names to the Groups and Indexes levels, just like the columns have names:

In [58]:
df.index.names  # Inspect the names of the index levels

FrozenList([None, None])

##### None, None means they have no name

In [59]:
# Names are assigned to the index levels in order, starting from the left
# (outermost) level: "Groups" first, then "Indexes".
df.index.names = ["Groups", "Indexes"]

In [60]:
df  # Display the frame again; the index levels are now labelled "Groups" and "Indexes"

Unnamed: 0_level_0,Unnamed: 1_level_0,Column1,Column2,Column3
Groups,Indexes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Group1,Index1,-1.121247,-0.513446,1.015014
Group1,Index2,-1.064292,0.210763,-0.658162
Group1,Index3,-0.868284,-0.407767,0.107189
Group2,Index1,0.02184,-1.915961,-0.665936
Group2,Index2,0.086969,0.266706,2.226736
Group2,Index3,1.27068,-1.589957,0.596717
Group3,Index1,-1.088992,0.06447,-1.700142
Group3,Index2,-0.196494,1.215878,-0.764454
Group3,Index3,-0.176253,1.77348,0.987423


***

In [61]:
df.xs("Group1")  # Gives the same rows as df.loc["Group1"]

Unnamed: 0_level_0,Column1,Column2,Column3
Indexes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Index1,-1.121247,-0.513446,1.015014
Index2,-1.064292,0.210763,-0.658162
Index3,-0.868284,-0.407767,0.107189


In [66]:
df.xs("Group2").xs("Index1")  # Gives the same row as df.loc["Group2"].loc["Index1"]

Column1    0.021840
Column2   -1.915961
Column3   -0.665936
Name: Index1, dtype: float64

In [69]:
# Gives the same value as df.loc["Group2"].loc["Index1"]["Column1"]: the final
# .xs("Column1") is applied to the Series returned by the previous step.
# NOTE(review): the original note said that typing ".xs" before "Column1"
# returns an error, yet this cell does so and succeeds. Presumably it meant
# calling df.xs("Column1") directly on the DataFrame -- TODO confirm.
df.xs("Group2").xs("Index1").xs("Column1")

0.021839942998728896

### What if we want to take the first Index from every Group:
We will use the **level** parameter of **.xs**

In [72]:
# Deliberate failure demo: "Index1" is a label of the inner level, so a plain
# .xs lookup raises KeyError. Catch and print it so the error is still shown
# but Restart & Run All can continue past this cell.
try:
    df.xs("Index1")
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'Index1'

• This raises an error because the DataFrame has a **multi-level index**: by default `.xs` searches from the outside in, starting with the outermost level.\
\
• Each level has a **name** that says where to look; in this notebook the levels are **"Groups"** and **"Indexes"**. Because **Groups** comes first, pandas checks it and does not find any **"Index1"** label there. That's why we have to specify a **"level"** first.\
\
• If you press Shift+Tab you will see the "key" and "level" parameters. **"level"** means **"where to look"**, as the example below shows.

In [74]:
# Look for "Index1" in the "Indexes" level and return its row from every group.
df.xs(key="Index1", level="Indexes")

Unnamed: 0_level_0,Column1,Column2,Column3
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Group1,-1.121247,-0.513446,1.015014
Group2,0.02184,-1.915961,-0.665936
Group3,-1.088992,0.06447,-1.700142


In [77]:
df  # Full frame for comparison: check the "Index1" row of every group against the previous output

Unnamed: 0_level_0,Unnamed: 1_level_0,Column1,Column2,Column3
Groups,Indexes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Group1,Index1,-1.121247,-0.513446,1.015014
Group1,Index2,-1.064292,0.210763,-0.658162
Group1,Index3,-0.868284,-0.407767,0.107189
Group2,Index1,0.02184,-1.915961,-0.665936
Group2,Index2,0.086969,0.266706,2.226736
Group2,Index3,1.27068,-1.589957,0.596717
Group3,Index1,-1.088992,0.06447,-1.700142
Group3,Index2,-0.196494,1.215878,-0.764454
Group3,Index3,-0.176253,1.77348,0.987423


In [83]:
# Look for "Group1" in the "Groups" level and return all of its "Indexes" rows.
# Compare with the "Group1" rows of the full frame shown above.
df.xs(key="Group1", level="Groups")

Unnamed: 0_level_0,Column1,Column2,Column3
Indexes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Index1,-1.121247,-0.513446,1.015014
Index2,-1.064292,0.210763,-0.658162
Index3,-0.868284,-0.407767,0.107189
