### DataFrame 3

#  MultiIndex and Index Hierarchy
###  DataFrame with MultiIndex of different levels
### zip()
### iterator
#### Advanced indexing: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html
### DataFrame.xs(key, axis=0, level=None, drop_level=True)
#### Return cross-section from the Series/DataFrame.

##### This method takes a key argument to select data at a particular level of a MultiIndex

In [None]:
# pandas.MultiIndex(levels=None, codes=None, sortorder=None, names=None, dtype=None,
# copy=False, name=None, verify_integrity=True)
# Parameters:
## levels: sequence of arrays
# The unique labels for each level.

## codes: sequence of arrays
# Integers for each level designating which label at each location.

## sortorder: optional int
# Level of sortedness (must be lexicographically sorted by that level).

## names: optional sequence of objects
# Names for each of the index levels. (name is accepted for compat).

## copy: bool, default False
# Copy the meta-data.

## verify_integrity: bool, default True
# Check that the levels/codes are consistent and valid.

In [None]:
##  A MultiIndex can be created from a list of arrays (using MultiIndex.from_arrays()),
# an array of tuples (using MultiIndex.from_tuples()), a crossed set of iterables (using MultiIndex.from_product()), 
# or a DataFrame (using MultiIndex.from_frame()). The Index constructor will attempt to return a MultiIndex 
# when it is passed a list of tuples.

In [17]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [34]:
# Example of pd.MultiIndex.from_tuples(): zip() pairs the two label lists
# element by element, and each resulting tuple becomes one index entry.
HierIndex = pd.MultiIndex.from_tuples(
    list(
        zip(
            ['A', 'A', 'B', 'B', 'C', 'C'],  # first (outer) level labels
            ['1', '1', '2', '2', '3', '3'],  # second (inner) level labels
        )
    )
)

In [36]:
# Create a DataFrame with data=randn(6, 2) (6 rows x 2 columns of random
# normal values), index=HierIndex and columns=['M', 'N'].
df = pd.DataFrame(
    data=randn(6, 2),
    index=HierIndex,
    columns=['M', 'N'],
)
df

Unnamed: 0,Unnamed: 1,M,N
A,1,-0.894136,-1.284464
A,1,-0.689024,0.310843
B,2,-0.023584,-0.254688
B,2,-1.07036,-0.252318
C,3,0.317183,0.153631
C,3,-0.401902,0.446829


In [27]:
# Create HierIndex1 with pd.MultiIndex.from_product(iterators).
# from_product() pairs every label of the first list with every label of the
# second list (the Cartesian product of the iterables).
iterators = [
    ['A', 'B', 'C', 'D'],  # labels for the first level
    ['One', 'Two'],        # labels for the second level
]
HierIndex1 = pd.MultiIndex.from_product(iterators)
HierIndex1


MultiIndex(levels=[['A', 'B', 'C', 'D'], ['One', 'Two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]])

In [33]:
# Create df with data=randn(8, 4), index=HierIndex1 and the four column names
# obtained by splitting the string 'J K X Y' on whitespace.
df = pd.DataFrame(
    data=randn(8, 4),
    index=HierIndex1,
    columns='J K X Y'.split(),
)
df

Unnamed: 0,Unnamed: 1,J,K,X,Y
A,One,-0.854119,-0.643539,0.882321,-0.637963
A,Two,-1.403093,0.821088,-1.14428,-1.441128
B,One,-0.337407,-1.925784,-1.410602,0.611076
B,Two,0.475467,0.215999,-1.256075,-0.521937
C,One,-0.788597,-1.450888,0.133552,-0.338144
C,Two,-0.452139,-0.544746,1.113187,0.150537
D,One,-0.691395,-0.638808,-0.543083,-0.460715
D,Two,0.257,-0.058982,2.569763,2.076613


In [38]:
# More on pd.MultiIndex.from_product(iterables): when you want every pairing
# of the elements in two iterables, it can be easier to use the
# MultiIndex.from_product() method. The names argument labels the levels.

iterables = [
    ["bar", "baz", "foo", "qux"],  # labels for level "first"
    ["one", "two"],                # labels for level "second"
]
HierIndex2 = pd.MultiIndex.from_product(iterables, names=["first", "second"])
HierIndex2

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [None]:
# Newer pandas versions display HierIndex2 as a list of tuples instead:
# MultiIndex([('bar', 'one'),
#             ('bar', 'two'),
#             ('baz', 'one'),
#             ('baz', 'two'),
#             ('foo', 'one'),
#             ('foo', 'two'),
#             ('qux', 'one'),
#             ('qux', 'two')],
#            names=['first', 'second'])

In [42]:
# Build an 8 x 4 frame of random normal values indexed by HierIndex2, with
# columns A, B, C, D (from splitting 'A B C D' on whitespace).
df = pd.DataFrame(
    data=randn(8, 4),
    index=HierIndex2,
    columns='A B C D'.split(),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bar,one,-0.915338,-0.423485,0.288193,2.169872
bar,two,0.65907,0.717555,-1.459106,0.429901
baz,one,0.558484,1.972605,-0.443358,-0.797942
baz,two,0.416946,-0.38078,0.63378,1.360477
foo,one,-0.390524,-0.370751,2.184717,-2.245806
foo,two,-0.508012,0.454376,-0.219366,0.729839
qux,one,0.248769,0.449632,-0.30163,-2.323359
qux,two,-0.593802,1.827382,0.341786,0.106939


In [43]:
## Accessing the values using loc: select all rows under the outer-level label 'bar'
df.loc['bar']

Unnamed: 0_level_0,A,B,C,D
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,-0.915338,-0.423485,0.288193,2.169872
two,0.65907,0.717555,-1.459106,0.429901


In [47]:
# Chained .loc: pick the outer label 'foo' first, then the inner label 'one' from that subset
df.loc['foo'].loc['one'] # get the values from a subset of a subset of the df: df.loc[].loc[]...

A   -0.390524
B   -0.370751
C    2.184717
D   -2.245806
Name: one, dtype: float64

In [48]:
# Select column 'A'; the resulting Series keeps both index levels (first, second)
df["A"]

first  second
bar    one      -0.915338
       two       0.659070
baz    one       0.558484
       two       0.416946
foo    one      -0.390524
       two      -0.508012
qux    one       0.248769
       two      -0.593802
Name: A, dtype: float64

In [51]:
# Positional selection with iloc: row positions 0-3 (the first four rows) of column position 1
df.iloc[0:4,1] # slice the first four rows of the 2nd column ('B')

first  second
bar    one      -0.423485
       two       0.717555
baz    one       1.972605
       two      -0.380780
Name: B, dtype: float64

In [60]:
## DataFrame.xs(key, axis=0, level=None, drop_level=True)
# Cross-section for the key 'bar': the matched level is dropped, leaving 'second' as the index
df.xs('bar')

Unnamed: 0_level_0,A,B,C,D
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
one,-0.915338,-0.423485,0.288193,2.169872
two,0.65907,0.717555,-1.459106,0.429901


In [57]:
# Cross-section on level 1 ('second'): rows labelled 'one', indexed by the remaining 'first' level
df.xs('one',level=1)

Unnamed: 0_level_0,A,B,C,D
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,-0.915338,-0.423485,0.288193,2.169872
baz,0.558484,1.972605,-0.443358,-0.797942
foo,-0.390524,-0.370751,2.184717,-2.245806
qux,0.248769,0.449632,-0.30163,-2.323359


In [62]:
# Take the 'baz' cross-section, then pick its 'one' row with loc
df.xs('baz').loc['one']

A    0.558484
B    1.972605
C   -0.443358
D   -0.797942
Name: one, dtype: float64

In [63]:
# more on df.xs(): https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.xs.html