# DATA WRANGLING

# Hierarchical Indexing 

In [1]:
import pandas as pd
import numpy as np

In [7]:
# Series with a two-level (hierarchical) index: the first list holds the outer
# labels, the second the inner labels. Note that the pair ("c", 2) occurs
# twice, so this index is not unique.
s1 = pd.Series(
    np.arange(9),
    index=[list("aaabbbccc"), [1, 2, 4, 5, 4, 3, 2, 2, 3]],
)

In [8]:
# Display the Series; each outer label is printed once per group of inner labels.
s1

a  1    0
   2    1
   4    2
b  5    3
   4    4
   3    5
c  2    6
   2    7
   3    8
dtype: int32

In [9]:
# The index is a MultiIndex: `levels` holds the distinct labels of each level and
# `codes` holds, per row, the position of its label within that level.
s1.index

MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3, 4, 5]],
           codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 3, 4, 3, 2, 1, 1, 2]])

In [10]:
# Partial indexing: pick every row under outer label "a"; the outer level is
# dropped from the result.
s1.loc["a"]

1    0
2    1
4    2
dtype: int32

In [11]:
# Take the element at position 1 (the second row). The outer level is labelled
# with strings, so a bare integer in s1[...] is only resolved through the
# positional fallback, which pandas 2.1+ deprecates; .iloc states the intent
# explicitly. (To select by inner label 1 instead, use s1.loc[:, 1].)
s1.iloc[1]

1

In [14]:
# Two-step label lookup: first the rows under outer label "a", then the entry
# whose inner label is 4.
s1.loc["a"].loc[4]

2

In [15]:
# Label slice on the outer level; both endpoints ("b" and "c") are included.
s1.loc["b":"c"]

b  5    3
   4    4
   3    5
c  2    6
   2    7
   3    8
dtype: int32

In [16]:
# Select several outer-level groups at once by passing a list of labels.
# NOTE(review): the output recorded below lists "a" before "b" although the list
# asks for "b" first; row order for list selection on a MultiIndex may depend on
# the pandas version -- confirm before relying on it.
s1.loc[["b","a"]]

a  1    0
   2    1
   4    2
b  5    3
   4    4
   3    5
dtype: int32

In [19]:
# The leading ":" keeps every outer-level label; the 2 then selects the rows whose
# inner-level label equals 2 (one under "a" and two under "c" in the output below).
s1.loc[:,2]

a    1
c    6
c    7
dtype: int32

In [21]:
# unstack() pivots the inner index level into columns, which needs every
# (outer, inner) pair to be unique. s1 contains ("c", 2) twice, so pandas
# raises ValueError. Catch it and print the message so the failure is still
# shown but the rest of the notebook can run top to bottom.
try:
    s1.unstack()
except ValueError as err:
    print("ValueError:", err)

ValueError: Index contains duplicate entries, cannot reshape

In [2]:
 frame = pd.DataFrame(np.arange(12).reshape((4, 3)),   
                      index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],   
                      columns=[['Ohio', 'Ohio', 'Colorado'],   
                               ['Green', 'Red', 'Green']])


In [3]:
# Display the frame: rows and columns each carry two index levels (still unnamed).
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [4]:
# Row MultiIndex: levels ['a', 'b'] and [1, 2], combined row by row via `codes`.
frame.index

MultiIndex(levels=[['a', 'b'], [1, 2]],
           codes=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [5]:
# Column MultiIndex. In the output below the level labels are listed
# alphabetically ('Colorado' before 'Ohio'); `codes` keeps the original
# column order (Ohio, Ohio, Colorado).
frame.columns

MultiIndex(levels=[['Colorado', 'Ohio'], ['Green', 'Red']],
           codes=[[1, 1, 0], [0, 1, 0]])

In [12]:
# Give every index level a name so later cells can refer to levels by name:
# rows are (key1, key2) and columns are (state, color).
frame.index = frame.index.set_names(["key1", "key2"])
frame.columns = frame.columns.set_names(["state", "color"])

In [13]:
# Display the frame again: the level names now appear in the row and column headers.
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [15]:
# Partial column selection: every column under the outer label "Ohio"; the
# "state" level is dropped, leaving the "color" level as the columns.
frame.loc[:, "Ohio"]

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [17]:
# Same partial selection for "Colorado"; the result is still a DataFrame, with
# the single remaining column "Green".
frame.loc[:, "Colorado"]

Unnamed: 0_level_0,color,Green
key1,key2,Unnamed: 2_level_1
a,1,2
a,2,5
b,1,8
b,2,11


In [22]:
# Positional row access: position 1 is the second row, labelled (a, 2). The
# result is a Series indexed by the column MultiIndex.
frame.iloc[1]

state     color
Ohio      Green    3
          Red      4
Colorado  Green    5
Name: (a, 2), dtype: int32

In [23]:
# Show the full frame once more as a reference for the row selections that follow.
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [25]:
# All rows under outer row label "a", with every column kept; the key1 level
# is dropped so the result is indexed by key2 only.
frame.loc["a", :]

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0,1,2
2,3,4,5


In [27]:
# Select the single row with key1 == "a" and key2 == 1. The row key is written
# as an explicit tuple and the column axis is spelled out with ":" so that the
# pair cannot be misread as (row label, column label).
frame.loc[("a", 1), :]

state     color
Ohio      Green    0
          Red      1
Colorado  Green    2
Name: (a, 1), dtype: int32

In [34]:
# Label slice from "a" through "b" (both included), then keep every second row
# of that range by position. With this data that leaves rows (a, 1) and (b, 1).
frame.loc["a":"b"].iloc[::2]

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8


In [41]:
# Select one column by its full two-level key: the "Green" column under "Ohio".
frame.loc[:, ("Ohio", "Green")]


key1  key2
a     1       0
      2       3
b     1       6
      2       9
Name: (Ohio, Green), dtype: int32

# Reordering and Sorting Levels 

In [43]:
# Exchange the two row-index levels so key2 becomes the outer level and key1
# the inner one. Returns a new frame; `frame` itself is unchanged.
frame.swaplevel(i="key1", j="key2", axis=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [44]:
# swaplevel() interchanges two index levels (here the outer level key1 and the inner level key2); the rows stay in the same order, only the level order changes.

In [47]:
# Sort the rows by the inner row level (position 1, named "key2"): all key2 == 1
# rows come before the key2 == 2 rows.
frame.sort_index(level="key2")

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [48]:
# Sort the rows by the outer row level (position 0, named "key1"); the frame is
# already in this order, so the output matches the original frame.
frame.sort_index(level="key1")

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [49]:
# sort_index(level=...) sorts the rows by the chosen index level and returns a new, sorted frame.

# Summary Statistics by Level 

In [50]:
# Column totals: sum down the rows for every (state, color) column. No grouping
# by index level is applied here.
frame.sum(axis=0)

state     color
Ohio      Green    18
          Red      22
Colorado  Green    26
dtype: int64