### CHAPTER 8
# Data Wrangling: Join, Combine, and Reshape


In [2]:
import pandas as pd
import numpy as np

## 8.1 Hierarchical Indexing

In [6]:
# Nine random draws labelled by a two-level index: letters on the outer
# level, integers on the inner level.
# NOTE(review): no seed is set in the visible cells, so the values differ
# from run to run — confirm whether that is intended.
data = pd.Series(
    np.random.uniform(size=9),
    index=pd.MultiIndex.from_arrays([
        ["a", "a", "a", "b", "b", "c", "c", "d", "d"],
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ]),
)
data

a  1    0.115205
   2    0.439073
   3    0.854943
b  1    0.738754
   3    0.686823
c  1    0.306195
   2    0.534106
d  2    0.680157
   3    0.827613
dtype: float64

In [7]:
# The index of `data` is a MultiIndex; its repr lists one (outer, inner)
# tuple per row.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [8]:
# Partial indexing: selecting an outer-level label returns the rows under it,
# indexed by the inner level only.
data["b"]

1    0.738754
3    0.686823
dtype: float64

In [10]:
# Label slice on the outer level; both endpoints ("b" and "c") are included.
data["b":"c"]

b  1    0.738754
   3    0.686823
c  1    0.306195
   2    0.534106
dtype: float64

In [11]:
# Select on the inner level: keep every outer label (`:`) that has an entry
# whose inner label is 2.
data.loc[:, 2]

a    0.439073
c    0.534106
d    0.680157
dtype: float64

In [12]:
# Pivot the inner index level into columns; (outer, inner) pairs that are
# absent from `data` show up as missing values.
data.unstack()

Unnamed: 0,1,2,3
a,0.115205,0.439073,0.854943
b,0.738754,,0.686823
c,0.306195,0.534106,
d,,0.680157,0.827613


In [13]:
# Stack the columns back into the inner index level; in the recorded output
# this round trip reproduces the original Series.
# NOTE(review): whether rows for missing values are dropped may depend on the
# pandas version — confirm against the version in use.
data.unstack().stack()

a  1    0.115205
   2    0.439073
   3    0.854943
b  1    0.738754
   3    0.686823
c  1    0.306195
   2    0.534106
d  2    0.680157
   3    0.827613
dtype: float64

In [14]:
# 4x3 table holding the integers 0..11, with two-level labels on both the
# rows and the columns.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays([["a", "a", "b", "b"], [1, 2, 1, 2]]),
    columns=pd.MultiIndex.from_arrays([
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ]),
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [15]:
# Give names to the row-index levels and the column-index levels; the names
# then appear in the displayed frame.
frame.index.names = ["key1", "key2"]
frame.columns.names = ["state", "color"]
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [16]:
# Number of levels in the row index.
frame.index.nlevels

2

In [17]:
# Partial column indexing: selecting the outer column label "Ohio" returns its
# sub-columns, keyed by the remaining "color" level.
frame["Ohio"]

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [18]:
# A MultiIndex can also be built directly from parallel arrays (one array per
# level), with the level names supplied via `names`.
pd.MultiIndex.from_arrays([["Ohio", "Ohio", "Colorado"],
                           ["Green", "Red", "Green"]],
                          names=["state", "color"])

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

#### Reordering and Sorting Levels