In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.Series(np.random.uniform(size=9),
                index=[["a", "a", "a", "b", "b", "c", "c", "d", "d"],
                      [1, 2, 3, 1, 3, 1, 2, 2, 3]])

In [3]:
data

a  1    0.797666
   2    0.468681
   3    0.827024
b  1    0.110667
   3    0.488918
c  1    0.325989
   2    0.509430
d  2    0.970532
   3    0.385578
dtype: float64

In [4]:
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [5]:
data["b"]

1    0.110667
3    0.488918
dtype: float64

In [6]:
data["c"]

1    0.325989
2    0.509430
dtype: float64

In [7]:
data["d"]

2    0.970532
3    0.385578
dtype: float64

In [8]:
data.loc[["b", "d"]]

b  1    0.110667
   3    0.488918
d  2    0.970532
   3    0.385578
dtype: float64

In [9]:
data.loc[:, 2]

a    0.468681
c    0.509430
d    0.970532
dtype: float64

In [10]:
#rearrange this data into a DataFrame using its unstack method
data.unstack()

Unnamed: 0,1,2,3
a,0.797666,0.468681,0.827024
b,0.110667,,0.488918
c,0.325989,0.50943,
d,,0.970532,0.385578


In [11]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
                    index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
                    columns=[["Ohio", "Ohio", "Colorado"],
                            ["Green", "Red", "Green"]])

In [12]:
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [13]:
frame.index.names = {"key1", "key2"}

In [14]:
frame.columns.names = ["state", "color"]

In [15]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [16]:
frame.index.nlevels

2

In [17]:
frame["Ohio"]

Unnamed: 0_level_0,color,Green,Red
key2,key1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [18]:
pd.MultiIndex.from_arrays([["Ohio", "Ohio", "Colorado"],
                          ["Green", "Red", "Green"]],
                         names=["state", "color"])

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

In [19]:
frame.swaplevel("key1", "key2")

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [20]:
frame.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [21]:
frame.swaplevel(0, 1).sort_index(level=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11


In [22]:
frame.groupby(level="key2").sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,3,5,7
b,15,17,19


In [23]:
#Indexing with a DataFrames columns
frame = pd.DataFrame({"a": range(7), "b": range(7, 0, -1),
                     "c": ["one", "one", "one", "two", "two",
                          "two", "two"],
                     "d": [0, 1, 2, 0, 1, 2, 3]})

In [24]:
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [25]:
frame2 = frame.set_index(["c", "d"])

In [26]:
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [27]:
#Combining and Merging Datasets
df1 = pd.DataFrame({"key": ["b", "b", "a", "c", "a", "a", "b"],
                   "data1": pd.Series(range(7), dtype="int64")})

In [28]:
df2 = pd.DataFrame({"key": ["a", "b", "d"],
                   "data2": pd.Series(range(3), dtype="int64")})

In [29]:
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [30]:
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [31]:
pd.merge(df1, df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [32]:
pd.merge(df1, df2, on="key")

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0
