# Example for DataFrame
## Copyright (C) Princeton Consultants, 2017
### Import pandas, and create a MultiIndex

In [1]:
import pandas as pd
# Two-level row index: every (from, to) pair below labels one row.
mi4 = pd.MultiIndex.from_tuples(
    [
        ('n1', 'n2'),
        ('n1', 'n3'),
        ('n2', 'n4'),
        ('n3', 'n4'),
        ('n2', 'n3'),
        ('n4', 'n1'),
    ],
    names=['from', 'to'],
)

### Create a DataFrame by column

In [2]:
# Column-wise construction: each dict key names a column and its list supplies
# that column's values, one per entry of mi4.
# NOTE(review): the recorded output lists 'bound' before 'cost' although 'cost'
# is written first here; presumably this pandas version sorted the dict keys.
# Confirm on your own version.
df1 = pd.DataFrame(
    {
        'cost': list(range(0, 60, 10)),
        'bound': list(range(0, 600, 100)),
    },
    index=mi4,
)
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,bound,cost
from,to,Unnamed: 2_level_1,Unnamed: 3_level_1
n1,n2,0,0
n1,n3,100,10
n2,n4,200,20
n3,n4,300,30
n2,n3,400,40
n4,n1,500,50


### Create a DataFrame by row

In [3]:
# Row-wise construction: one (cost, bound) tuple per entry of mi4, with the
# column names supplied separately through `columns`.
df2 = pd.DataFrame.from_records(
    list(zip(range(0, 60, 10), range(0, 600, 100))),
    columns=['cost', 'bound'],
    index=mi4,
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,bound
from,to,Unnamed: 2_level_1,Unnamed: 3_level_1
n1,n2,0,0
n1,n3,10,100
n2,n4,20,200
n3,n4,30,300
n2,n3,40,400
n4,n1,50,500


### Get a single column

In [4]:
# Attribute-style access: the column is read as an attribute of the frame.
# The result is a Series named 'cost' that keeps the (from, to) row index.
df1.cost

from  to
n1    n2     0
      n3    10
n2    n4    20
n3    n4    30
n2    n3    40
n4    n1    50
Name: cost, dtype: int64

In [5]:
# Bracket-style access with the column label; the output is the same Series
# that attribute access (df1.cost) produces.
df1['cost']

from  to
n1    n2     0
      n3    10
n2    n4    20
n3    n4    30
n2    n3    40
n4    n1    50
Name: cost, dtype: int64

### Get all values corresponding to a row

In [6]:
# Plain [] on a DataFrame looks the key up among the column labels, so the row
# label 'n1' is not found. The failure is the point of this cell: catch it and
# print it so that "Restart & Run All" can continue to the cells below.
try:
    df1['n1']
except KeyError as err:
    print('{}: {}'.format(type(err).__name__, err))

KeyError: 'n1'

### Failed, so use `.loc` instead

In [7]:
# A single label given to .loc is matched against the first index level
# ('from'); the rows returned are labelled by the remaining level ('to').
df1.loc['n1']

Unnamed: 0_level_0,bound,cost
to,Unnamed: 1_level_1,Unnamed: 2_level_1
n2,0,0
n3,100,10


### Try to get a slice

In [8]:
# Plain [] does not accept a (label, slice) key. The failure is the point of
# this cell: catch it and print it so that "Restart & Run All" can continue.
# NOTE(review): the recorded run raised TypeError; Exception is caught here so
# the cell still completes if another pandas release raises a different type.
try:
    df1['n1', :]
except Exception as err:
    print('{}: {}'.format(type(err).__name__, err))

TypeError: unhashable type: 'slice'

### Failed, so use `.loc`

In [9]:
# Row label 'n1' plus ':' in the column position (all columns); this returns
# the same two rows as df1.loc['n1'].
df1.loc['n1',:]

Unnamed: 0_level_0,bound,cost
to,Unnamed: 1_level_1,Unnamed: 2_level_1
n2,0,0
n3,100,10


### Now try to slice on the second dimension of the MultiIndex

In [10]:
# With two positions inside .loc[...], the second one is read as a column
# selector, so 'n3' is searched for among the columns rather than in the 'to'
# level of the row index. The failure is the point of this cell: catch it and
# print it so that "Restart & Run All" can continue.
try:
    df1.loc[:, 'n3']
except KeyError as err:
    print('{}: {}'.format(type(err).__name__, err))

KeyError: 'the label [n3] is not in the [columns]'

### Failed, but you can get an individual element of the table

In [11]:
# A complete (from, to) tuple as the row key plus a column label picks out a
# single value from the table.
df1.loc[('n1','n3'),'cost']

10

### Use `pd.IndexSlice` to do slicing

In [12]:
# pd.IndexSlice lets a multi-level row key be written with slice syntax.
# `idx` is reused by later cells.
idx=pd.IndexSlice
# 'n1' on the first level ('from'), everything on the second level ('to').
df1.loc[idx['n1',:]]

Unnamed: 0_level_0,bound,cost
to,Unnamed: 1_level_1,Unnamed: 2_level_1
n2,0,0
n3,100,10


### Now try slicing on the second dimension using `pd.IndexSlice`

In [13]:
# Without an explicit column selector, the key idx[:, 'n3'] is still read as
# (rows, columns), so 'n3' is looked up among the columns and the lookup fails.
# The failure is the point of this cell: catch it and print it so that
# "Restart & Run All" can continue.
try:
    df1.loc[idx[:, 'n3']]
except KeyError as err:
    print('{}: {}'.format(type(err).__name__, err))

KeyError: 'the label [n3] is not in the [columns]'

### Sort the index to get a new DataFrame, and try again

In [14]:
# sort_index() returns a new frame sorted by its row index; it is kept outside
# the try block because the cells that follow reuse `dfs`.
dfs = df1.sort_index()

# Sorting does not change how the key is read: 'n3' is still looked up among
# the columns. The failure is the point of this cell: catch it and print it so
# that "Restart & Run All" can continue.
try:
    dfs.loc[idx[:, 'n3']]
except KeyError as err:
    print('{}: {}'.format(type(err).__name__, err))

KeyError: 'the label [n3] is not in the [columns]'

### Include a column name and it works

In [15]:
# Row key idx[:, 'n3'] (any 'from', 'to' equal to 'n3') followed by an explicit
# column label. With the column position filled in, the slice is applied to the
# row index and the result is the 'cost' Series for the matching rows.
dfs.loc[idx[:,'n3'],'cost']

from  to
n1    n3    10
n2    n3    40
Name: cost, dtype: int64

### You can also request all columns using `:`

In [16]:
# ':' in the column position keeps every column, so the matching rows come
# back as a DataFrame.
dfs.loc[idx[:,'n3'],:]

Unnamed: 0_level_0,Unnamed: 1_level_0,bound,cost
from,to,Unnamed: 2_level_1,Unnamed: 3_level_1
n1,n3,100,10
n2,n3,400,40


### And you can specify more than one column as a list

In [17]:
# A list of column labels selects several columns; the output shows them in
# the order listed here (cost, then bound).
dfs.loc[idx[:,'n3'],['cost','bound']]

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,bound
from,to,Unnamed: 2_level_1,Unnamed: 3_level_1
n1,n3,10,100
n2,n3,40,400


In [18]:
# Plain [] with a list of column labels returns those columns for every row,
# in the order listed.
df1[['cost','bound']]

Unnamed: 0_level_0,Unnamed: 1_level_0,cost,bound
from,to,Unnamed: 2_level_1,Unnamed: 3_level_1
n1,n2,0,0
n1,n3,10,100
n2,n4,20,200
n3,n4,30,300
n2,n3,40,400
n4,n1,50,500


### Note that if you grab a column, it is a Series, so slicing works more intuitively

In [19]:
# On the Series there are no columns to confuse the key with, so it addresses
# the index levels directly: 'n1' on 'from', everything on 'to'.
df1.cost['n1',:]

to
n2     0
n3    10
Name: cost, dtype: int64

In [20]:
# Everything on 'from', 'n3' on 'to'; the result is indexed by the remaining
# 'from' level.
df1.cost[:,'n3']

from
n1    10
n2    40
Name: cost, dtype: int64