# MultiIndex

In [2]:
import pandas as pd

In [3]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"])
big_mac.head(3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [4]:
big_mac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

### 1. Create A MultiIndex with the .set_index() Method

In [5]:
big_mac.set_index("Country")

Unnamed: 0_level_0,Date,Price in US Dollars
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,2016-01-01,2.39
Australia,2016-01-01,3.74
Brazil,2016-01-01,3.35
Britain,2016-01-01,4.22
Canada,2016-01-01,4.14
Chile,2016-01-01,2.94
China,2016-01-01,2.68
Colombia,2016-01-01,2.43
Costa Rica,2016-01-01,4.02
Czech Republic,2016-01-01,2.98


In [9]:
big_mac.set_index(keys = ["Date", "Country"], inplace=True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35


In [11]:
big_mac.sort_index(inplace=True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [13]:
big_mac.index.names

FrozenList(['Date', 'Country'])

In [14]:
type(big_mac.index)

pandas.core.indexes.multi.MultiIndex

In [15]:
big_mac.index[0]

(Timestamp('2010-01-01 00:00:00'), 'Argentina')

### 2. The .get_level_values() Method

In [18]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
big_mac.sort_index(inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [25]:
# big_mac.index.get_level_values("Country")
big_mac.index.get_level_values(1)

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine',
       'United States', 'Uruguay', 'Venezuela', 'Vietnam'],
      dtype='object', name='Country', length=652)

### 3. The .set_names() Method on MultiIndex

In [26]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
big_mac.sort_index(inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [29]:
big_mac.index.set_names(["Day", "Location"], inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Location,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


### 4. The .sort_index() Method on MultiIndex DataFrame

In [30]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
big_mac.sort_index(inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [33]:
big_mac.sort_index(ascending=[True, False], inplace=True)

### 5. Extract Rows from a MultiIndex DataFrame

In [34]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
big_mac.sort_index(inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [37]:
big_mac.loc[("2010-01-01", "Brazil"), "Price in US Dollars"]

Date        Country
2010-01-01  Brazil     4.76
Name: Price in US Dollars, dtype: float64

In [38]:
big_mac.loc[("2015-07-01", "Chile"), "Price in US Dollars"]

Date        Country
2015-07-01  Chile      3.27
Name: Price in US Dollars, dtype: float64

In [41]:
big_mac.ix[("2016-01-01", "China"), 0]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


2.6800000000000002

### 6. The .transpose() Method

In [42]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
big_mac.sort_index(inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [44]:
big_mac = big_mac.transpose()

In [45]:
big_mac.head(1)

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


In [47]:
big_mac.ix["Price in US Dollars", ("2016-01-01", "Denmark")]

4.3200000000000003

### 7. The .swaplevel() Method

In [48]:
big_mac = pd.read_csv("../00_Datasers/bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"])
big_mac.sort_index(inplace = True)
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [50]:
big_mac = big_mac.swaplevel()
big_mac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2010-01-01,1.84
Australia,2010-01-01,3.98
Brazil,2010-01-01,4.76


### 8. The .stack() Method

In [53]:
world = pd.read_csv("../00_Datasers/worldstats.csv", index_col= ["country", "year"])
world.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0


In [55]:
type(world.stack())

pandas.core.series.Series

In [56]:
world.stack().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,Population,3.920223e+08
Arab World,2015,GDP,2.530102e+12
Arab World,2014,Population,3.842226e+08
Arab World,2014,GDP,2.873600e+12
Arab World,2013,Population,3.765043e+08
Arab World,2013,GDP,2.846994e+12
Arab World,2012,Population,3.688026e+08
Arab World,2012,GDP,2.773270e+12
Arab World,2011,Population,3.610318e+08
Arab World,2011,GDP,2.497945e+12
