In [1]:
import pandas as pd

In [6]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"]) #import bigmac data with the Date column as a datetime datatype
bigmac.head(3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [9]:
bigmac.dtypes
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
Date                   652 non-null datetime64[ns]
Country                652 non-null object
Price in US Dollars    652 non-null float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


# Create a MultiIndex with the .set_index() Method

In [10]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"]) #import bigmac data with the Date column as a datetime datatype
bigmac.head(3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [21]:
bigmac.set_index(["Date", "Country"], inplace = True) # multiIndex is an index with multiple layers/strings/dimensions
# first parameter is the first layer in multiIndex
# most outer(first) layer should have the least number of unique values(increasing values per layer)
bigmac.head(3)

KeyError: 'Date'

In [20]:
bigmac.sort_index(inplace = True) # sorts indexes from outside layer to inside layer

In [22]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [25]:
bigmac.index # returns lists of each layer
bigmac.index.names # returns list of label names that make up two layers

FrozenList(['Date', 'Country'])

In [26]:
type(bigmac.index) # MultiIndex is a unique type

pandas.core.indexes.multi.MultiIndex

In [28]:
bigmac.index[0] # returns tuple of value from each layer at n position

(Timestamp('2010-01-01 00:00:00'), 'Argentina')

# The .get_level_values() Method

In [33]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"]) #import bigmac data with the Date column as a datetime datatype
bigmac.sort_index(inplace = True)                               # ^parameter sets MultiIndex on import (using list of strings)
    # ^improves efficiency
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [37]:
bigmac.index.get_level_values(0) # returns values on the given index of the MultiIndex / Accepts a numberic argument or level name
# most outer layer(date) starts at position 0
bigmac.index.get_level_values("Date") # same as above

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

In [39]:
bigmac.index.get_level_values(1)
bigmac.index.get_level_values("Country") # same as above

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine',
       'United States', 'Uruguay', 'Venezuela', 'Vietnam'],
      dtype='object', name='Country', length=652)

# The .set_names() Method

In [40]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"]) #import bigmac data with the Date column as a datetime datatype
bigmac.sort_index(inplace = True)                               # ^parameter sets MultiIndex on import (using list of strings)
    # ^improves efficiency
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [46]:
bigmac.index.set_names(["Day", "Location"], inplace = True) # sets/changes index/column name to given parameter

In [47]:
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Location,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


# The .sort_index() Method on a MultiIndex DataFrame

In [48]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"]) #import bigmac data with the Date column as a datetime datatype
bigmac.sort_index(inplace = True)                               # ^parameter sets MultiIndex on import (using list of strings)
    # ^improves efficiency
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [54]:
bigmac.sort_index(ascending = [True, False], inplace = True) # specifies which argument to apply to a particular layer(use a list)

In [55]:
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Uruguay,3.32
2010-01-01,United States,3.58
2010-01-01,Ukraine,1.83


# Extract Rows form a MultiIndex DataFrame

In [56]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"]) #import bigmac data with the Date column as a datetime datatype
bigmac.sort_index(inplace = True)                               # ^parameter sets MultiIndex on import (using list of strings)
    # ^improves efficiency
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [61]:
bigmac.loc[("2010-01-01", "Brazil"), "Price in US Dollars"] #must use tuple not a list for this first parameter in .loc for pandas / returns only selected row
# tuple is the first argument, the column is the second (horizontal and vertical respectively)

Date        Country
2010-01-01  Brazil     4.76
Name: Price in US Dollars, dtype: float64

In [64]:
bigmac.loc[("2015-07-01", "Chile"), "Price in US Dollars"]

Date        Country
2015-07-01  Chile      3.27
Name: Price in US Dollars, dtype: float64

# The .transpose() Method

In [67]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"], index_col = ["Date", "Country"]) #import bigmac data with the Date column as a datetime datatype
bigmac.sort_index(inplace = True)                               # ^parameter sets MultiIndex on import (using list of strings)
    # ^improves efficiency
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
