In [1]:
# Import pandas under its conventional alias and display the installed version string.
import pandas as pd
pd.__version__

'1.4.2'

In [38]:
# Read the locations with a two-level (state, city) index, sort that index,
# then keep only the first 250 rows of the sorted frame.
subway = (
    pd.read_csv("subway_locations.csv", index_col=["state", "city"])
    .sort_index()
    .iloc[:250]
)

# A tuple passes one label per index level: (state, city).
subway.loc[("AK", "Anchorage")]
# subway.loc[("NY", "Elmhurst")]

Unnamed: 0_level_0,Unnamed: 1_level_0,latitude,longitude
state,city,Unnamed: 2_level_1,Unnamed: 3_level_1
AK,Anchorage,61.2037,-149.7447
AK,Anchorage,61.206,-149.8101
AK,Anchorage,61.206,-149.8101
AK,Anchorage,61.19,-149.8938
AK,Anchorage,61.2037,-149.7447
AK,Anchorage,61.1549,-149.8866
AK,Anchorage,61.19,-149.8938
AK,Anchorage,61.1194,-149.8974
AK,Anchorage,61.2037,-149.7447
AK,Anchorage,61.1535,-149.8289


## Our Big Mac Dataset

In [2]:
# Load the Big Mac prices, parsing the "Date" column as datetimes,
# and preview the first three rows.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=["Date"],
)
bigmac.head(n=3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [3]:
# Show the dtype of each column.
# NOTE(review): the output recorded below is in the format printed by
# `bigmac.info()`, not a dtypes Series -- re-run this cell to refresh it.
bigmac.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 652 non-null    datetime64[ns]
 1   Country              652 non-null    object        
 2   Price in US Dollars  652 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


In [4]:
# Print a summary of the frame: index range, columns, non-null counts, dtypes and memory usage.
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 652 non-null    datetime64[ns]
 1   Country              652 non-null    object        
 2   Price in US Dollars  652 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


## Create a MultiIndex with the set_index Method
- A multi-index is an index with multiple levels or layers.
- If two levels are used, two values need to be passed to determine what row to extract.
- REVIEW: The `set_index` method takes a `Series` from the `DataFrame` and makes it the index.
- If the `set_index` method is passed a list, it will create a multi-index `DataFrame`
- The order of the `Series` in the argument will determine the layering of the multi-index.

In [9]:
bigmac = pd.read_csv("bigmac.csv", parse_dates = ["Date"])
# set_index returns a new DataFrame. The results of the next calls are not
# assigned, so `bigmac` itself is left unchanged by them.
bigmac.set_index("Date")
bigmac.set_index(keys = "Country")

# Passing a list of column names builds a MultiIndex; the order of the list
# is the order of the index levels.
bigmac.set_index(keys = ["Date", "Country"])
bigmac.set_index(keys = ["Country", "Date"])
# Count the distinct values in each column.
bigmac.nunique()

# bigmac.set_index(keys = ["Date", "Country"], inplace = True)
# Rebind `bigmac` to the frame indexed by (Date, Country).
bigmac = bigmac.set_index(keys = ["Date", "Country"])

# All index levels are sorted in ascending order
# The first call discards its result; the second keeps the sorted frame.
bigmac.sort_index()
bigmac = bigmac.sort_index()
bigmac.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98


- The index levels will be named after the `Series`.
- Each index element holds TWO values (one per level), stored together as a tuple.

In [13]:
# Inspect the MultiIndex itself; only the last expression is displayed by the cell.
bigmac.index
bigmac.index.names       # FrozenList(['Date', 'Country'])
type(bigmac.index)       # pandas.core.indexes.multi.MultiIndex
bigmac.index[0]          # (Timestamp('2010-01-01 00:00:00'), 'Argentina')
type(bigmac.index[0])    # tuple

tuple

## Extract Index Level Values with the get_level_values Method
- New way to set index -- use the `index_col` parameter
- The `get_level_values()` method is called on the multi-index.
- The `get_level_values()` method accepts an index level.
- The argument passed can be either the numeric position of the index or its name
- The method returns the index at that level

In [25]:
# Review of index_col parameter:
# index_col builds the (Date, Country) MultiIndex while the file is read,
# and sort_index then orders the rows by that index.
bigmac = (
    pd.read_csv(
        "bigmac.csv",
        parse_dates=["Date"],
        index_col=["Date", "Country"],
    )
    .sort_index()
)
bigmac.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84


In [7]:
# Select an index level by its integer position ...
bigmac.index.get_level_values(0)
bigmac.index.get_level_values(1)

# ... or by its name. Only the last expression is displayed by the cell.
bigmac.index.get_level_values("Date")
bigmac.index.get_level_values("Country")

Index([u'Argentina', u'Australia', u'Brazil', u'Britain', u'Canada', u'Chile',
       u'China', u'Colombia', u'Costa Rica', u'Czech Republic',
       ...
       u'Switzerland', u'Taiwan', u'Thailand', u'Turkey', u'UAE', u'Ukraine',
       u'United States', u'Uruguay', u'Venezuela', u'Vietnam'],
      dtype='object', name=u'Country', length=652)

## The `set_names` Method on Index
- The `set_names()` method is called on the index.
- It changes the names of the indices.

In [26]:
# Review of index_col parameter: load with a (Date, Country) MultiIndex and sort it.
bigmac = (
    pd.read_csv(
        "bigmac.csv",
        parse_dates=["Date"],
        index_col=["Date", "Country"],
    )
    .sort_index()
)

# Without inplace, set_names returns a renamed index; the result is discarded here.
bigmac.index.set_names(names="Date", level=0)
# With inplace=True, both level names are changed on the frame's own index.
bigmac.index.set_names(["Day", "Location"], inplace=True)
bigmac.head(1)

In [27]:
bigmac

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
...,...,...
2010-01-01,Turkey,3.83
2010-01-01,UAE,2.99
2010-01-01,Ukraine,1.83
2010-01-01,United States,3.58


## Extract Rows from a `MultiIndex DataFrame`

In [3]:
# Load with a (Date, Country) MultiIndex and sort it.
bigmac = (
    pd.read_csv(
        "bigmac.csv",
        parse_dates=["Date"],
        index_col=["Date", "Country"],
    )
    .sort_index()
)
# Add a constant second column so the frame has more than one column to select from.
bigmac["Example"] = 1
bigmac.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars,Example
Date,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,Argentina,1.84,1


In [41]:
# Slice between two full (Date, Country) keys. Label slices with .loc include
# both endpoints.
bigmac.loc[("2010-01-01", "Brazil") : ("2010-01-01", "Turkey")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars,Example
Date,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,Brazil,4.76,1
2010-01-01,Britain,3.67,1
2010-01-01,Canada,3.97,1
2010-01-01,Chile,3.18,1
2010-01-01,China,1.83,1
2010-01-01,Colombia,3.91,1
2010-01-01,Costa Rica,3.52,1
2010-01-01,Czech Republic,3.71,1
2010-01-01,Denmark,5.99,1
2010-01-01,Egypt,2.38,1


In [54]:
# A single label selects every row whose first index level (Date) matches.
bigmac.loc["2015-07-01"]
# Two bare labels are matched against the index levels (Date, Country) here.
# Wrapping them in a tuple, e.g. bigmac.loc[("2015-07-01", "Brazil")], makes it
# explicit that both belong to the row key and not to a column.
bigmac.loc["2015-07-01", "Brazil"]
bigmac.loc["2016-01-01", "Germany"]
bigmac.loc["2015-07-01", "Brazil"]

# `.ix` was removed in pandas 1.0, so it raises AttributeError on the pandas
# version this notebook reports (1.4.2). Use `.loc` with the row key as a tuple
# and the full column label ("Pric" does not match any column of this frame).
bigmac.loc[("2015-07-01", "India"), "Price in US Dollars"]

1.8300000000000001

## The `transpose` Method
- Flips the horizontal and vertical axes.

In [40]:
# Load with a (Date, Country) MultiIndex and sort it.
bigmac = (
    pd.read_csv(
        "bigmac.csv",
        parse_dates=["Date"],
        index_col=["Date", "Country"],
    )
    .sort_index()
)
bigmac.head(3)

# transpose returns a new frame: the first call is discarded, the second
# rebinds `bigmac` to the flipped frame.
bigmac.transpose()
bigmac = bigmac.transpose()
bigmac.head(3)

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


In [43]:
# `bigmac` is the transposed frame here: its single row is labelled
# "Price in US Dollars" and its columns carry the (Date, Country) MultiIndex.
bigmac.loc[("Price in US Dollars",)]
# Row label first, then a (Date, Country) tuple that picks one column.
bigmac.loc[("Price in US Dollars",), ("2010-01-01", "Sri Lanka")]

Price in US Dollars    1.83
Name: (2010-01-01 00:00:00, Sri Lanka), dtype: float64

##  The `.swaplevel()` Method
- The `.swaplevel()` method swaps two levels within an index
- No arguments have to be passed if there are only two levels.
- With 3 or more levels, pass the two levels to swap; if no arguments are given, the two innermost levels are swapped.

In [65]:
# Load with a (Date, Country) MultiIndex and sort it.
bigmac = (
    pd.read_csv(
        "bigmac.csv",
        parse_dates=["Date"],
        index_col=["Date", "Country"],
    )
    .sort_index()
)
bigmac.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98


In [13]:
# With no arguments, swaplevel exchanges the two index levels:
# (Date, Country) becomes (Country, Date). The row order is not changed.
bigmac.swaplevel().head(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2010-01-01,1.84
Australia,2010-01-01,3.98
Brazil,2010-01-01,4.76
Britain,2010-01-01,3.67


## The `sort_index` Method on a MultiIndex `DataFrame`

In [8]:
# Load with a (Date, Country) MultiIndex but leave the rows in file order,
# then look at the last three rows.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=["Date"],
    index_col=["Date", "Country"],
)
bigmac.tail(n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Ukraine,1.83
2010-01-01,United States,3.58
2010-01-01,Uruguay,3.32


In [75]:
# Each call returns a new sorted frame; only the last expression is displayed.
bigmac.sort_index(level = 0, ascending = True) # Sort by Date level in ascending order (True for both)
bigmac.sort_index(level = 0, ascending = False) # Sort by Date level in descending order (False for both)

bigmac.sort_index(level = 1, ascending = True) # Sort by Country, ascending order
bigmac.sort_index(level = 1, ascending = False) # Sort by Country, descending order

#bigmac.sort_index(axis = 0, level = [1, 0], ascending = [True, False]) # Sort ascending for Country (level 1), descending for Date (level 0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Vietnam,2.67
2015-07-01,Vietnam,2.75
2015-01-01,Vietnam,2.81
2014-07-01,Vietnam,2.83
2014-01-01,Vietnam,2.84
2016-01-01,Venezuela,0.66
2015-07-01,Venezuela,0.67
2015-01-01,Venezuela,2.53
2014-07-01,Venezuela,6.82
2014-01-01,Venezuela,7.15
