# MultiIndex

In [1]:
import pandas as pd

## This Module's Dataset

In [4]:
# Load the raw Big Mac prices; the "Date" column is parsed into datetimes
# using the explicit "%Y-%m-%d" format instead of being left as strings.
bigmac_df = pd.read_csv("bigmac.csv", parse_dates=["Date"], date_format="%Y-%m-%d")

In [5]:
bigmac_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1386 entries, 0 to 1385
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 1386 non-null   datetime64[ns]
 1   Country              1386 non-null   object        
 2   Price in US Dollars  1386 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 32.6+ KB


In [9]:
bigmac_df.sort_values("Price in US Dollars", ascending=False)

Unnamed: 0,Date,Country,Price in US Dollars
537,2011-07-01,Norway,8.311708
517,2011-07-01,Switzerland,8.063016
372,2008-06-01,Norway,7.875333
661,2013-01-01,Norway,7.842279
745,2014-01-01,Norway,7.795435
...,...,...,...
47,2001-04-01,Philippines,1.172962
155,2004-05-01,Uruguay,1.002186
87,2002-04-01,South Africa,0.889908
56,2002-04-01,Argentina,0.798722


## Create a MultiIndex
- A **MultiIndex** is an index with multiple levels or layers.
- Pass the `set_index` method a list of column names to create a multi-index **DataFrame**
- `read_csv` can also build the **MultiIndex** at load time: pass a list of column names to its `index_col` parameter.

In [18]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [17]:
# Two-step lookup: the first .loc selects every row for the date (leaving a
# frame indexed by Country), the second picks the country from that sub-frame.
bigmac.loc["2000-04-01"].loc["United States"]

Price in US Dollars    2.51
Name: United States, dtype: float64

In [20]:
# Only the last expression of a cell is displayed automatically, so the level
# names are printed explicitly; otherwise their value is silently discarded.
print(bigmac.index.names)
# Each entry of a MultiIndex is a tuple holding one label per level.
bigmac.index[0]

(Timestamp('2000-04-01 00:00:00'), 'Argentina')

In [21]:
type(bigmac.index[0])

tuple

## Extract Index Level Values
- The `get_level_values` method extracts an **Index** with the values from one level in the **MultiIndex**. The level can be given by position or by name.

In [23]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [26]:
bigmac.index.get_level_values("Date")

DatetimeIndex(['2000-04-01', '2000-04-01', '2000-04-01', '2000-04-01',
               '2000-04-01', '2000-04-01', '2000-04-01', '2000-04-01',
               '2000-04-01', '2000-04-01',
               ...
               '2020-07-01', '2020-07-01', '2020-07-01', '2020-07-01',
               '2020-07-01', '2020-07-01', '2020-07-01', '2020-07-01',
               '2020-07-01', '2020-07-01'],
              dtype='datetime64[ns]', name='Date', length=1386, freq=None)

In [30]:
bigmac.index.get_level_values(1)

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Czech Republic', 'Denmark', 'Euro area',
       ...
       'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'Ukraine',
       'United Arab Emirates', 'United States', 'Uruguay', 'Vietnam'],
      dtype='object', name='Country', length=1386)

In [31]:
bigmac.index.get_level_values("Country")

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Czech Republic', 'Denmark', 'Euro area',
       ...
       'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'Ukraine',
       'United Arab Emirates', 'United States', 'Uruguay', 'Vietnam'],
      dtype='object', name='Country', length=1386)

## Rename Index Levels
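- Use the `set_names` method on a **MultiIndex** to change one or more level names.
- Use the `names` and `level` parameters together to rename a single level; `level` accepts the level's position or its current name.
- Pass `names` a list of strings to overwrite all level names at once.
- `set_names` returns a new index rather than modifying the existing one, so assign the result back to the **DataFrame**'s `index` attribute to apply the change.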

In [38]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [39]:
bigmac.index = bigmac.index.set_names(names="Day", level=0)

In [40]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [41]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [42]:
bigmac.index = bigmac.index.set_names(names="Day", level="Date")

In [43]:
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [44]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [46]:
bigmac.index = bigmac.index.set_names(names=["Day", "Place"])

In [47]:
bigmac

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Place,Unnamed: 2_level_1
2000-04-01,Argentina,2.500000
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002000
2000-04-01,Canada,1.938776
...,...,...
2020-07-01,Ukraine,2.174714
2020-07-01,United Arab Emirates,4.015846
2020-07-01,United States,5.710000
2020-07-01,Uruguay,4.327418


## Targeting MultiIndex Levels with sort_index

In [48]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"])
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Canada,1.938776
2000-04-01,Switzerland,3.470588


In [49]:
# One boolean per index level: Date ascending, Country descending.
bigmac.sort_index(ascending=[True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,United States,2.510000
2000-04-01,Thailand,1.447368
2000-04-01,Taiwan,2.287582
2000-04-01,Switzerland,3.470588
2000-04-01,Sweden,2.714932
...,...,...
2020-07-01,Brazil,3.913528
2020-07-01,Bahrain,3.713035
2020-07-01,Azerbaijan,2.324897
2020-07-01,Australia,4.578450


## Extract Rows from MultiIndex Dataframes
- Pass a tuple of level labels to `loc` to select a row by label; `iloc` still takes the row's integer position.

In [50]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [52]:
bigmac.loc[("2000-04-01", "United States")]

Price in US Dollars    2.51
Name: (2000-04-01 00:00:00, United States), dtype: float64

In [62]:
# Position 27 in the sorted frame is the (2000-04-01, United States) row,
# i.e. the same row the tuple-based .loc lookup returns by label.
bigmac.iloc[27]

Price in US Dollars    2.51
Name: (2000-04-01 00:00:00, United States), dtype: float64

In [63]:
bigmac.loc["2000-04-01"]

Unnamed: 0_level_0,Price in US Dollars
Country,Unnamed: 1_level_1
Argentina,2.5
Australia,1.541667
Brazil,1.648045
Britain,3.002
Canada,1.938776
Chile,2.451362
China,1.195652
Czech Republic,1.390537
Denmark,3.078358
Euro area,2.3808


In [66]:
# Not recommended: without a tuple, the second argument could be read as
# either an inner index label or a column label.
bigmac.loc["2000-04-01", "United States"]

Price in US Dollars    2.51
Name: (2000-04-01 00:00:00, United States), dtype: float64

In [64]:
# The second positional argument can be an index label OR a column label.
# Here "Price in US Dollars" is matched as a column, which is why passing
# row keys to loc without wrapping them in a tuple is ambiguous.
bigmac.loc["2000-04-01", "Price in US Dollars"]

Country
Argentina         2.500000
Australia         1.541667
Brazil            1.648045
Britain           3.002000
Canada            1.938776
Chile             2.451362
China             1.195652
Czech Republic    1.390537
Denmark           3.078358
Euro area         2.380800
Hong Kong         1.309371
Hungary           1.215054
Indonesia         1.825047
Israel            3.580247
Japan             2.773585
Malaysia          1.189474
Mexico            2.221041
New Zealand       1.691542
Poland            1.279070
Russia            1.385965
Singapore         1.882353
South Africa      1.339286
South Korea       2.707581
Sweden            2.714932
Switzerland       3.470588
Taiwan            2.287582
Thailand          1.447368
United States     2.510000
Name: Price in US Dollars, dtype: float64

In [68]:
# Preferred form: wrap the row's level labels in a tuple. A column label can
# follow the tuple, but this frame has only one column, so there is little to show.
bigmac.loc[("2000-04-01", "Russia")]

Price in US Dollars    1.385965
Name: (2000-04-01 00:00:00, Russia), dtype: float64

In [71]:
bigmac.loc[("2000-04-01", "Hungary"):("2000-04-01", "Poland")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Hungary,1.215054
2000-04-01,Indonesia,1.825047
2000-04-01,Israel,3.580247
2000-04-01,Japan,2.773585
2000-04-01,Malaysia,1.189474
2000-04-01,Mexico,2.221041
2000-04-01,New Zealand,1.691542
2000-04-01,Poland,1.27907


## The transpose Method
- The `transpose` method inverts/flips the horizontal and vertical axes of the **DataFrame**
- The columns become the index and the index becomes the columns.

In [72]:
bigmac = bigmac_df.copy().set_index(keys=["Date", "Country"]).sort_index()
bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.541667
2000-04-01,Brazil,1.648045
2000-04-01,Britain,3.002
2000-04-01,Canada,1.938776


In [76]:
start = ("2018-01-01", "China")
end = ("2018-01-01", "Denmark")
bigmac.loc[start:end]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2018-01-01,China,3.171642
2018-01-01,Colombia,3.832468
2018-01-01,Costa Rica,4.027932
2018-01-01,Czech Republic,3.807779
2018-01-01,Denmark,4.93202


In [79]:
bigmac.loc[start:end].transpose() # shows how there can be multi indexes on columns too

Date,2018-01-01,2018-01-01,2018-01-01,2018-01-01,2018-01-01
Country,China,Colombia,Costa Rica,Czech Republic,Denmark
Price in US Dollars,3.171642,3.832468,4.027932,3.807779,4.93202


In [93]:
bigmac.transpose().columns

MultiIndex([('2000-04-01',            'Argentina'),
            ('2000-04-01',            'Australia'),
            ('2000-04-01',               'Brazil'),
            ('2000-04-01',              'Britain'),
            ('2000-04-01',               'Canada'),
            ('2000-04-01',                'Chile'),
            ('2000-04-01',                'China'),
            ('2000-04-01',       'Czech Republic'),
            ('2000-04-01',              'Denmark'),
            ('2000-04-01',            'Euro area'),
            ...
            ('2020-07-01',               'Sweden'),
            ('2020-07-01',          'Switzerland'),
            ('2020-07-01',               'Taiwan'),
            ('2020-07-01',             'Thailand'),
            ('2020-07-01',               'Turkey'),
            ('2020-07-01',              'Ukraine'),
            ('2020-07-01', 'United Arab Emirates'),
            ('2020-07-01',        'United States'),
            ('2020-07-01',              'Uruguay

In [95]:
bigmac.transpose().index

Index(['Price in US Dollars'], dtype='object')

## The stack Method
- The `stack` method moves the column index to the row index.
- Pandas will return a **MultiIndex Series**
- Think of it like stacking index levels for a **MultiIndex**.

In [96]:
world_df = pd.read_csv('worldstats.csv', index_col=["year", "country"]).sort_index()
world_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,Afghanistan,8994793.0,537777800.0
1960,Algeria,11124892.0,2723638000.0
1960,Australia,10276477.0,18567590000.0
1960,Austria,7047539.0,6592694000.0
1960,"Bahamas, The",109526.0,169802300.0


In [98]:
world = world_df.copy()

In [102]:
world.stack()

year  country                
1960  Afghanistan  Population    8.994793e+06
                   GDP           5.377778e+08
      Algeria      Population    1.112489e+07
                   GDP           2.723638e+09
      Australia    Population    1.027648e+07
                                     ...     
2015  World        GDP           7.343364e+13
      Zambia       Population    1.621177e+07
                   GDP           2.120156e+10
      Zimbabwe     Population    1.560275e+07
                   GDP           1.389294e+10
Length: 22422, dtype: float64

In [103]:
world.stack().index

MultiIndex([(1960,        'Afghanistan', 'Population'),
            (1960,        'Afghanistan',        'GDP'),
            (1960,            'Algeria', 'Population'),
            (1960,            'Algeria',        'GDP'),
            (1960,          'Australia', 'Population'),
            (1960,          'Australia',        'GDP'),
            (1960,            'Austria', 'Population'),
            (1960,            'Austria',        'GDP'),
            (1960,       'Bahamas, The', 'Population'),
            (1960,       'Bahamas, The',        'GDP'),
            ...
            (2015,            'Vietnam', 'Population'),
            (2015,            'Vietnam',        'GDP'),
            (2015, 'West Bank and Gaza', 'Population'),
            (2015, 'West Bank and Gaza',        'GDP'),
            (2015,              'World', 'Population'),
            (2015,              'World',        'GDP'),
            (2015,             'Zambia', 'Population'),
            (2015,             '

In [107]:
world.stack().loc[(1960, 'Afghanistan')]

Population    8.994793e+06
GDP           5.377778e+08
dtype: float64

In [119]:
world.stack().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,Afghanistan,Population,8.994793e+06
1960,Afghanistan,GDP,5.377778e+08
1960,Algeria,Population,1.112489e+07
1960,Algeria,GDP,2.723638e+09
1960,Australia,Population,1.027648e+07
...,...,...,...
2015,World,GDP,7.343364e+13
2015,Zambia,Population,1.621177e+07
2015,Zambia,GDP,2.120156e+10
2015,Zimbabwe,Population,1.560275e+07


In [118]:
world.stack().to_frame().loc[(1960, 'Afghanistan')]

Unnamed: 0,0
Population,8994793.0
GDP,537777800.0


## The unstack Method
- The `unstack` method moves a row index to the column index (the inverse of the `stack` method).
- By default the `unstack` method will move the innermost index.
- We can choose which index level is moved with the `level` parameter.
- The `level` parameter accepts the level's index position or its name. It can also accept a list of positions/names.

In [122]:
world = world_df.copy().stack()
world.head()

year  country                
1960  Afghanistan  Population    8.994793e+06
                   GDP           5.377778e+08
      Algeria      Population    1.112489e+07
                   GDP           2.723638e+09
      Australia    Population    1.027648e+07
dtype: float64

In [123]:
world.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,Afghanistan,8.994793e+06,5.377778e+08
1960,Algeria,1.112489e+07,2.723638e+09
1960,Australia,1.027648e+07,1.856759e+10
1960,Austria,7.047539e+06,6.592694e+09
1960,"Bahamas, The",1.095260e+05,1.698023e+08
...,...,...,...
2015,Vietnam,9.170380e+07,1.935994e+11
2015,West Bank and Gaza,4.422143e+06,1.267740e+10
2015,World,7.346633e+09,7.343364e+13
2015,Zambia,1.621177e+07,2.120156e+10


In [133]:
world.unstack().unstack()

Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,...,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP
country,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Arab World,Argentina,Armenia,Aruba,...,Uzbekistan,Vanuatu,"Venezuela, RB",Vietnam,Virgin Islands (U.S.),West Bank and Gaza,World,"Yemen, Rep.",Zambia,Zimbabwe
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1960,8994793.0,,11124892.0,,,,,,,,...,,,8607600000.0,,24200000.0,,1364643000000.0,,698739700.0,1052990000.0
1961,9164945.0,,11404859.0,,,,,,,,...,,,8923367000.0,,25700000.0,,1420440000000.0,,682359700.0,1096647000.0
1962,9343772.0,,11690152.0,,,,,21287682.0,,,...,,,9873398000.0,,36900000.0,,1524573000000.0,,679279700.0,1117602000.0
1963,9531555.0,,11985130.0,,,,,21621845.0,,,...,,,10663380000.0,,41400000.0,,1638187000000.0,,704339700.0,1159512000.0
1964,9728645.0,,12295973.0,,,,,21953926.0,,,...,,,9113581000.0,,53800000.0,,1799675000000.0,,822639700.0,1217138000.0
1965,9935358.0,,12626953.0,,,,,22283389.0,,,...,,,9602945000.0,,66500000.0,,1959900000000.0,,1061200000.0,1311436000.0
1966,10148841.0,,12980269.0,,,,,22608747.0,,,...,,,10096570000.0,,84100000.0,,2125397000000.0,,1239000000.0,1281750000.0
1967,10368600.0,,13354197.0,,,,,22932201.0,,,...,,,10472780000.0,,115400000.0,,2262923000000.0,,1340639000.0,1397002000.0
1968,10599790.0,,13744383.0,,,,115557094.0,23261273.0,,,...,,,11470910000.0,,173800000.0,,2440549000000.0,,1573739000.0,1479600000.0
1969,10849510.0,,14144437.0,,,,118823872.0,23605992.0,,,...,,,11927570000.0,,211300000.0,,2686747000000.0,,1926399000.0,1747999000.0


In [134]:
world.unstack().unstack().columns

MultiIndex([('Population',           'Afghanistan'),
            ('Population',               'Albania'),
            ('Population',               'Algeria'),
            ('Population',               'Andorra'),
            ('Population',                'Angola'),
            ('Population',   'Antigua and Barbuda'),
            ('Population',            'Arab World'),
            ('Population',             'Argentina'),
            ('Population',               'Armenia'),
            ('Population',                 'Aruba'),
            ...
            (       'GDP',            'Uzbekistan'),
            (       'GDP',               'Vanuatu'),
            (       'GDP',         'Venezuela, RB'),
            (       'GDP',               'Vietnam'),
            (       'GDP', 'Virgin Islands (U.S.)'),
            (       'GDP',    'West Bank and Gaza'),
            (       'GDP',                 'World'),
            (       'GDP',           'Yemen, Rep.'),
            (       'GDP',    

In [135]:
world.unstack().unstack().index

Index([1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971,
       1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983,
       1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995,
       1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
       2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015],
      dtype='int64', name='year')

In [136]:
world.unstack(level=0) # Country kept as row index and year becomes column

Unnamed: 0_level_0,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Population,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,2.518362e+07,2.587754e+07,2.652874e+07,2.720729e+07,2.796221e+07,2.880917e+07,2.972680e+07,3.068250e+07,3.162751e+07,3.252656e+07
Afghanistan,GDP,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,1.373333e+09,1.408889e+09,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,Population,,,,,,,,,,,...,2.992547e+06,2.970017e+06,2.947314e+06,2.927519e+06,2.913021e+06,2.904780e+06,2.900247e+06,2.896652e+06,2.893654e+06,2.889167e+06
Albania,GDP,,,,,,,,,,,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,Population,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,3.374933e+07,3.426197e+07,3.481106e+07,3.540179e+07,3.603616e+07,3.671713e+07,3.743943e+07,3.818614e+07,3.893433e+07,3.966652e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Yemen, Rep.",GDP,,,,,,,,,,,...,1.908173e+10,2.563367e+10,3.039720e+10,2.845950e+10,3.090675e+10,3.107886e+10,3.207477e+10,3.595450e+10,,
Zambia,Population,3.049586e+06,3.142848e+06,3.240664e+06,3.342894e+06,3.449266e+06,3.559687e+06,3.674088e+06,3.792864e+06,3.916928e+06,4.047479e+06,...,1.238151e+07,1.273868e+07,1.311458e+07,1.350785e+07,1.391744e+07,1.434353e+07,1.478658e+07,1.524609e+07,1.572134e+07,1.621177e+07
Zambia,GDP,6.987397e+08,6.823597e+08,6.792797e+08,7.043397e+08,8.226397e+08,1.061200e+09,1.239000e+09,1.340639e+09,1.573739e+09,1.926399e+09,...,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026555e+10,2.345952e+10,2.550306e+10,2.804552e+10,2.713464e+10,2.120156e+10
Zimbabwe,Population,3.752390e+06,3.876638e+06,4.006262e+06,4.140804e+06,4.279561e+06,4.422132e+06,4.568320e+06,4.718612e+06,4.874113e+06,5.036321e+06,...,1.312794e+07,1.329780e+07,1.349546e+07,1.372100e+07,1.397390e+07,1.425559e+07,1.456548e+07,1.489809e+07,1.524586e+07,1.560275e+07


In [138]:
world.unstack(level=0).columns

Index([1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971,
       1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983,
       1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995,
       1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
       2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015],
      dtype='int64', name='year')

In [139]:
world.unstack(level=1)

Unnamed: 0_level_0,country,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Arab World,Argentina,Armenia,Aruba,...,Uzbekistan,Vanuatu,"Venezuela, RB",Vietnam,Virgin Islands (U.S.),West Bank and Gaza,World,"Yemen, Rep.",Zambia,Zimbabwe
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1960,Population,8.994793e+06,,1.112489e+07,,,,,,,,...,,,8.146845e+06,,32000.0,,3.035056e+09,,3.049586e+06,3.752390e+06
1960,GDP,5.377778e+08,,2.723638e+09,,,,,,,,...,,,8.607600e+09,,24200000.0,,1.364643e+12,,6.987397e+08,1.052990e+09
1961,Population,9.164945e+06,,1.140486e+07,,,,,,,,...,,,8.461684e+06,,34100.0,,3.076121e+09,,3.142848e+06,3.876638e+06
1961,GDP,5.488889e+08,,2.434767e+09,,,,,,,,...,,,8.923367e+09,,25700000.0,,1.420440e+12,,6.823597e+08,1.096647e+09
1962,Population,9.343772e+06,,1.169015e+07,,,,,2.128768e+07,,,...,,,8.790590e+06,,36300.0,,3.129064e+09,,3.240664e+06,4.006262e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2013,GDP,2.004633e+10,1.278103e+10,2.097035e+11,3.249101e+09,1.249121e+11,1.200588e+09,2.846994e+12,6.239320e+11,1.112147e+10,,...,5.679566e+10,8.017876e+08,3.713366e+11,1.712220e+11,,1.247600e+10,7.643132e+13,3.595450e+10,2.804552e+10,1.349023e+10
2014,Population,3.162751e+07,2.893654e+06,3.893433e+07,,2.422752e+07,9.090000e+04,3.842226e+08,4.298003e+07,3.006154e+06,,...,3.075770e+07,2.588830e+05,,9.072890e+07,,4.294682e+06,7.260780e+09,,1.572134e+07,1.524586e+07
2014,GDP,2.005019e+10,1.327796e+10,2.135185e+11,,1.267751e+11,1.220976e+09,2.873600e+12,5.480549e+11,1.164444e+10,,...,6.313285e+10,8.149546e+08,,1.862047e+11,,1.271560e+10,7.810634e+13,,2.713464e+10,1.419691e+10
2015,Population,3.252656e+07,2.889167e+06,3.966652e+07,,2.502197e+07,9.181800e+04,3.920223e+08,,3.017712e+06,,...,3.129950e+07,,,9.170380e+07,,4.422143e+06,7.346633e+09,,1.621177e+07,1.560275e+07


In [140]:
# Level 2 is the innermost level (Population / GDP), so this gives the same
# result as calling unstack() with no arguments.
world.unstack(level=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,Afghanistan,8.994793e+06,5.377778e+08
1960,Algeria,1.112489e+07,2.723638e+09
1960,Australia,1.027648e+07,1.856759e+10
1960,Austria,7.047539e+06,6.592694e+09
1960,"Bahamas, The",1.095260e+05,1.698023e+08
...,...,...,...
2015,Vietnam,9.170380e+07,1.935994e+11
2015,West Bank and Gaza,4.422143e+06,1.267740e+10
2015,World,7.346633e+09,7.343364e+13
2015,Zambia,1.621177e+07,2.120156e+10


In [141]:
# Negative positions count back from the innermost level; on this three-level
# index, -3 refers to level 0 ("year"), so the result matches unstack(level=0).
world.unstack(level=-3)

Unnamed: 0_level_0,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Population,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,2.518362e+07,2.587754e+07,2.652874e+07,2.720729e+07,2.796221e+07,2.880917e+07,2.972680e+07,3.068250e+07,3.162751e+07,3.252656e+07
Afghanistan,GDP,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,1.373333e+09,1.408889e+09,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,Population,,,,,,,,,,,...,2.992547e+06,2.970017e+06,2.947314e+06,2.927519e+06,2.913021e+06,2.904780e+06,2.900247e+06,2.896652e+06,2.893654e+06,2.889167e+06
Albania,GDP,,,,,,,,,,,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,Population,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,3.374933e+07,3.426197e+07,3.481106e+07,3.540179e+07,3.603616e+07,3.671713e+07,3.743943e+07,3.818614e+07,3.893433e+07,3.966652e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Yemen, Rep.",GDP,,,,,,,,,,,...,1.908173e+10,2.563367e+10,3.039720e+10,2.845950e+10,3.090675e+10,3.107886e+10,3.207477e+10,3.595450e+10,,
Zambia,Population,3.049586e+06,3.142848e+06,3.240664e+06,3.342894e+06,3.449266e+06,3.559687e+06,3.674088e+06,3.792864e+06,3.916928e+06,4.047479e+06,...,1.238151e+07,1.273868e+07,1.311458e+07,1.350785e+07,1.391744e+07,1.434353e+07,1.478658e+07,1.524609e+07,1.572134e+07,1.621177e+07
Zambia,GDP,6.987397e+08,6.823597e+08,6.792797e+08,7.043397e+08,8.226397e+08,1.061200e+09,1.239000e+09,1.340639e+09,1.573739e+09,1.926399e+09,...,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026555e+10,2.345952e+10,2.550306e+10,2.804552e+10,2.713464e+10,2.120156e+10
Zimbabwe,Population,3.752390e+06,3.876638e+06,4.006262e+06,4.140804e+06,4.279561e+06,4.422132e+06,4.568320e+06,4.718612e+06,4.874113e+06,5.036321e+06,...,1.312794e+07,1.329780e+07,1.349546e+07,1.372100e+07,1.397390e+07,1.425559e+07,1.456548e+07,1.489809e+07,1.524586e+07,1.560275e+07


In [142]:
world.unstack(level=[1,0])

country,Afghanistan,Algeria,Australia,Austria,"Bahamas, The",Bangladesh,Belgium,Belize,Benin,Bermuda,...,United Kingdom,United States,Upper middle income,Uruguay,Uzbekistan,Vietnam,West Bank and Gaza,World,Zambia,Zimbabwe
year,1960,1960,1960,1960,1960,1960,1960,1960,1960,1960,...,2015,2015,2015,2015,2015,2015,2015,2015,2015,2015
Population,8994793.0,11124890.0,10276480.0,7047539.0,109526.0,48200700.0,9153489.0,92068.0,2431620.0,44400.0,...,65138230.0,321418800.0,2550326000.0,3431555.0,31299500.0,91703800.0,4422143.0,7346633000.0,16211770.0,15602750.0
GDP,537777800.0,2723638000.0,18567590000.0,6592694000.0,169802300.0,4274894000.0,11658720000.0,28072480.0,226195600.0,84466650.0,...,2848755000000.0,17947000000000.0,19732880000000.0,53442700000.0,66732800000.0,193599400000.0,12677400000.0,73433640000000.0,21201560000.0,13892940000.0


In [144]:
world.unstack(level=[0,1]).sort_index(axis=1)

year,1960,1960,1960,1960,1960,1960,1960,1960,1960,1960,...,2015,2015,2015,2015,2015,2015,2015,2015,2015,2015
country,Afghanistan,Algeria,Australia,Austria,"Bahamas, The",Bangladesh,Belgium,Belize,Benin,Bermuda,...,United Kingdom,United States,Upper middle income,Uruguay,Uzbekistan,Vietnam,West Bank and Gaza,World,Zambia,Zimbabwe
Population,8994793.0,11124890.0,10276480.0,7047539.0,109526.0,48200700.0,9153489.0,92068.0,2431620.0,44400.0,...,65138230.0,321418800.0,2550326000.0,3431555.0,31299500.0,91703800.0,4422143.0,7346633000.0,16211770.0,15602750.0
GDP,537777800.0,2723638000.0,18567590000.0,6592694000.0,169802300.0,4274894000.0,11658720000.0,28072480.0,226195600.0,84466650.0,...,2848755000000.0,17947000000000.0,19732880000000.0,53442700000.0,66732800000.0,193599400000.0,12677400000.0,73433640000000.0,21201560000.0,13892940000.0


In [145]:
world.unstack(level=["year", "country"]).sort_index(axis=1)

year,1960,1960,1960,1960,1960,1960,1960,1960,1960,1960,...,2015,2015,2015,2015,2015,2015,2015,2015,2015,2015
country,Afghanistan,Algeria,Australia,Austria,"Bahamas, The",Bangladesh,Belgium,Belize,Benin,Bermuda,...,United Kingdom,United States,Upper middle income,Uruguay,Uzbekistan,Vietnam,West Bank and Gaza,World,Zambia,Zimbabwe
Population,8994793.0,11124890.0,10276480.0,7047539.0,109526.0,48200700.0,9153489.0,92068.0,2431620.0,44400.0,...,65138230.0,321418800.0,2550326000.0,3431555.0,31299500.0,91703800.0,4422143.0,7346633000.0,16211770.0,15602750.0
GDP,537777800.0,2723638000.0,18567590000.0,6592694000.0,169802300.0,4274894000.0,11658720000.0,28072480.0,226195600.0,84466650.0,...,2848755000000.0,17947000000000.0,19732880000000.0,53442700000.0,66732800000.0,193599400000.0,12677400000.0,73433640000000.0,21201560000.0,13892940000.0


## The pivot Method
- The `pivot` method reshapes data from a tall format to a wide format
- Ask yourself which direction the data will expand in if you add more entries.
- Tall/long data expands down; wide data expands out.
- The `index` param sets the column whose values become the row index of the pivoted **DataFrame**.
- `columns` param sets the column whose values will be the columns in the pivoted **DataFrame**.
- `values` param sets the values of the pivoted **DataFrame**. Pandas will populate the correct values based on the index and column intersections.

In [164]:
sales_df = pd.read_csv("salesmen.csv", parse_dates=["Date"], date_format="%m/%d/%Y")
sales_df.head()

Unnamed: 0,Date,Salesman,Revenue
0,2025-01-01,Sharon,7172
1,2025-01-02,Sharon,6362
2,2025-01-03,Sharon,5982
3,2025-01-04,Sharon,7917
4,2025-01-05,Sharon,7837


In [166]:
sales = sales_df.copy()

In [167]:
# Pivot from the untouched source frame instead of from `sales` itself, so this
# cell can be re-run safely: once `sales` has been pivoted it no longer has the
# "Date"/"Salesman"/"Revenue" columns that pivot needs.
# Result: one row per Date, one column per Salesman, Revenue as the cell values.
sales = sales_df.pivot(index="Date", columns="Salesman", values="Revenue").sort_index()

In [168]:
# Rows: label-based date slice (both endpoints are included).
# Columns: a list of labels. A tuple is the key form for MultiIndex columns, so
# a list is the unambiguous way to select several flat columns.
sales.loc["2025-01-01":"2025-01-20", ["Alexander", "Sharon"]]

Salesman,Alexander,Sharon
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-01-01,4430,7172
2025-01-02,8026,6362
2025-01-03,5188,5982
2025-01-04,3144,7917
2025-01-05,938,7837
2025-01-06,8702,1744
2025-01-07,4250,918
2025-01-08,9719,9863
2025-01-09,5614,8337
2025-01-10,301,7543


In [169]:
# Show the pivoted frame: Date as the index, one column per Salesman.
sales

Salesman,Alexander,Dave,Oscar,Ronald,Sharon
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-01-01,4430,1864,5250,2639,7172
2025-01-02,8026,8278,8661,4951,6362
2025-01-03,5188,4226,7075,2703,5982
2025-01-04,3144,3868,2524,4258,7917
2025-01-05,938,2287,2793,7771,7837
...,...,...,...,...,...
2025-12-27,6666,2843,835,2981,2045
2025-12-28,1243,8888,3073,6129,100
2025-12-29,3498,9490,6424,7662,4115
2025-12-30,8858,3594,7088,2570,2577


## The melt Method
- The `melt` method is the inverse of the `pivot` method.
- It takes a 'wide' dataset and converts it to a 'tall' dataset.
- The melt method is ideal when you have multiple columns storing the same datapoint.
- The `id_vars` param is the column (or columns) whose values will be repeated for every melted column.
- The `var_name` param sets the name of the new column for the varying values (the former column names).
- The `value_name` parameter sets the new name of the values column (holding the values from the original **DataFrame**).

In [174]:
# Read quarters.csv into a DataFrame.
quarters_df = pd.read_csv(filepath_or_buffer="quarters.csv")

In [176]:
# Keep quarters_df as loaded and explore on a deep copy.
quarters = quarters_df.copy(deep=True)
quarters.head(n=5)

Unnamed: 0,Salesman,Q1,Q2,Q3,Q4
0,Boris,602908,233879,354479,32704
1,Piers,43790,514863,297151,544493
2,Tommy,392668,113579,430882,247231
3,Travis,834663,266785,749238,570524
4,Cindy,580935,411379,110390,651572


In [184]:
# With no var_name/value_name given, the melted columns get the default
# names "variable" and "value".
pd.melt(quarters, id_vars=["Salesman"])

Unnamed: 0,Salesman,variable,value
0,Boris,Q1,602908
1,Piers,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Cindy,Q1,580935
5,Rob,Q1,656644
6,Mike,Q1,486141
7,Stacy,Q1,479662
8,Alexandra,Q1,992673
9,Boris,Q2,233879


In [181]:
# Melt the wide table into Salesman/Quarter/Revenue rows, then index the
# result by Quarter and Salesman and sort on that index.
(
    quarters
    .melt(id_vars=["Salesman"], var_name="Quarter", value_name="Revenue")
    .set_index(["Quarter", "Salesman"])
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenue
Quarter,Salesman,Unnamed: 2_level_1
Q1,Alexandra,992673
Q1,Boris,602908
Q1,Cindy,580935
Q1,Mike,486141
Q1,Piers,43790
Q1,Rob,656644
Q1,Stacy,479662
Q1,Tommy,392668
Q1,Travis,834663
Q2,Alexandra,879183


## The pivot_table Method
- A pivot table is a table whose values are aggregations of groups of values from another table
- The `values` parameter accepts the numeric column whose values will be aggregated.
- The `aggfunc` parameter declares the aggregation function (it defaults to the mean).
- The `index` parameter sets the index labels of the pivot table
- The `columns` parameter sets the column labels of the pivot table

In [186]:
# Load foods.csv and keep the loaded frame aside; `foods` is the working copy.
foods_df = pd.read_csv(filepath_or_buffer="foods.csv")
foods = foods_df.copy(deep=True)
foods

Unnamed: 0,First Name,Gender,City,Frequency,Item,Spend
0,Wanda,Female,Stamford,Weekly,Burger,15.66
1,Eric,Male,Stamford,Daily,Chalupa,10.56
2,Charles,Male,New York,Never,Sushi,42.14
3,Anna,Female,Philadelphia,Once,Ice Cream,11.01
4,Deborah,Female,Philadelphia,Daily,Chalupa,23.49
...,...,...,...,...,...,...
995,Donna,Female,New York,Monthly,Sushi,83.53
996,Albert,Male,Philadelphia,Daily,Sushi,72.88
997,Jean,Female,Stamford,Weekly,Donut,5.85
998,Jessica,Female,New York,Daily,Chalupa,43.19


In [189]:
# No aggfunc is given, so pivot_table falls back to its default: the mean.
pd.pivot_table(foods, values="Spend", index="Gender")

Unnamed: 0_level_0,Spend
Gender,Unnamed: 1_level_1
Female,50.709629
Male,49.397623


In [196]:
# Average Spend per Gender, with the aggregation named explicitly.
foods.pivot_table(
    index="Gender",
    values="Spend",
    aggfunc="mean",
)

Unnamed: 0_level_0,Spend
Gender,Unnamed: 1_level_1
Female,50.709629
Male,49.397623


In [198]:
# Total Spend for every Gender/City combination (two-level row index).
foods.pivot_table(
    index=["Gender", "City"],
    values="Spend",
    aggfunc="sum",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Spend
Gender,City,Unnamed: 2_level_1
Female,New York,7543.26
Female,Philadelphia,9632.69
Female,Stamford,8787.38
Male,New York,8266.31
Male,Philadelphia,8201.85
Male,Stamford,7637.88


In [199]:
# Total Spend for every City/Item combination (two-level row index).
foods.pivot_table(
    index=["City", "Item"],
    values="Spend",
    aggfunc="sum",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Spend
City,Item,Unnamed: 2_level_1
New York,Burger,2533.13
New York,Burrito,2378.35
New York,Chalupa,2104.35
New York,Donut,2792.05
New York,Ice Cream,3125.25
New York,Sushi,2876.44
Philadelphia,Burger,2577.42
Philadelphia,Burrito,2771.69
Philadelphia,Chalupa,2787.56
Philadelphia,Donut,2888.62


In [203]:
# Total Spend with Item/Frequency pairs down the rows and one column per City.
foods.pivot_table(
    index=["Item", "Frequency"],
    columns="City",
    values="Spend",
    aggfunc="sum",
)

Unnamed: 0_level_0,City,New York,Philadelphia,Stamford
Item,Frequency,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Burger,Daily,582.13,344.38,342.38
Burger,Monthly,359.15,428.19,265.65
Burger,Never,188.21,236.28,90.97
Burger,Often,150.43,362.65,279.88
Burger,Once,401.42,156.39,421.69
Burger,Seldom,288.84,597.11,257.28
Burger,Weekly,254.04,131.89,465.23
Burger,Yearly,308.91,320.53,532.1
Burrito,Daily,460.66,487.33,471.86
Burrito,Monthly,270.28,123.86,187.54


In [204]:
# Total Spend per Item, with City and Gender forming a two-level column index.
foods.pivot_table(
    index="Item",
    columns=["City", "Gender"],
    values="Spend",
    aggfunc="sum",
)

City,New York,New York,Philadelphia,Philadelphia,Stamford,Stamford
Gender,Female,Male,Female,Male,Female,Male
Item,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Burger,1239.04,1294.09,1639.24,938.18,1216.02,1439.16
Burrito,978.95,1399.4,1458.76,1312.93,1820.11,1300.29
Chalupa,876.58,1227.77,1673.33,1114.23,1602.35,1150.26
Donut,1446.78,1345.27,1639.26,1249.36,1656.96,1421.13
Ice Cream,1521.62,1603.63,1479.22,2191.27,1032.03,1059.22
Sushi,1480.29,1396.15,1742.88,1395.88,1459.91,1267.82
