# MultiIndex Operations. Pivot and Reshape Functions
- `df.unstack()`: Unstack a level of index.
- `df.stack()`: Stack a level of columns.
- `df.swaplevel()`: Swap levels of a MultiIndex.
- `df.droplevel()`: Drop a level from a MultiIndex.
- `df.reorder_levels()`: Reorder levels of a MultiIndex.
- `df.pivot()`: Reshape DataFrame.
- `df.pivot_table()`: Create pivot table with aggregation.
- `df.xs()`: Select a cross-section from a DataFrame.

In [1]:
import  pandas as pd
import numpy as np
from sympy.polys.subresultants_qq_zz import pivot
from xarray.util.generate_ops import inplace

In [2]:

# Population snapshots for USA, Canada and Mexico, one group of rows per year.
# The final year (2030) lists only USA and Canada; there is no Mexico entry.
data = {
    'Year': [
        2000, 2000, 2000,
        2005, 2005, 2005,
        2010, 2010, 2010,
        2015, 2015, 2015,
        2020, 2020, 2020,
        2025, 2025, 2025,
        2030, 2030,
    ],
    'Country': [
        'USA', 'Canada', 'Mexico',
        'USA', 'Canada', 'Mexico',
        'USA', 'Canada', 'Mexico',
        'USA', 'Canada', 'Mexico',
        'USA', 'Canada', 'Mexico',
        'USA', 'Canada', 'Mexico',
        'USA', 'Canada',
    ],
    'Population': [
        282_200_000, 30_769_700, 98_881_000,
        295_700_000, 32_366_300, 106_212_000,
        309_300_000, 34_063_300, 112_336_000,
        321_400_000, 35_853_400, 117_886_000,
        331_000_000, 37_590_000, 126_014_024,
        341_000_000, 38_750_000, 134_116_545,
        350_000_000, 39_980_000,
    ],
}

# Long-format frame: one row per (Year, Country) observation.
df_population = pd.DataFrame(data)
print(df_population)


    Year Country  Population
0   2000     USA   282200000
1   2000  Canada    30769700
2   2000  Mexico    98881000
3   2005     USA   295700000
4   2005  Canada    32366300
5   2005  Mexico   106212000
6   2010     USA   309300000
7   2010  Canada    34063300
8   2010  Mexico   112336000
9   2015     USA   321400000
10  2015  Canada    35853400
11  2015  Mexico   117886000
12  2020     USA   331000000
13  2020  Canada    37590000
14  2020  Mexico   126014024
15  2025     USA   341000000
16  2025  Canada    38750000
17  2025  Mexico   134116545
18  2030     USA   350000000
19  2030  Canada    39980000


# pandas.DataFrame.unstack

`DataFrame.unstack(level=-1, fill_value=None, sort=True)`

Pivot a level of the (necessarily hierarchical) index labels.

Returns a DataFrame having a new level of column labels whose inner-most level consists of the pivoted index labels.

If the index is not a MultiIndex, the output will be a Series (the analogue of stack when the columns are not a MultiIndex).

## Parameters

- **`level`**: int, str, or list of these, default -1 (last level)
  Level(s) of index to unstack, can pass level name.

- **`fill_value`**: int, str or dict
  Replace NaN with this value if the unstack produces missing values.

- **`sort`**: bool, default True
  Sort the level(s) in the resulting MultiIndex columns.

## Returns

- **Series or DataFrame**
  The resulting DataFrame or Series after unstacking.


In [3]:
# Promote Year and Country to a two-level row index, modifying the frame in place.
df_population.set_index(keys=['Year', 'Country'], inplace=True)

In [4]:
# Pivot the 'Year' index level into columns, then forward-fill along each row
# so a missing year takes the previous year's value for the same country.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling,
# which emits a FutureWarning on recent pandas versions.
df_population.unstack(level='Year', sort=True).ffill(axis=1)


Unnamed: 0_level_0,Population,Population,Population,Population,Population,Population,Population
Year,2000,2005,2010,2015,2020,2025,2030
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Canada,30769700.0,32366300.0,34063300.0,35853400.0,37590000.0,38750000.0,39980000.0
Mexico,98881000.0,106212000.0,112336000.0,117886000.0,126014024.0,134116545.0,134116545.0
USA,282200000.0,295700000.0,309300000.0,321400000.0,331000000.0,341000000.0,350000000.0


In [5]:
# Show the frame as it stands now: unstack() above returned a new object,
# so df_population itself still has the (Year, Country) MultiIndex.
print(df_population)

              Population
Year Country            
2000 USA       282200000
     Canada     30769700
     Mexico     98881000
2005 USA       295700000
     Canada     32366300
     Mexico    106212000
2010 USA       309300000
     Canada     34063300
     Mexico    112336000
2015 USA       321400000
     Canada     35853400
     Mexico    117886000
2020 USA       331000000
     Canada     37590000
     Mexico    126014024
2025 USA       341000000
     Canada     38750000
     Mexico    134116545
2030 USA       350000000
     Canada     39980000


# pandas.DataFrame.stack

`DataFrame.stack(level=-1, dropna=<no_default>, sort=<no_default>, future_stack=False)`

Stack the prescribed level(s) from columns to index.

Return a reshaped DataFrame or Series having a multi-level index with one or more new inner-most levels compared to the current DataFrame. The new inner-most levels are created by pivoting the columns of the current dataframe:

- If the columns have a single level, the output is a Series;
- If the columns have multiple levels, the new index level(s) is (are) taken from the prescribed level(s) and the output is a DataFrame.

## Parameters

- **`level`**: int, str, list, default -1
  Level(s) to stack from the column axis onto the index axis, defined as one index or label, or a list of indices or labels.

- **`dropna`**: bool, default True
  Whether to drop rows in the resulting Frame/Series with missing values. Stacking a column level onto the index axis can create combinations of index and column values that are missing from the original dataframe.

- **`sort`**: bool, default True
  Whether to sort the levels of the resulting MultiIndex.

- **`future_stack`**: bool, default False
  Whether to use the new implementation that will replace the current implementation in pandas 3.0. When True, dropna and sort have no impact on the result and must remain unspecified.

## Returns

- **DataFrame or Series**
  Stacked dataframe or series.


In [6]:
# Round trip: stack() moves the column labels into the innermost index level,
# and unstack() pivots that innermost level back out into columns.
print(df_population.stack(level=-1).unstack(level=-1))

              Population
Year Country            
2000 Canada     30769700
     Mexico     98881000
     USA       282200000
2005 Canada     32366300
     Mexico    106212000
     USA       295700000
2010 Canada     34063300
     Mexico    112336000
     USA       309300000
2015 Canada     35853400
     Mexico    117886000
     USA       321400000
2020 Canada     37590000
     Mexico    126014024
     USA       331000000
2025 Canada     38750000
     Mexico    134116545
     USA       341000000
2030 Canada     39980000
     USA       350000000


# pandas.DataFrame.swaplevel

`DataFrame.swaplevel(i=-2, j=-1, axis=0)`

Swap levels `i` and `j` in a `MultiIndex`.

Default is to swap the two innermost levels of the index.

## Parameters

- **`i, j`**: int or str
  Levels of the indices to be swapped. Can pass level name as string.

- **`axis`**: {0 or ‘index’, 1 or ‘columns’}, default 0
  The axis to swap levels on. 0 or ‘index’ for row-wise, 1 or ‘columns’ for column-wise.

## Returns

- **DataFrame**
  DataFrame with levels swapped in `MultiIndex`.


In [7]:
# Exchange index levels 0 and 1 so that Country becomes the outer level.
print(df_population.swaplevel(i=0, j=1))

              Population
Country Year            
USA     2000   282200000
Canada  2000    30769700
Mexico  2000    98881000
USA     2005   295700000
Canada  2005    32366300
Mexico  2005   106212000
USA     2010   309300000
Canada  2010    34063300
Mexico  2010   112336000
USA     2015   321400000
Canada  2015    35853400
Mexico  2015   117886000
USA     2020   331000000
Canada  2020    37590000
Mexico  2020   126014024
USA     2025   341000000
Canada  2025    38750000
Mexico  2025   134116545
USA     2030   350000000
Canada  2030    39980000


# pandas.DataFrame.droplevel

`DataFrame.droplevel(level, axis=0)`

Return Series/DataFrame with requested index / column level(s) removed.

## Parameters

- **`level`**: int, str, or list-like
  If a string is given, must be the name of a level. If list-like, elements must be names or positional indexes of levels.

- **`axis`**: {0 or ‘index’, 1 or ‘columns’}, default 0
  Axis along which the level(s) is removed:
  - `0 or ‘index’`: remove level(s) from the row index.
  - `1 or ‘columns’`: remove level(s) from the column index.
  - For Series this parameter is unused and defaults to 0.

## Returns

- **Series/DataFrame**
  Series/DataFrame with requested index / column level(s) removed.


In [8]:
# Drop the outer index level (position 0), leaving a single-level index.
df_population.droplevel(level=0)

Unnamed: 0_level_0,Population
Country,Unnamed: 1_level_1
USA,282200000
Canada,30769700
Mexico,98881000
USA,295700000
Canada,32366300
Mexico,106212000
USA,309300000
Canada,34063300
Mexico,112336000
USA,321400000


# pandas.DataFrame.reorder_levels

`DataFrame.reorder_levels(order, axis=0)`

Rearrange index levels using input order. May not drop or duplicate levels.

## Parameters

- **`order`**: list of int or list of str
  List representing new level order. Reference level by number (position) or by key (label).

- **`axis`**: {0 or ‘index’, 1 or ‘columns’}, default 0
  Where to reorder levels.

## Returns

- **DataFrame**
  DataFrame with reordered levels in MultiIndex.


In [9]:
# List the index levels by name in the desired order: Country first, then Year.
df_population.reorder_levels(order=['Country', 'Year'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Population
Country,Year,Unnamed: 2_level_1
USA,2000,282200000
Canada,2000,30769700
Mexico,2000,98881000
USA,2005,295700000
Canada,2005,32366300
Mexico,2005,106212000
USA,2010,309300000
Canada,2010,34063300
Mexico,2010,112336000
USA,2015,321400000


# pandas.DataFrame.pivot

`DataFrame.pivot(*, columns, index=<no_default>, values=<no_default>)`

Return reshaped DataFrame organized by given index / column values.

Reshape data (produce a “pivot” table) based on column values. Uses unique values from specified index / columns to form axes of the resulting DataFrame. This function does not support data aggregation; multiple values will result in a `MultiIndex` in the columns. See the User Guide for more on reshaping.

## Parameters

- **`columns`**: str or object or a list of str
  Column to use to make new frame’s columns.

- **`index`**: str or object or a list of str, optional
  Column to use to make new frame’s index. If not given, uses existing index.

- **`values`**: str, object or a list of the previous, optional
  Column(s) to use for populating new frame’s values. If not specified, all remaining columns will be used, and the result will have hierarchically indexed columns.

## Returns

- **DataFrame**
  Returns reshaped DataFrame.

## Raises

- **ValueError**
  When there are any index, columns combinations with multiple values. Use `DataFrame.pivot_table` when you need to aggregate.


In [10]:
# Move the index levels back into ordinary columns, modifying the frame in place;
# the pivot() call that follows refers to 'Year' and 'Country' as columns.
df_population.reset_index(drop=False, inplace=True)

In [11]:
# Wide layout: one row per Year, one column per Country, cells taken from Population.
df_population.pivot(columns='Country', index='Year', values='Population')

Country,Canada,Mexico,USA
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000,30769700.0,98881000.0,282200000.0
2005,32366300.0,106212000.0,295700000.0
2010,34063300.0,112336000.0,309300000.0
2015,35853400.0,117886000.0,321400000.0
2020,37590000.0,126014024.0,331000000.0
2025,38750000.0,134116545.0,341000000.0
2030,39980000.0,,350000000.0


# pandas.DataFrame.pivot_table

`DataFrame.pivot_table(values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', observed=<no_default>, sort=True)`

Create a spreadsheet-style pivot table as a DataFrame.

The levels in the pivot table will be stored in `MultiIndex` objects (hierarchical indexes) on the index and columns of the result DataFrame.

## Parameters

- **`values`**: list-like or scalar, optional
  Column or columns to aggregate.

- **`index`**: column, Grouper, array, or list of the previous
  Keys to group by on the pivot table index. If a list is passed, it can contain any of the other types (except list). If an array is passed, it must be the same length as the data and will be used in the same manner as column values.

- **`columns`**: column, Grouper, array, or list of the previous
  Keys to group by on the pivot table column. If a list is passed, it can contain any of the other types (except list). If an array is passed, it must be the same length as the data and will be used in the same manner as column values.

- **`aggfunc`**: function, list of functions, dict, default “mean”
  If a list of functions is passed, the resulting pivot table will have hierarchical columns whose top level are the function names (inferred from the function objects themselves). If a dict is passed, the key is column to aggregate and the value is function or list of functions. If `margins=True`, `aggfunc` will be used to calculate the partial aggregates.

- **`fill_value`**: scalar, default None
  Value to replace missing values with (in the resulting pivot table, after aggregation).

- **`margins`**: bool, default False
  If `margins=True`, special All columns and rows will be added with partial group aggregates across the categories on the rows and columns.

- **`dropna`**: bool, default True
  Do not include columns whose entries are all NaN. If True, rows with a NaN value in any column will be omitted before computing margins.

- **`margins_name`**: str, default ‘All’
  Name of the row / column that will contain the totals when `margins` is True.

- **`observed`**: bool, default False
  This only applies if any of the groupers are Categoricals. If True, only show observed values for categorical groupers. If False, show all values for categorical groupers.
  Deprecated since version 2.2.0: The default value of False is deprecated and will change to True in a future version of pandas.

- **`sort`**: bool, default True
  Specifies if the result should be sorted.
  - **Added in version 1.3.0**.

## Returns

- **DataFrame**
  An Excel style pivot table.


In [12]:
# Display the frame after reset_index(): Year and Country are ordinary columns again.
df_population

Unnamed: 0,Year,Country,Population
0,2000,USA,282200000
1,2000,Canada,30769700
2,2000,Mexico,98881000
3,2005,USA,295700000
4,2005,Canada,32366300
5,2005,Mexico,106212000
6,2010,USA,309300000
7,2010,Canada,34063300
8,2010,Mexico,112336000
9,2015,USA,321400000


In [21]:
# Sum Population for every Year x Country cell and, because margins=True,
# append a totals row and column labelled 'All population'.
df_population.pivot_table(
    index='Year',
    columns='Country',
    values='Population',
    aggfunc='sum',
    margins=True,
    margins_name='All population',
)

Country,Canada,Mexico,USA,All population
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000,30769700.0,98881000.0,282200000.0,411850700
2005,32366300.0,106212000.0,295700000.0,434278300
2010,34063300.0,112336000.0,309300000.0,455699300
2015,35853400.0,117886000.0,321400000.0,475139400
2020,37590000.0,126014024.0,331000000.0,494604024
2025,38750000.0,134116545.0,341000000.0,513866545
2030,39980000.0,,350000000.0,389980000
All population,249372700.0,695445569.0,2230600000.0,3175418269


# pandas.DataFrame.xs

`DataFrame.xs(key, axis=0, level=None, drop_level=True)`

Return cross-section from the Series/DataFrame.

This method takes a `key` argument to select data at a particular level of a `MultiIndex`.

## Parameters

- **`key`**: label or tuple of label
  Label contained in the index, or partially in a `MultiIndex`.

- **`axis`**: {0 or ‘index’, 1 or ‘columns’}, default 0
  Axis to retrieve cross-section on.

- **`level`**: object, defaults to first n levels (n=1 or len(key))
  In case of a key partially contained in a `MultiIndex`, indicate which levels are used. Levels can be referred by label or position.

- **`drop_level`**: bool, default True
  If False, returns object with same levels as self.

## Returns

- **Series or DataFrame**
  Cross-section from the original Series or DataFrame corresponding to the selected index levels.


In [28]:
# Take an independent copy so the following cells do not modify df_population.
df_population1 = df_population.copy(deep=True)

In [29]:
# Give the copy a two-level (Year, Country) row index, in place.
df_population1.set_index(keys=['Year', 'Country'], inplace=True)

In [42]:
# Put Country on the outer level, then sort the rows by that level.
# NOTE(review): presumably sorted so the tuple lookup in xs() runs on an
# ordered index; confirm whether the sort is required.
df_population1 = (
    df_population1
    .swaplevel(i=0, j=1)
    .sort_index(level=0)
)

In [49]:
# Cross-section for the full key: outer level 'Canada', inner level 2000.
df_population1.xs(key=('Canada', 2000))

Population    30769700
Name: (Canada, 2000), dtype: int64