In [1]:
import pandas as pd

# Reading CSV file
bigmac = pd.read_csv('Datasets/bigmac.csv', parse_dates=['Date'])
bigmac.head(3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


### # Multiple indexes can be set by passing a list of column headers or column indexes to "keys" parameter of df.set_index() method.

In [4]:
# Setting Multiple indexes.
# Levels of Indexes are set in the same order as they are passed in the list.
# Here, we have same dates occuring for many countries.

bigmac.set_index(keys=['Date', 'Country'])    # Date & Country have level 0 & 1.

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
...,...,...
2010-01-01,Turkey,3.83
2010-01-01,UAE,2.99
2010-01-01,Ukraine,1.83
2010-01-01,United States,3.58


#### It is a good practice to assign column with least unique values to lower levels.

#### eg. 3 columns with 2, 30, 100 unique values must be leveled in order 0, 1, 2.

In [7]:
bigmac.set_index(keys=['Country', 'Date'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2016-01-01,2.39
Australia,2016-01-01,3.74
Brazil,2016-01-01,3.35
Britain,2016-01-01,4.22
Canada,2016-01-01,4.14
...,...,...
Turkey,2010-01-01,3.83
UAE,2010-01-01,2.99
Ukraine,2010-01-01,1.83
United States,2010-01-01,3.58


In [5]:
# Making changes permanent.

bigmac.set_index(keys=['Date', 'Country'], inplace=True)

# # Sorting Multi-Indexes.

#### using df.sort_index() in a multi-index dataframe will sort the index with lowest level first and then the index with higher level.

In [13]:
# Sort all indexes of bigmac df

bigmac.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
...,...,...
2016-01-01,Ukraine,1.54
2016-01-01,United States,4.93
2016-01-01,Uruguay,3.74
2016-01-01,Venezuela,0.66


## Sorting indexes in different orders

In [20]:
# sorting dates descendingly & country ascendingly.

bigmac.sort_index(ascending=[False, True], inplace=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Austria,3.76
2016-01-01,Belgium,4.25
2016-01-01,Brazil,3.35
...,...,...
2010-01-01,Turkey,3.83
2010-01-01,UAE,2.99
2010-01-01,Ukraine,1.83
2010-01-01,United States,3.58


## Sorting individual indexes.

In [25]:
bigmac.sort_index(level=0)
bigmac.sort_index(level='Date', inplace=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
...,...,...
2016-01-01,Ukraine,1.54
2016-01-01,United States,4.93
2016-01-01,Uruguay,3.74
2016-01-01,Venezuela,0.66


# # Extracting index headers.

In [14]:
# Extracting index header in a multi-index dataframe.

bigmac.index

MultiIndex([('2016-01-01',      'Argentina'),
            ('2016-01-01',      'Australia'),
            ('2016-01-01',         'Brazil'),
            ('2016-01-01',        'Britain'),
            ('2016-01-01',         'Canada'),
            ('2016-01-01',          'Chile'),
            ('2016-01-01',          'China'),
            ('2016-01-01',       'Colombia'),
            ('2016-01-01',     'Costa Rica'),
            ('2016-01-01', 'Czech Republic'),
            ...
            ('2010-01-01',      'Sri Lanka'),
            ('2010-01-01',         'Sweden'),
            ('2010-01-01',    'Switzerland'),
            ('2010-01-01',         'Taiwan'),
            ('2010-01-01',       'Thailand'),
            ('2010-01-01',         'Turkey'),
            ('2010-01-01',            'UAE'),
            ('2010-01-01',        'Ukraine'),
            ('2010-01-01',  'United States'),
            ('2010-01-01',        'Uruguay')],
           names=['Date', 'Country'], length=652)

In [15]:
# Extracting second index headers.

bigmac.index[1]

(Timestamp('2016-01-01 00:00:00'), 'Australia')

In [None]:
# Give multiIndex directly from pd.read_csv(index_col).

bigmac = pd.read_csv('Datasets/bigmac.csv', parse_dates=['Date'], index_col=['Date', 'Country'])

output = None

# # .get_level_values() method

#### df.index.get_level_values() returns a list of headers of passed index.

In [17]:
# Getting the values of 'Date' index for each row.
bigmac.index.get_level_values('Date')
bigmac.index.get_level_values(0)                      # Same as above.

# Getting the values of 'Country' index for each row
bigmac.index.get_level_values('Country')
bigmac.index.get_level_values(1)                      # Same as above.

DatetimeIndex(['2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01',
               ...
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

# # .set_names() method.

#### df.index.set_names(names, level) is used to rename the index headings of multi-Indexes.

In [18]:
# Renaming 'Date' & 'Country' index headings

bigmac.index.set_names(names=['Day', 'location'], inplace=False)

MultiIndex([('2016-01-01',      'Argentina'),
            ('2016-01-01',      'Australia'),
            ('2016-01-01',         'Brazil'),
            ('2016-01-01',        'Britain'),
            ('2016-01-01',         'Canada'),
            ('2016-01-01',          'Chile'),
            ('2016-01-01',          'China'),
            ('2016-01-01',       'Colombia'),
            ('2016-01-01',     'Costa Rica'),
            ('2016-01-01', 'Czech Republic'),
            ...
            ('2010-01-01',      'Sri Lanka'),
            ('2010-01-01',         'Sweden'),
            ('2010-01-01',    'Switzerland'),
            ('2010-01-01',         'Taiwan'),
            ('2010-01-01',       'Thailand'),
            ('2010-01-01',         'Turkey'),
            ('2010-01-01',            'UAE'),
            ('2010-01-01',        'Ukraine'),
            ('2010-01-01',  'United States'),
            ('2010-01-01',        'Uruguay')],
           names=['Day', 'location'], length=652)

In [19]:
# Renaming 'Country' column header only

bigmac.index.set_names(names='location', level=1, inplace=False)

MultiIndex([('2016-01-01',      'Argentina'),
            ('2016-01-01',      'Australia'),
            ('2016-01-01',         'Brazil'),
            ('2016-01-01',        'Britain'),
            ('2016-01-01',         'Canada'),
            ('2016-01-01',          'Chile'),
            ('2016-01-01',          'China'),
            ('2016-01-01',       'Colombia'),
            ('2016-01-01',     'Costa Rica'),
            ('2016-01-01', 'Czech Republic'),
            ...
            ('2010-01-01',      'Sri Lanka'),
            ('2010-01-01',         'Sweden'),
            ('2010-01-01',    'Switzerland'),
            ('2010-01-01',         'Taiwan'),
            ('2010-01-01',       'Thailand'),
            ('2010-01-01',         'Turkey'),
            ('2010-01-01',            'UAE'),
            ('2010-01-01',        'Ukraine'),
            ('2010-01-01',  'United States'),
            ('2010-01-01',        'Uruguay')],
           names=['Date', 'location'], length=652)

# # df.loc[] & df.iloc[] accessor

#### use the df.index & df.columns attributes to see what type of row and column argument to pass in df.loc[] accessor.

In [6]:
# df.loc[] requires a sorted dataframe.

bigmac.sort_index(inplace=True)

In [35]:
bigmac.loc[('2016-01-01', )]

Unnamed: 0_level_0,Price in US Dollars
Country,Unnamed: 1_level_1
Argentina,2.39
Australia,3.74
Austria,3.76
Belgium,4.25
Brazil,3.35
Britain,4.22
Canada,4.14
Chile,2.94
China,2.68
Colombia,2.43


In [29]:
bigmac.loc['2016-01-01', 'Canada']

Price in US Dollars    4.14
Name: (2016-01-01 00:00:00, Canada), dtype: float64

In [33]:
# returns a multi-index series.

bigmac.loc[('2016-01-01', 'Canada'), 'Price in US Dollars']

Date        Country
2016-01-01  Canada     4.14
Name: Price in US Dollars, dtype: float64

#### Multi-indexed dataframe do not affect iloc[] functionality it works the same as in single indexed DataFrames.

In [37]:
# Extracting the value @ first row.

bigmac.iloc[0]

Price in US Dollars    1.84
Name: (2010-01-01 00:00:00, Argentina), dtype: float64

In [39]:
# Extracting values @ 1st, 2nd, 3rd and 4th row.

bigmac.iloc[[1, 2, 3, 4]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


In [40]:
# Extracting values from 2nd to 4th row.

bigmac.iloc[1:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97


# # df.transpose() method

#### df.transpose() swaps the rows with columns & vice-versa.

In [9]:
# creating a transposed bigmac df

bigmac.transpose()

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


In [13]:
# assigning a transpose bigmac to a variable.

bigmac_t = bigmac.transpose()

### df.loc[] on multi-indexed column.

In [16]:
bigmac_t.loc[('Price in US Dollars', ), ('2010-01-01', 'Brazil')]

Date,2010-01-01
Country,Brazil
Price in US Dollars,4.76


In [17]:
bigmac_t.loc['Price in US Dollars', ('2010-01-01', 'Brazil')]

Date        Country
2010-01-01  Brazil     4.76
Name: Price in US Dollars, dtype: float64

# # df.swap_level() method.

#### df.swap_level() swaps the passed indexes with each other in a given axis(default=0).

In [25]:
# Swaping indexes on a multi-indexed row headers.

bigmac.swaplevel()

bigmac.swaplevel('Date', 'Country', axis=0)
bigmac.swaplevel(0, 1, axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2010-01-01,1.84
Australia,2010-01-01,3.98
Brazil,2010-01-01,4.76
Britain,2010-01-01,3.67
Canada,2010-01-01,3.97
...,...,...
Ukraine,2016-01-01,1.54
United States,2016-01-01,4.93
Uruguay,2016-01-01,3.74
Venezuela,2016-01-01,0.66


In [26]:
# Swaping indexes on a multi-indexed column headers.
# mention "axis" parameter while swaping column headers.

bigmac_t.swaplevel(axis=1)

bigmac_t.swaplevel('Date', 'Country', axis=1)
bigmac_t.swaplevel(0, 1, axis=1)

Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


# # df.stack() method

#### Return a reshaped DataFrame or Series having a multi-level row-headers with one or more new inner-most levels compared to the current DataFrame. The new inner-most levels are created by pivoting the columns of the current dataframe.

In [39]:
# importing a new dataset.

world = pd.read_csv('Datasets/worldstats.csv', index_col=['country', 'year'])
world.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
Arab World,2012,368802611.0,2773270000000.0
Arab World,2011,361031820.0,2497945000000.0
Arab World,2010,353112237.0,2103825000000.0
Arab World,2009,345054176.0,1798878000000.0
Arab World,2008,336886468.0,2081343000000.0
Arab World,2007,328766559.0,1641666000000.0
Arab World,2006,320906736.0,1404190000000.0


In [56]:
# Number of rows & columns in the original DataFrame.

world.shape

(11211, 2)

In [48]:
# Stacking the DataFrame.

world.stack()

country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
                  GDP           2.873600e+12
            2013  Population    3.765043e+08
                                    ...     
Zimbabwe    1962  GDP           1.117602e+09
            1961  Population    3.876638e+06
                  GDP           1.096647e+09
            1960  Population    3.752390e+06
                  GDP           1.052990e+09
Length: 22422, dtype: float64

In [57]:
# Assigning the stacked DataFrame to a variable.
# S.to_frame() converts a Series into a DataFrame.

world_stacked = world.stack().to_frame()
world_stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,Population,3.920223e+08
Arab World,2015,GDP,2.530102e+12
Arab World,2014,Population,3.842226e+08
Arab World,2014,GDP,2.873600e+12
Arab World,2013,Population,3.765043e+08
...,...,...,...
Zimbabwe,1962,GDP,1.117602e+09
Zimbabwe,1961,Population,3.876638e+06
Zimbabwe,1961,GDP,1.096647e+09
Zimbabwe,1960,Population,3.752390e+06


#### Stacking increases number of rows by a multiple of columns stacked, reducing the number of columns by a factor of columns stacked.

#### ie. If 2 columns are stacked number of rows will double & number of columns will halve.

In [55]:
# Number of rows have doubled from 112211 & columns halved.

world_stacked.shape

(22422, 1)

# # df.unstack() method.

#### df.unstack() is the exact opposite of df.stack().

In [59]:
# Unstacking previously stacked DataFrame.

world_stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0
Unnamed: 0_level_1,Unnamed: 1_level_1,Population,GDP
country,year,Unnamed: 2_level_2,Unnamed: 3_level_2
Afghanistan,1960,8994793.0,5.377778e+08
Afghanistan,1961,9164945.0,5.488889e+08
Afghanistan,1962,9343772.0,5.466667e+08
Afghanistan,1963,9531555.0,7.511112e+08
Afghanistan,1964,9728645.0,8.000000e+08
...,...,...,...
Zimbabwe,2011,14255592.0,1.095623e+10
Zimbabwe,2012,14565482.0,1.239272e+10
Zimbabwe,2013,14898092.0,1.349023e+10
Zimbabwe,2014,15245855.0,1.419691e+10


In [60]:
world_stacked.unstack().unstack()

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Unnamed: 0_level_1,Population,Population,Population,Population,Population,Population,Population,Population,Population,Population,...,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP,GDP
year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
Afghanistan,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,,,,,,,,,,,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,1.170273e+11,1.349771e+11,1.710007e+11,1.372110e+11,1.612073e+11,2.000131e+11,2.090474e+11,2.097035e+11,2.135185e+11,1.668386e+11
Andorra,,,,,,,,,,,...,3.536452e+09,4.010785e+09,4.001349e+09,3.649863e+09,3.346317e+09,3.427236e+09,3.146178e+09,3.249101e+09,,
Angola,,,,,,,,,,,...,4.178948e+10,6.044892e+10,8.417803e+10,7.549238e+10,8.247091e+10,1.041159e+11,1.153984e+11,1.249121e+11,1.267751e+11,1.026431e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza,,,,,,,,,,,...,4.910100e+09,5.505800e+09,6.673500e+09,7.268200e+09,8.913100e+09,1.045985e+10,1.127940e+10,1.247600e+10,1.271560e+10,1.267740e+10
World,3.035056e+09,3.076121e+09,3.129064e+09,3.193947e+09,3.259355e+09,3.326054e+09,3.395866e+09,3.465297e+09,3.535512e+09,3.609910e+09,...,5.107451e+13,5.758343e+13,6.312856e+13,5.983553e+13,6.564782e+13,7.284314e+13,7.442836e+13,7.643132e+13,7.810634e+13,7.343364e+13
"Yemen, Rep.",,,,,,,,,,,...,1.908173e+10,2.563367e+10,3.039720e+10,2.845950e+10,3.090675e+10,3.107886e+10,3.207477e+10,3.595450e+10,,
Zambia,3.049586e+06,3.142848e+06,3.240664e+06,3.342894e+06,3.449266e+06,3.559687e+06,3.674088e+06,3.792864e+06,3.916928e+06,4.047479e+06,...,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026555e+10,2.345952e+10,2.550306e+10,2.804552e+10,2.713464e+10,2.120156e+10


In [63]:
world_stacked.unstack().unstack().unstack().to_frame().head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0
Unnamed: 0_level_1,Unnamed: 1_level_1,year,country,Unnamed: 4_level_1
0,Population,1960,Afghanistan,8994793.0
0,Population,1960,Albania,
0,Population,1960,Algeria,11124892.0
0,Population,1960,Andorra,
0,Population,1960,Angola,
0,Population,1960,Antigua and Barbuda,
0,Population,1960,Arab World,
0,Population,1960,Argentina,
0,Population,1960,Armenia,
0,Population,1960,Aruba,


### df.unstack() parameters.

In [65]:
world_stacked.unstack(level=2)
world_stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0
Unnamed: 0_level_1,Unnamed: 1_level_1,Population,GDP
country,year,Unnamed: 2_level_2,Unnamed: 3_level_2
Afghanistan,1960,8994793.0,5.377778e+08
Afghanistan,1961,9164945.0,5.488889e+08
Afghanistan,1962,9343772.0,5.466667e+08
Afghanistan,1963,9531555.0,7.511112e+08
Afghanistan,1964,9728645.0,8.000000e+08
...,...,...,...
Zimbabwe,2011,14255592.0,1.095623e+10
Zimbabwe,2012,14565482.0,1.239272e+10
Zimbabwe,2013,14898092.0,1.349023e+10
Zimbabwe,2014,15245855.0,1.419691e+10


In [66]:
world_stacked.unstack(level=1)
world_stacked.unstack(level=-2)
world_stacked.unstack(level='year')

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Unnamed: 0_level_1,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Afghanistan,Population,8.994793e+06,9.164945e+06,9.343772e+06,9.531555e+06,9.728645e+06,9.935358e+06,1.014884e+07,1.036860e+07,1.059979e+07,1.084951e+07,...,2.518362e+07,2.587754e+07,2.652874e+07,2.720729e+07,2.796221e+07,2.880917e+07,2.972680e+07,3.068250e+07,3.162751e+07,3.252656e+07
Afghanistan,GDP,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,1.373333e+09,1.408889e+09,...,7.057598e+09,9.843842e+09,1.019053e+10,1.248694e+10,1.593680e+10,1.793024e+10,2.053654e+10,2.004633e+10,2.005019e+10,1.919944e+10
Albania,Population,,,,,,,,,,,...,2.992547e+06,2.970017e+06,2.947314e+06,2.927519e+06,2.913021e+06,2.904780e+06,2.900247e+06,2.896652e+06,2.893654e+06,2.889167e+06
Albania,GDP,,,,,,,,,,,...,8.992642e+09,1.070101e+10,1.288135e+10,1.204421e+10,1.192695e+10,1.289087e+10,1.231978e+10,1.278103e+10,1.327796e+10,1.145560e+10
Algeria,Population,1.112489e+07,1.140486e+07,1.169015e+07,1.198513e+07,1.229597e+07,1.262695e+07,1.298027e+07,1.335420e+07,1.374438e+07,1.414444e+07,...,3.374933e+07,3.426197e+07,3.481106e+07,3.540179e+07,3.603616e+07,3.671713e+07,3.743943e+07,3.818614e+07,3.893433e+07,3.966652e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Yemen, Rep.",GDP,,,,,,,,,,,...,1.908173e+10,2.563367e+10,3.039720e+10,2.845950e+10,3.090675e+10,3.107886e+10,3.207477e+10,3.595450e+10,,
Zambia,Population,3.049586e+06,3.142848e+06,3.240664e+06,3.342894e+06,3.449266e+06,3.559687e+06,3.674088e+06,3.792864e+06,3.916928e+06,4.047479e+06,...,1.238151e+07,1.273868e+07,1.311458e+07,1.350785e+07,1.391744e+07,1.434353e+07,1.478658e+07,1.524609e+07,1.572134e+07,1.621177e+07
Zambia,GDP,6.987397e+08,6.823597e+08,6.792797e+08,7.043397e+08,8.226397e+08,1.061200e+09,1.239000e+09,1.340639e+09,1.573739e+09,1.926399e+09,...,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026555e+10,2.345952e+10,2.550306e+10,2.804552e+10,2.713464e+10,2.120156e+10
Zimbabwe,Population,3.752390e+06,3.876638e+06,4.006262e+06,4.140804e+06,4.279561e+06,4.422132e+06,4.568320e+06,4.718612e+06,4.874113e+06,5.036321e+06,...,1.312794e+07,1.329780e+07,1.349546e+07,1.372100e+07,1.397390e+07,1.425559e+07,1.456548e+07,1.489809e+07,1.524586e+07,1.560275e+07


In [68]:
world_stacked.unstack(level=['year', 'country'])
world_stacked.unstack(level=[1, 0])

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
year,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,...,1969,1968,1967,1966,1965,1964,1963,1962,1961,1960
country,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,...,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe
Population,392022300.0,384222600.0,376504300.0,368802600.0,361031800.0,353112200.0,345054200.0,336886500.0,328766600.0,320906700.0,...,5036321.0,4874113.0,4718612.0,4568320.0,4422132.0,4279561.0,4140804.0,4006262.0,3876638.0,3752390.0
GDP,2530102000000.0,2873600000000.0,2846994000000.0,2773270000000.0,2497945000000.0,2103825000000.0,1798878000000.0,2081343000000.0,1641666000000.0,1404190000000.0,...,1747999000.0,1479600000.0,1397002000.0,1281750000.0,1311436000.0,1217138000.0,1159512000.0,1117602000.0,1096647000.0,1052990000.0


In [69]:
world_stacked.unstack(level=['country', 'year'])
world_stacked.unstack(level=[0, 1])

Unnamed: 0_level_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
country,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,Arab World,...,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe
year,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,...,1969,1968,1967,1966,1965,1964,1963,1962,1961,1960
Population,392022300.0,384222600.0,376504300.0,368802600.0,361031800.0,353112200.0,345054200.0,336886500.0,328766600.0,320906700.0,...,5036321.0,4874113.0,4718612.0,4568320.0,4422132.0,4279561.0,4140804.0,4006262.0,3876638.0,3752390.0
GDP,2530102000000.0,2873600000000.0,2846994000000.0,2773270000000.0,2497945000000.0,2103825000000.0,1798878000000.0,2081343000000.0,1641666000000.0,1404190000000.0,...,1747999000.0,1479600000.0,1397002000.0,1281750000.0,1311436000.0,1217138000.0,1159512000.0,1117602000.0,1096647000.0,1052990000.0


In [71]:
world_stacked.unstack(level='year', fill_value='LoL')

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Unnamed: 0_level_1,year,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Afghanistan,Population,8.99479e+06,9.16494e+06,9.34377e+06,9.53156e+06,9.72864e+06,9.93536e+06,1.01488e+07,1.03686e+07,1.05998e+07,1.08495e+07,...,2.51836e+07,2.58775e+07,2.65287e+07,2.72073e+07,2.79622e+07,2.88092e+07,2.97268e+07,3.06825e+07,3.16275e+07,3.25266e+07
Afghanistan,GDP,5.37778e+08,5.48889e+08,5.46667e+08,7.51111e+08,8e+08,1.00667e+09,1.4e+09,1.67333e+09,1.37333e+09,1.40889e+09,...,7.0576e+09,9.84384e+09,1.01905e+10,1.24869e+10,1.59368e+10,1.79302e+10,2.05365e+10,2.00463e+10,2.00502e+10,1.91994e+10
Albania,Population,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,...,2.99255e+06,2.97002e+06,2.94731e+06,2.92752e+06,2.91302e+06,2.90478e+06,2.90025e+06,2.89665e+06,2.89365e+06,2.88917e+06
Albania,GDP,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,...,8.99264e+09,1.0701e+10,1.28814e+10,1.20442e+10,1.1927e+10,1.28909e+10,1.23198e+10,1.2781e+10,1.3278e+10,1.14556e+10
Algeria,Population,1.11249e+07,1.14049e+07,1.16902e+07,1.19851e+07,1.2296e+07,1.2627e+07,1.29803e+07,1.33542e+07,1.37444e+07,1.41444e+07,...,3.37493e+07,3.4262e+07,3.48111e+07,3.54018e+07,3.60362e+07,3.67171e+07,3.74394e+07,3.81861e+07,3.89343e+07,3.96665e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Yemen, Rep.",GDP,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,LoL,...,1.90817e+10,2.56337e+10,3.03972e+10,2.84595e+10,3.09068e+10,3.10789e+10,3.20748e+10,3.59545e+10,LoL,LoL
Zambia,Population,3.04959e+06,3.14285e+06,3.24066e+06,3.34289e+06,3.44927e+06,3.55969e+06,3.67409e+06,3.79286e+06,3.91693e+06,4.04748e+06,...,1.23815e+07,1.27387e+07,1.31146e+07,1.35078e+07,1.39174e+07,1.43435e+07,1.47866e+07,1.52461e+07,1.57213e+07,1.62118e+07
Zambia,GDP,6.9874e+08,6.8236e+08,6.7928e+08,7.0434e+08,8.2264e+08,1.0612e+09,1.239e+09,1.34064e+09,1.57374e+09,1.9264e+09,...,1.27569e+10,1.4057e+10,1.79109e+10,1.53283e+10,2.02656e+10,2.34595e+10,2.55031e+10,2.80455e+10,2.71346e+10,2.12016e+10
Zimbabwe,Population,3.75239e+06,3.87664e+06,4.00626e+06,4.1408e+06,4.27956e+06,4.42213e+06,4.56832e+06,4.71861e+06,4.87411e+06,5.03632e+06,...,1.31279e+07,1.32978e+07,1.34955e+07,1.3721e+07,1.39739e+07,1.42556e+07,1.45655e+07,1.48981e+07,1.52459e+07,1.56028e+07


# # df.pivot() method.

#### df.pivot() return reshaped DataFrame organized by given index / column headers & values.

In [73]:
# Importing a new CSV

sales = pd.read_csv('Datasets/salesmen.csv')
sales.head()

Unnamed: 0,Date,Salesman,Revenue
0,1/1/16,Bob,7172
1,1/2/16,Bob,6362
2,1/3/16,Bob,5982
3,1/4/16,Bob,7917
4,1/5/16,Bob,7837


In [74]:
# finding the number of unique values in each column

sales.nunique()

Date         366
Salesman       5
Revenue     1676
dtype: int64

#### While pivoting a DataFrame it is a good practice to assign columns to the column with least unique values.

In [77]:
# Pivoting sales DataFrame.

sales_pivoted = sales.pivot(index='Date', columns='Salesman', values='Revenue')
sales_pivoted.head(10)

Salesman,Bob,Dave,Jeb,Oscar,Ronald
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1/1/16,7172,1864,4430,5250,2639
1/10/16,7543,7105,301,7663,8267
1/11/16,1053,6851,9489,8888,1340
1/12/16,4362,7147,8719,3092,279
1/13/16,6812,6160,2349,6139,7540
1/14/16,9047,7533,7690,5235,4214
1/15/16,3594,9883,6917,5507,5346
1/16/16,9800,3622,474,4947,9636
1/17/16,7215,4565,8963,3325,6058
1/18/16,7744,8246,2351,2611,3602


In [79]:
sales_pivoted.columns

Index(['Bob', 'Dave', 'Jeb', 'Oscar', 'Ronald'], dtype='object', name='Salesman')

# # df.pivot_table(index, columns, values, aggfunc) method.

#### df.pivot_table() can be used to aggrigate the values of a DataFrame.

In [85]:
# Importing a CSV.

foods = pd.read_csv('Datasets/foods.csv')
foods.head(3)

Unnamed: 0,First Name,Gender,City,Frequency,Item,Spend
0,Wanda,Female,Stamford,Weekly,Burger,15.66
1,Eric,Male,Stamford,Daily,Chalupa,10.56
2,Charles,Male,New York,Never,Sushi,42.14


In [86]:
# Getting a sum of expenditure by each gender item wise.

foods.pivot_table(index='Gender', columns='Item', values='Spend', aggfunc='sum')

Item,Burger,Burrito,Chalupa,Donut,Ice Cream,Sushi
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,4094.3,4257.82,4152.26,4743.0,4032.87,4683.08
Male,3671.43,4012.62,3492.26,4015.76,4854.12,4059.85


In [87]:
# Getting the average expenditure by each gender item wise.

foods.pivot_table(index='Gender', columns='Item', values='Spend', aggfunc='mean')

Item,Burger,Burrito,Chalupa,Donut,Ice Cream,Sushi
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,49.930488,50.092,54.635,49.926316,49.788519,50.355699
Male,49.613919,48.344819,49.186761,43.649565,51.096,55.614384


In [88]:
# # Getting a median of expenditure by each gender item wise.

foods.pivot_table(index='Gender', columns='Item', values='Spend', aggfunc='median')

Item,Burger,Burrito,Chalupa,Donut,Ice Cream,Sushi
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,47.9,48.46,53.355,52.47,46.98,50.08
Male,49.815,46.41,44.37,42.085,47.29,58.47


In [91]:
# Passing multiple indexes in "index" parameter.
# getting average expenditure by each gender in each city, item wise.

foods.pivot_table(index=['Gender', 'City'], columns='Item', values='Spend', aggfunc='mean')

Unnamed: 0_level_0,Item,Burger,Burrito,Chalupa,Donut,Ice Cream,Sushi
Gender,City,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Female,New York,51.626667,42.563043,46.135789,46.670323,56.356296,47.75129
Female,Philadelphia,52.87871,52.098571,52.291563,54.642,46.225625,58.096
Female,Stamford,45.037778,53.532647,64.094,48.734118,46.910455,45.622187
Male,New York,58.822273,55.976,49.1108,44.842333,55.297586,51.709259
Male,Philadelphia,44.675238,43.764333,48.444783,37.859394,53.44561,49.852857
Male,Stamford,46.424516,46.438929,50.011304,49.004483,42.3688,70.434444


In [92]:
# Passing mulitple indexes and column-Indexes to "index", "column" parameter.
# Getting the average expenditure by each gender in each city, by each item per frequency.

foods.pivot_table(index=['Gender', 'City'], columns=['Item', 'Frequency'], values='Spend', aggfunc='mean')

Unnamed: 0_level_0,Item,Burger,Burger,Burger,Burger,Burger,Burger,Burger,Burger,Burrito,Burrito,...,Ice Cream,Ice Cream,Sushi,Sushi,Sushi,Sushi,Sushi,Sushi,Sushi,Sushi
Unnamed: 0_level_1,Frequency,Daily,Monthly,Never,Often,Once,Seldom,Weekly,Yearly,Daily,Monthly,...,Weekly,Yearly,Daily,Monthly,Never,Often,Once,Seldom,Weekly,Yearly
Gender,City,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Female,New York,43.778333,57.286667,97.89,23.74,52.425,31.58,92.175,64.825,44.89,40.913333,...,56.905,37.9175,40.535,46.58,69.33,49.134286,32.358,87.7,51.36125,46.482
Female,Philadelphia,77.226667,53.7625,54.7425,48.135,40.17,58.435714,16.0,61.585,53.595,17.14,...,47.546667,39.965,58.088333,78.71,47.645,85.87,61.275,62.848,55.666667,48.616667
Female,Stamford,48.22,59.6225,45.485,43.635,31.683333,48.765,31.004,51.171667,39.126,67.94,...,25.006,15.24,56.181429,54.195,19.56,19.01,38.95,27.82,52.56,45.3425
Male,New York,63.892,62.43,90.32,27.735,59.314,75.226667,69.69,24.805,78.736667,49.18,...,44.772,53.233333,56.3925,53.8575,54.67,9.31,68.16,53.496667,50.05,39.813333
Male,Philadelphia,37.566667,71.046667,8.655,73.84,35.88,47.015,33.296667,49.34,41.44,29.86,...,55.31,47.988,46.821667,77.795,40.961667,41.7775,53.696667,91.26,52.066,33.8
Male,Stamford,49.43,13.58,,48.1525,36.293333,53.25,77.5525,45.014,69.0575,39.866667,...,47.884,32.106667,,93.03,68.3,44.22,62.93,74.716667,84.54,48.71


In [93]:
# Creating pivot tables directly from pandas module.

pd.pivot_table(data=foods, index=['Gender', 'City'], columns='Item', values='Spend', aggfunc='mean')

Unnamed: 0_level_0,Item,Burger,Burrito,Chalupa,Donut,Ice Cream,Sushi
Gender,City,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Female,New York,51.626667,42.563043,46.135789,46.670323,56.356296,47.75129
Female,Philadelphia,52.87871,52.098571,52.291563,54.642,46.225625,58.096
Female,Stamford,45.037778,53.532647,64.094,48.734118,46.910455,45.622187
Male,New York,58.822273,55.976,49.1108,44.842333,55.297586,51.709259
Male,Philadelphia,44.675238,43.764333,48.444783,37.859394,53.44561,49.852857
Male,Stamford,46.424516,46.438929,50.011304,49.004483,42.3688,70.434444


### It is a good practice to create the outermost pivot level to the column with least unique values.

In [89]:
foods.nunique()

First Name    198
Gender          2
City            3
Frequency       8
Item            6
Spend         950
dtype: int64

# # pd.melt(id_var, var_name, value_name) function.

#### pd.melt() unpivots the passed DataFrame. It leaves the columns passed in "id_var" parameter and turns the rest of the columns as row values in a "variable" column, and puts values in a "value" column.

#### pd.melt() turns a DataFrame from a wide form to a long form.

In [2]:
# Importing a CSV.

quarterly_sales = pd.read_csv('Datasets/quarters.csv')
quarterly_sales.head()

Unnamed: 0,Salesman,Q1,Q2,Q3,Q4
0,Boris,602908,233879,354479,32704
1,Bob,43790,514863,297151,544493
2,Tommy,392668,113579,430882,247231
3,Travis,834663,266785,749238,570524
4,Donald,580935,411379,110390,651572


In [96]:
# Melting the Q1, Q2, Q3, Q4 column headers.

pd.melt(quarterly_sales, id_vars='Salesman')    # id_vars, will keep the passed column.

Unnamed: 0,Salesman,variable,value
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879


In [97]:
# Naming the melted columns new columns & the values columns.

pd.melt(quarterly_sales, id_vars='Salesman', var_name='Quarter', value_name='Revenue')

Unnamed: 0,Salesman,Quarter,Revenue
0,Boris,Q1,602908
1,Bob,Q1,43790
2,Tommy,Q1,392668
3,Travis,Q1,834663
4,Donald,Q1,580935
5,Ted,Q1,656644
6,Jeb,Q1,486141
7,Stacy,Q1,479662
8,Morgan,Q1,992673
9,Boris,Q2,233879
