In [1]:
import pandas as pd

In [2]:
# Remote location of the sample workbook (an .xlsx file fetched over HTTPS).
url = 'https://github.com/datagy/pivot_table_pandas/raw/master/sample_pivot.xlsx'
# Load the workbook into a DataFrame; running this cell needs network access.
df = pd.read_excel(io=url)

In [3]:
# Preview the first five rows to check the available columns.
df.head(n=5)

Unnamed: 0,Date,Region,Type,Units,Sales
0,2020-07-11,East,Children's Clothing,18.0,306
1,2020-09-23,North,Children's Clothing,14.0,448
2,2020-04-02,South,Women's Clothing,17.0,425
3,2020-02-28,East,Children's Clothing,26.0,832
4,2020-03-19,West,Women's Clothing,3.0,33


In [4]:
# Average the numeric measures per Region. The value columns are listed
# explicitly: recent pandas releases no longer silently drop columns that
# cannot be averaged (such as the text column 'Type'), so relying on the
# implicit "every remaining column" default is fragile across versions.
pv1 = pd.pivot_table(df, index='Region', values=['Sales', 'Units'])

In [5]:
# Show the result: one row per Region with the mean of Sales and of Units.
pv1

Unnamed: 0_level_0,Sales,Units
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,408.182482,19.73236
North,438.924051,19.202643
South,432.956204,20.423358
West,452.029412,19.294118


In [6]:
# Limit the aggregation to Sales; no aggfunc is passed, so the default is used.
pv2 = pd.pivot_table(
    data=df,
    index='Region',
    values='Sales',
)

In [7]:
# Show the result: the same Sales means as in pv1, without the Units column.
pv2

Unnamed: 0_level_0,Sales
Region,Unnamed: 1_level_1
East,408.182482
North,438.924051
South,432.956204
West,452.029412


Note: `pivot_table` aggregates with the mean by default (`aggfunc='mean'`), so the values above are the average Sales per Region.

Additional Arguments

In [8]:
# Switch the aggregation from the default to the total of Sales per Region.
pv3 = pd.pivot_table(
    data=df,
    index='Region',
    values='Sales',
    aggfunc='sum',
)

In [9]:
# Show the result: total Sales per Region.
pv3

Unnamed: 0_level_0,Sales
Region,Unnamed: 1_level_1
East,167763
North,138700
South,59315
West,61476


What if you want the average as well as the total? Pass a list of functions to `aggfunc`.

In [10]:
# A list of aggregations yields one column group per function.
# NOTE: this rebinds pv3, so the sum-only table from the earlier cell is replaced.
pv3 = pd.pivot_table(
    data=df,
    index='Region',
    values='Sales',
    aggfunc=['sum', 'mean'],
)

In [11]:
# Show the result: the columns are now two-level, with the aggregation
# function (sum, mean) on top and the value column (Sales) underneath.
pv3

Unnamed: 0_level_0,sum,mean
Unnamed: 0_level_1,Sales,Sales
Region,Unnamed: 1_level_2,Unnamed: 2_level_2
East,167763,408.182482
North,138700,438.924051
South,59315,432.956204
West,61476,452.029412


In [13]:
# Inspect the row labels: the Region values become the index, named 'Region'.
pv3.index

Index(['East', 'North', 'South', 'West'], dtype='object', name='Region')

What if we want to break the sales out by both Region and Type of clothing? Use the `columns` argument.

In [15]:
# Cross-tabulate: Regions down the rows, clothing Types across the columns.
pv4 = pd.pivot_table(
    data=df,
    index='Region',
    columns='Type',
    values='Sales',
)

In [16]:
# Show the result: mean Sales for every Region / Type combination.
pv4

Type,Children's Clothing,Men's Clothing,Women's Clothing
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East,405.743363,423.647541,399.028409
North,438.894118,449.157303,432.528169
South,412.666667,475.435897,418.924528
West,480.52381,465.292683,419.188679


In [17]:
# Same layout for Units; fill_value=0 replaces any missing cell in the
# resulting table with 0.
pv5 = pd.pivot_table(
    data=df,
    index='Region',
    columns='Type',
    values='Units',
    fill_value=0,
)

In [18]:
# Show the result. North / Men's Clothing reads 0.0: this is likely a cell
# filled by fill_value rather than a computed mean (presumably Units is
# missing for those rows, since pv4 shows the group does have Sales) -- TODO confirm.
pv5

Type,Children's Clothing,Men's Clothing,Women's Clothing
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East,20.513274,19.836066,19.159091
North,20.741176,0.0,18.28169
South,22.6,18.589744,19.924528
West,18.785714,20.219512,18.981132


Add column and row totals (using the *margins* argument).  
Say we want to know the total sales for each region, as well as the total sales for each type of clothing.

In [21]:
# For comparison only (left disabled): the same table built with margins=False.
# pv6 = pd.pivot_table(df, index='Region', columns='Type', values='Sales', 
#                      aggfunc='sum', margins=False)
# # Note: with margins=False no 'All' row or column is added

In [25]:
# margins=True appends totals: an 'All' column (total per Region) and an
# 'All' row (total per Type).
pv6 = pd.pivot_table(
    data=df,
    index='Region',
    columns='Type',
    values='Sales',
    aggfunc='sum',
    margins=True,
)

In [26]:
# Show the result: summed Sales with an extra 'All' row and 'All' column.
pv6

Type,Children's Clothing,Men's Clothing,Women's Clothing,All
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East,45849,51685,70229,167763
North,37306,39975,61419,138700
South,18570,18542,22203,59315
West,20182,19077,22217,61476
All,121907,129279,176068,427254


Rename the margins row and column using `margins_name`:

In [27]:
# margins_name sets the label used for the totals row and column.
# NOTE: this rebinds pv6, replacing the table whose totals were labelled 'All'.
pv6 = pd.pivot_table(
    data=df,
    index='Region',
    columns='Type',
    values='Sales',
    aggfunc='sum',
    margins=True,
    margins_name='Total',
)

In [28]:
# Show the result: the totals row and column are now labelled 'Total'.
pv6

Type,Children's Clothing,Men's Clothing,Women's Clothing,Total
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East,45849,51685,70229,167763
North,37306,39975,61419,138700
South,18570,18542,22203,59315
West,20182,19077,22217,61476
Total,121907,129279,176068,427254
