https://stackoverflow.com/questions/23377108/pandas-percentage-of-total-with-groupby

In [1]:
import random
import string
import pandas as pd
import numpy as np
np.random.seed(0)

# This is the total number of groups to be created
NumberOfGroups = 50000

# Create a lot of groups (random strings of 4 letters)
Group1     = [''.join(random.choice(string.ascii_uppercase) 
                for _ in range(4)) for x in range(NumberOfGroups//10)]*10

Group2     = [''.join(random.choice(string.ascii_uppercase) 
                for _ in range(4)) for x in range(NumberOfGroups//2)]*2

FinalGroup = [''.join(random.choice(string.ascii_uppercase) 
                for _ in range(4)) for x in range(NumberOfGroups)]

# Make the numbers
NumbersForPercents = [np.random.randint(100, 999) for _ in range(NumberOfGroups)]

# Make the dataframe
df = pd.DataFrame({'Group 1': Group1,
                   'Group 2': Group2,
                   'Final Group': FinalGroup,
                   'Numbers I want as percents': NumbersForPercents})

df.head()

Unnamed: 0,Group 1,Group 2,Final Group,Numbers I want as percents
0,LCJE,XASW,VPJL,784
1,KWRR,GTPI,WPMV,659
2,IXYO,PJQU,ULWK,729
3,NAGF,EZRO,BBRR,292
4,TUFK,NPKR,BPHM,935


In [2]:
# Initial grouping (basically a sorted version of df)
PreGroupby_df = df.groupby(["Group 1","Group 2","Final Group"]).agg({'Numbers I want as percents': 'sum'}).reset_index()

# Get the sum of values for the "final group", append "_Sum" to it's column name, and change it into a dataframe (.reset_index)
SumGroup_df = df.groupby(["Group 1","Group 2"]).agg({'Numbers I want as percents': 'sum'}).add_suffix('_Sum').reset_index()

# Merge the two dataframes
Percents_df = pd.merge(PreGroupby_df, SumGroup_df)

# Divide the two columns
Percents_df["Percent of Final Group"] = Percents_df["Numbers I want as percents"] / Percents_df["Numbers I want as percents_Sum"] * 100

# Drop the extra _Sum column
Percents_df.drop(["Numbers I want as percents_Sum"], inplace=True, axis=1)

In [3]:
state_office = df.groupby(['Group 1','Group 2','Final Group']).agg({'Numbers I want as percents': 'sum'})

state_pcts = state_office.groupby(level=['Group 1','Group 2']).apply(lambda x: 100 * x / float(x.sum()))

In [4]:
df = pd.DataFrame({'state': ['CA', 'WA', 'CO', 'AZ'] * 3,
               'office_id': list(range(1, 7)) * 2,
               'sales': [np.random.randint(100000, 999999) for _ in range(12)]})

df.head()

Unnamed: 0,state,office_id,sales
0,CA,1,297165
1,WA,2,781450
2,CO,3,238940
3,AZ,4,343315
4,CA,5,280502


In [5]:
result = pd.crosstab(index=df['state'], 
                     columns=df['office_id'], 
                     values=df['sales'], 
                     aggfunc='sum', 
                     normalize='index').applymap('{:.2f}%'.format)




print(result)

office_id      1      2      3      4      5      6
state                                              
AZ         0.00%  0.37%  0.00%  0.16%  0.00%  0.47%
CA         0.28%  0.00%  0.45%  0.00%  0.27%  0.00%
CO         0.52%  0.00%  0.13%  0.00%  0.35%  0.00%
WA         0.00%  0.45%  0.00%  0.23%  0.00%  0.32%


In [6]:
result = pd.crosstab(index=df['state'], 
                     columns=df['office_id'], 
                     values=df['sales'], 
                     aggfunc='sum'
                    )




print(result)

office_id         1         2         3         4         5         6
state                                                                
AZ              NaN  789538.0       NaN  343315.0       NaN  984643.0
CA         297165.0       NaN  468434.0       NaN  280502.0       NaN
CO         941115.0       NaN  238940.0       NaN  640671.0       NaN
WA              NaN  781450.0       NaN  409238.0       NaN  556211.0
