# Multidimensional cumulative histograms with pandas, plotted with seaborn faceting

## 1. import libraries

In [None]:
import pandas as pd
import numpy as np
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
matplotlib.style.use('ggplot')

## 2. load input table

In [None]:
# Whitespace-delimited text table, fetched from raw.githubusercontent.com.
input_path = 'https://raw.githubusercontent.com/TaiSakuma/cumsum_example_01/master/tbl_n.process.htbin.metbin.minChi.txt'
# `delim_whitespace=True` is deprecated since pandas 2.2; the regex
# separator r'\s+' is the documented equivalent.
tbl = pd.read_table(input_path, sep=r'\s+')

In [None]:
# Preview the first five rows of the loaded table.
tbl.head(n=5)

## 3. draw histograms with faceting with seaborn

In [None]:
# Take log10 of the counts. Zero counts give -inf, so NumPy's
# divide-by-zero warning is silenced for this computation only.
with np.errstate(divide='ignore'):
    log10_counts = np.log10(tbl['n'])
tbl['log10n'] = log10_counts

In [None]:
# Lower and upper y limits, in log10 units.
yaixs_range = [-10, 5]
# Replace -inf (log10 of a zero count) with a finite value one unit below
# the lower limit.
lower_limit = yaixs_range[0]
is_neg_inf = tbl['log10n'] == -np.inf
tbl.loc[is_neg_inf, 'log10n'] = lower_limit - 1

In [None]:
# One facet per (htbin, metbin) pair, one step line per process.
# The y limits are handed to FacetGrid itself rather than set with
# `plt.ylim`, which acts only on pyplot's current axes and reaches the
# other facets only through axis sharing.
g = sns.FacetGrid(tbl, row="htbin", col="metbin", hue='process',
                  margin_titles=True, ylim=tuple(yaixs_range))
g.map(plt.step, 'minChi', 'log10n', where='post')

## 4. calculate cumulative histograms in multiple dimensions

### 4.1 fill missing categories

In [None]:
# Number of rows recorded for each process.
rows_per_process = tbl.groupby('process').size()
rows_per_process

In [None]:
# Build every combination of the observed (sorted) values of the key columns.
keys = ['process', 'htbin', 'metbin', 'minChi']
sorted_levels = [np.sort(tbl[key].unique()) for key in keys]
all_combinations = list(itertools.product(*sorted_levels))
tbl_mesh = pd.DataFrame(all_combinations)
tbl_mesh.columns = keys

# Left-join the existing rows onto the full mesh. Combinations absent from
# the input come back as NaN and are then set to 0.
# NOTE(review): fillna(0) applies to every column, so a previously added
# 'log10n' column is also set to 0 on the new rows -- confirm this is intended.
tbl = tbl_mesh.merge(tbl, how='left', on=keys)
tbl = tbl.fillna(0)

# Row count per process after filling.
tbl.groupby('process').size()

### 4.2 calculate cumulative sum

In [None]:
# Start from the plain counts, then accumulate along one numeric axis at a
# time. For each axis the rows are grouped by the other three columns and
# summed in reversed row order, so each row accumulates the rows that come
# after it within its group.
tbl['cumn'] = tbl['n']
all_columns = ['process', 'htbin', 'metbin', 'minChi']
for summed_axis in ['minChi', 'metbin', 'htbin']:
    group_columns = [c for c in all_columns if c != summed_axis]
    reversed_rows = tbl[::-1]
    running_total = reversed_rows.groupby(group_columns)['cumn'].cumsum()
    tbl['cumn'] = running_total[::-1]

### 4.3 stack processes (cumulative sum in non-numeric dimension)

In [None]:
# Stack the processes: within each (htbin, metbin, minChi) cell, sum 'cumn'
# over the processes in reversed row order.
cell_columns = ['htbin', 'metbin', 'minChi']
reversed_rows = tbl[::-1]
stacked = reversed_rows.groupby(cell_columns)['cumn'].cumsum()
tbl['cumn'] = stacked[::-1]

## 5. draw with faceting with seaborn

In [None]:
# Take log10 of the cumulative counts, with NumPy's divide-by-zero warning
# silenced for this computation only.
with np.errstate(divide='ignore'):
    log10_cumulative = np.log10(tbl['cumn'])
tbl['log10cumn'] = log10_cumulative

In [None]:
# One facet per (htbin, metbin) pair, one step line per process, drawing
# the log10 cumulative counts against minChi.
facet_options = dict(row="htbin", col="metbin", hue='process', margin_titles=True)
g = sns.FacetGrid(tbl, **facet_options)
g.map(plt.step, 'minChi', 'log10cumn', where='post')