# Chapter 34: Styling Dataframes

In [2]:
import pandas as pd

In [3]:
# Location of dirtydevil.txt in the mattharrison/datasets GitHub repository.
url = ('https://github.com/mattharrison/datasets/raw/master'
       '/data/dirtydevil.txt')
# Tab-separated file. Rows 0-33 and row 35 (0-indexed) are dropped, so
# row 34 is the first row parsed and supplies the column names.
df = pd.read_csv(
    url,
    sep='\t',
    skiprows=lambda row_num: row_num == 35 or row_num < 34,
)

  df = pd.read_csv(url, skiprows=lambda num: num <34 or num == 35,


In [4]:
def tweak_river(df_):
    """Return the river data indexed by timestamp, with readable column names.

    The ``datetime`` column is parsed with ``pd.to_datetime`` and becomes the
    index; ``144166_00060`` is renamed to ``cfs`` and ``144167_00065`` to
    ``gage_height``.  Every other column passes through unchanged, and a new
    frame is returned rather than ``df_`` being modified.
    """
    readable_names = {'144166_00060': 'cfs',
                      '144167_00065': 'gage_height'}
    parsed_times = pd.to_datetime(df_.datetime)
    with_times = df_.assign(datetime=parsed_times)
    return with_times.rename(columns=readable_names).set_index('datetime')

In [5]:
# Clean the raw frame; the bare name on the last line displays the result.
dd = df.pipe(tweak_river)
dd

Unnamed: 0_level_0,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-05-07 01:00:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:15:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:30:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:45:00,USGS,9333500,MDT,70.00,A:[91],,
2001-05-07 02:00:00,USGS,9333500,MDT,70.00,A:[91],,
...,...,...,...,...,...,...,...
2020-09-28 08:30:00,USGS,9333500,MDT,9.53,P,6.16,P
2020-09-28 08:45:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:00:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:15:00,USGS,9333500,MDT,9.20,P,6.15,P


## 34.2 Sparklines

- A sparkline is a small plot drawn without axes or coordinates.
- The intent is to show a general trend.

In [9]:
import sparklines

# Quick demo: draw the numbers 0-9 as a unicode sparkline. The call returns
# a list of strings (a single string for this input).
sparklines.sparklines(range(10))

['▁▂▃▃▄▅▆▆▇█']

In [10]:
# (source column, aggregation function) pair in the shape used by pandas
# named aggregation. The function bins the series into 2-day windows, takes
# each window's median, replaces missing medians with 0 and returns the first
# string of the resulting sparkline.
flow_trend = (
    'cfs',
    lambda cfs: sparklines.sparklines(
        cfs.resample('2D').median().fillna(0)
    )[0],
)

In [12]:
# Monthly summary of the river data: one row per month of the datetime index.
agg_flow = (
    dd
    # Grouper instead of .resample('M'): resample .agg doesn't support
    # named aggregations
    .groupby(pd.Grouper(freq='M'))
    .agg(
        cfs=('cfs', 'median'),
        # each reading is scaled by 15 * 60 before summing; the index steps
        # in 15-minute increments, so this presumably turns a per-second
        # rate into a volume per reading -- TODO confirm units
        total_flow=('cfs', lambda readings: (readings * 15 * 60).sum()),
        gage_height=('gage_height', 'median'),
        # sparkline of 2-day medians; missing values are replaced with 0
        # both before and after resampling
        flow_trend=(
            'cfs',
            lambda readings: sparklines.sparklines(
                readings.fillna(0).resample('2D').median().fillna(0)
            )[0],
        ),
    )
    # later keyword arguments of .assign read columns created by earlier
    # ones, so the order below matters
    .assign(
        quarterly_flow=lambda monthly: (
            monthly['total_flow'].resample('Q').transform('sum')
        ),
        percent_quarterly_flow=lambda monthly: (
            monthly['total_flow'] / monthly['quarterly_flow']
        ),
        # .33 looks like a one-third-per-month target -- TODO confirm
        off_goal=lambda monthly: monthly['percent_quarterly_flow'] - .33,
        cost=lambda monthly: monthly['total_flow'] * .0002,
    )
)

In [13]:
# Show the per-month sparkline strings on their own.
agg_flow['flow_trend']

datetime
2001-05-31       █▅▅▃▅▄▄▃▂▃▂▁▁
2001-06-30     █▇▆▄▁▁▁▁▁▁▁▁▁▁▆
2001-07-31                 ▁█▄
2001-08-31               █▂▁▁▁
2001-09-30       ▆▆▁▁▅▆▁██▆▆▆▆
                    ...       
2020-05-31    ▇▅▇▆▅▆█▆▄▃▃▃▃▂▂▁
2020-06-30     ▄▅▅▅██▆▄▁▁▁▁▁▁▁
2020-07-31    ▁▁▁▁▂▂▂▂▂▂▂▂█▅█▅
2020-08-31    █▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁
2020-09-30      ▁▁▁▁▁▁▅▅▅▅▆▇██
Freq: M, Name: flow_trend, Length: 233, dtype: object

## 34.3 The .style Attribute

In [14]:
# Styled view of the monthly summary. `.style` returns a pandas Styler;
# every call after it configures how the table is rendered and leaves
# `agg_flow` itself untouched.
(agg_flow
 .reset_index()
 .style
 # after this we are not working with a dataframe but a "styler" object
 .format({'cost': '${:,.2f}', 'datetime': '{:%Y/%m}/01',
          'percent_quarterly_flow': '{:.1%}',
          'off_goal': '{:+.1%}',
          **{col: '{:.1f}' for col in ['cfs', 'total_flow', 'quarterly_flow']}},
          na_rep='Missing')
 # cap the bar scale at the 95th percentile so a few large months do not
 # flatten every other bar
 .bar(subset='cfs', color='#c07fef', vmax=agg_flow.cfs.quantile(.95))
 .bar(subset='off_goal', color=['red', 'green'], align='mid')
 # color is passed positionally: the keyword was `null_color` in older
 # pandas and `color` in newer releases, while the first positional
 # parameter is the highlight color in both
 .highlight_null('#fef70c')  # wish this was highlight_na
 .highlight_max(axis=0, color='green')
 .background_gradient(axis=0, cmap='Reds', subset='cost', vmin=1_000, vmax=25_000)
 .set_caption('Dirty Devil Summary')
 .set_properties(**{'background-color': '#999'}, subset='datetime')
 # CSS color keywords must be unquoted; `color: "grey"` is an invalid
 # declaration and gets ignored
 # NOTE(review): newer pandas renames Styler.applymap to Styler.map; kept
 # here for compatibility with the version this notebook was run on
 .applymap(lambda val: f'color: grey; opacity: 80%; background-color:{"#4589ae" if val > 0 else "#c07fef"}',
           subset='cfs')
 # .set_table_styles breaks sticky headers if it is after
 .set_table_styles([{'selector': 'td:hover', 'props': 'background-color: pink; font-size:14pt;'}])
 .set_sticky(axis="columns")
 # .hide_index() is deprecated; .hide(axis='index') is its replacement
 .hide(axis='index')
)


  (agg_flow
  .hide_index()


datetime,cfs,total_flow,gage_height,flow_trend,quarterly_flow,percent_quarterly_flow,off_goal,cost
2001/05/01,47.0,105383700.0,Missing,█▅▅▃▅▄▄▃▂▃▂▁▁,123227100.0,85.5%,+52.5%,"$21,076.74"
2001/06/01,23.0,17843400.0,Missing,█▇▆▄▁▁▁▁▁▁▁▁▁▁▆,123227100.0,14.5%,-18.5%,"$3,568.68"
2001/07/01,17.0,7781400.0,Missing,▁█▄,243448920.0,3.2%,-29.8%,"$1,556.28"
2001/08/01,52.5,192848220.0,Missing,█▂▁▁▁,243448920.0,79.2%,+46.2%,"$38,569.64"
2001/09/01,26.0,42819300.0,Missing,▆▆▁▁▅▆▁██▆▆▆▆,243448920.0,17.6%,-15.4%,"$8,563.86"
2001/10/01,54.0,134975700.0,Missing,▁▁▂▂▃▄▄▅▅▅▆▇▇▇█,480483900.0,28.1%,-4.9%,"$26,995.14"
2001/11/01,104.0,203110200.0,Missing,▆▆▆▆▆▇▇▇██▁▁▁▇▇,480483900.0,42.3%,+9.3%,"$40,622.04"
2001/12/01,115.0,142398000.0,Missing,▆▁▁▁▇▇▁▁▁▆▆▆▁▆▇█,480483900.0,29.6%,-3.4%,"$28,479.60"
2002/01/01,136.0,197745300.0,Missing,██▇███▇▇▁▁▁▁▁▁▇,638525700.0,31.0%,-2.0%,"$39,549.06"
2002/02/01,131.0,157920300.0,Missing,▄▄█▆▆█▁,638525700.0,24.7%,-8.3%,"$31,584.06"
