# seaborn (heatmap) 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [None]:
# Report the versions of the libraries this notebook runs against.
# Names are left-padded to 7 characters so the "version =" columns line up.
print("\n".join(
    f"{label:<7} version = {module.__version__}"
    for label, module in (("pandas", pd), ("seaborn", sns), ("numpy", np))
))

## Ex 1: จำนวนบัตรพลาสติกในประเทศไทย (Number of plastic cards in Thailand)
http://www2.bot.or.th/statistics/BOTWEBSTAT.aspx?reportID=685&language=TH

In [None]:
# Load the plastic-card workbook from GitHub; its 'Card' column becomes the row index.
df = pd.read_excel(
    io='https://github.com/prasertcbs/basic-dataset/raw/master/plastic_cards_in_Thailand.xlsx',
    index_col='Card',
)
df  # display the loaded table

In [None]:
# Heatmap of the raw table with seaborn's default settings.
sns.heatmap(data=df)

In [None]:
# Annotated heatmap of the raw counts on a diverging palette (hues 0 and 240).
plt.figure(figsize=(8, 4))
cmap = sns.diverging_palette(0, 240, as_cmap=True)  # positional h_neg, h_pos
sns.heatmap(
    data=df,
    cmap=cmap,
    center=0,          # value mapped to the middle of the colormap
    linewidths=0.5,
    annot=True,
    fmt=',.0f',        # thousands separator, no decimals
)

In [None]:
# Preview the table scaled to millions, rounded to one decimal place.
# Vectorised division replaces the deprecated element-wise DataFrame.applymap.
(df / 1e6).round(1)

In [None]:
# Display df again: the previous expression returned a new frame and did not modify df.
df

In [None]:
# Rebind df to the values in millions (one decimal place).
# Vectorised division replaces the deprecated element-wise DataFrame.applymap.
# NOTE: df is overwritten, so re-running this cell divides by 1e6 again.
df = (df / 1e6).round(1)

In [None]:
# Annotated heatmap of the scaled table, with a title stating the unit.
plt.figure(figsize=(8, 5))
cmap = sns.diverging_palette(110, 130, as_cmap=True)  # positional h_neg, h_pos; returned as a colormap
sns.heatmap(
    data=df,
    cmap=cmap,
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.1f',  # one decimal place, matching the rounding applied to df
)
plt.title('# of plastic cards (in millions)');  # trailing ';' suppresses the text repr

## Ex 2: Monthly sales

In [None]:
import calendar

# Random monthly sales: integers in [10, 100] for three products over twelve months.
df = pd.DataFrame(
    data=np.random.randint(10, 101, size=(3, 12)),
    index=['mocha', 'latte', 'espresso'],
    columns=calendar.month_abbr[1:13],  # slots 1..12 hold the month abbreviations
)
df

In [None]:
# Show the raw building block on its own: a 3x12 array of random integers in [10, 100].
np.random.randint(10, 101, (3, 12))

In [None]:
# Wrap a fresh random 3x12 matrix in a DataFrame, keeping the default integer labels.
df = pd.DataFrame(data=np.random.randint(10, 101, size=(3, 12)))
df

In [None]:
import calendar

# Abbreviated month names for months 1..12 (slot 0 of month_abbr is skipped).
m_abbr = calendar.month_abbr[1:13]
m_abbr

In [None]:
# Replace the default integer column labels with the month abbreviations (modifies df in place).
df.columns=m_abbr
df

In [None]:
# Swap the integer row labels 0..2 for product names (modifies df in place).
df.rename(index=dict(enumerate(['mocha', 'latte', 'espresso'])), inplace=True)
df

In [None]:
# Heatmap of the monthly sales table with default settings.
sns.heatmap(data=df)

In [None]:
# Annotated heatmap of monthly sales on a diverging palette (hues 0 and 120).
plt.figure(figsize=(8, 4))
cmap = sns.diverging_palette(0, 120, as_cmap=True)  # positional h_neg, h_pos
sns.heatmap(
    data=df,
    cmap=cmap,
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.0f',  # whole numbers
)

In [None]:
import calendar

# Same layout as before, but values are drawn from [-50, 100] so negatives appear.
df = pd.DataFrame(
    data=np.random.randint(-50, 101, size=(3, 12)),
    index=['mocha', 'latte', 'espresso'],
    columns=calendar.month_abbr[1:13],  # slots 1..12 hold the month abbreviations
)
df

In [None]:
# With negative and positive values present, centring the diverging palette at 0
# puts the two signs on opposite sides of the colormap.
plt.figure(figsize=(8, 4))
cmap = sns.diverging_palette(0, 120, as_cmap=True)  # positional h_neg, h_pos
sns.heatmap(
    data=df,
    cmap=cmap,
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.0f',  # whole numbers
)

## Read the miles-per-gallon dataset (mpg.csv)

In [None]:
# Download mpg.csv from GitHub into df and preview the first five rows.
url = 'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

In [None]:
# 'gear': the first character of 'trans' ('a' / 'm') mapped to a readable categorical label.
df['gear'] = (
    df['trans'].str.get(0)
    .map({'a': 'auto', 'm': 'manual'})
    .astype('category')
)
# 'drv': replace the one-character codes with readable categorical labels.
df['drv'] = (
    df['drv']
    .map({'f': 'front', 'r': 'rear', '4': '4-wheel'})
    .astype('category')
)
df.head(n=5)

## Ex 3: create heatmap from crosstab table

In [None]:
# Count the rows for every (drv, class) combination.
xtab = pd.crosstab(index=df['drv'], columns=df['class'])
xtab

In [None]:
# Inspect the 'compact' column of the cross-tabulation.
xtab.loc[:, 'compact']

In [None]:
# Express every count as a relative difference from a baseline: the first row's
# value in the 'compact' column.
# .iloc[0] makes the positional lookup explicit; indexing a label-indexed Series
# with a bare integer via [] is deprecated in recent pandas.
# Vectorised arithmetic replaces the deprecated element-wise DataFrame.applymap.
# NOTE: xtab is overwritten, so re-running this cell rescales the already-scaled table.
base = xtab['compact'].iloc[0]
xtab = ((xtab - base) / base).round(1)
xtab

In [None]:
# Heatmap of the rescaled cross-tabulation with default settings.
sns.heatmap(data=xtab)

In [None]:
# Annotated heatmap of the rescaled table on a custom diverging palette (hues 10 and 20).
cmap = sns.diverging_palette(10, 20, as_cmap=True)  # positional h_neg, h_pos
sns.heatmap(
    data=xtab,
    cmap=cmap,
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.2f',  # two decimal places
)

In [None]:
# Same plot using the named 'RdGy' colormap instead of a custom palette.
sns.heatmap(
    data=xtab,
    cmap='RdGy',
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.2f',
)

In [None]:
# Same plot using the named 'YlOrRd' colormap.
sns.heatmap(
    data=xtab,
    cmap='YlOrRd',
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.2f',
)

## Ex 4: create heatmap from correlation matrix

In [None]:
# Reload mpg.csv so this example starts from the unmodified dataset.
url = 'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

In [None]:
# Recreate the categorical helper columns on the freshly loaded frame.
# 'gear' comes from the first character of 'trans'; 'drv' codes become readable labels.
df['gear'] = (
    df['trans'].str.get(0)
    .map({'a': 'auto', 'm': 'manual'})
    .astype('category')
)
df['drv'] = (
    df['drv']
    .map({'f': 'front', 'r': 'rear', '4': '4-wheel'})
    .astype('category')
)
df.head(n=5)

In [None]:
# Pairwise correlation of the numeric columns only.
# df also holds string and categorical columns; recent pandas no longer drops
# those silently in corr(), so select the numeric columns explicitly first.
df.select_dtypes(include='number').corr()

In [None]:
# Correlation matrix restricted to four columns of interest.
dg = df.loc[:, ['displ', 'cyl', 'cty', 'hwy']].corr()
dg

In [None]:
# Heatmap of the full correlation matrix with default settings.
sns.heatmap(data=dg)

In [None]:
# Start from an all-False boolean array with the same shape as the correlation matrix.
# dtype=bool keeps the mask a true boolean array instead of a float array of 0./1.
mask = np.zeros_like(dg, dtype=bool)
mask

In [None]:
# Mark the upper triangle (diagonal included) in place; the heatmap calls that
# receive mask=mask leave the marked cells blank.
mask[np.triu_indices_from(mask)] = True
mask

In [None]:
# Lower-triangle correlation heatmap; the colour scale is pinned to [-1, 1] with square cells.
sns.heatmap(
    data=dg,
    mask=mask,
    square=True,
    vmin=-1,
    vmax=1,
)

In [None]:
# As above, plus cell annotations and the colormap centred at 0.
sns.heatmap(
    data=dg,
    mask=mask,
    square=True,
    vmin=-1,
    vmax=1,
    center=0,
    annot=True,
)

In [None]:
# As above, with two-decimal annotations and thin lines separating the cells.
sns.heatmap(
    data=dg,
    mask=mask,
    square=True,
    vmin=-1,
    vmax=1,
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.2f',
)

In [None]:
# Palette reference: http://seaborn.pydata.org/tutorial/color_palettes.html
# Tip from the original notebook: use the HSL colour picker in PowerPoint to choose hues.
cmap = sns.diverging_palette(0, 120, as_cmap=True)  # positional h_neg, h_pos
sns.heatmap(
    data=dg,
    mask=mask,
    cmap=cmap,
    square=True,
    vmin=-1,
    vmax=1,
    center=0,
    linewidths=0.5,
    annot=True,
    fmt='.2f',
)