## Hexbin plots are 2D histograms where the bins are hexagons and the color represents the number of data points within each bin

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_wine

In [2]:
# Fetch the wine dataset as pandas objects: X holds the feature columns, y the wine-class target.
X, y = load_wine(as_frame=True, return_X_y=True)

### Getting started with hexbin plot using matplotlib

In [3]:
# Basic hexbin of two wine features; without `C`, color encodes the number of points per hexagon.
plt.hexbin(X['alcohol'], X['malic_acid'], gridsize=(20, 15), cmap='magma')

# Both axis labels share the same text styling.
axis_label_style = {'fontsize': 16, 'fontweight': 'bold'}
plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.xlabel('Alcohol', **axis_label_style)
plt.ylabel('Malic acid', **axis_label_style)

# Hide the frame around the plot area.
ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-1.png'>

### pandas one-liner to the rescue

In [4]:
# Same plot through the pandas plotting accessor; the trailing `;` hides the Axes repr.
X.plot.hexbin(
    x='alcohol', y='malic_acid', gridsize=(15, 10),
    title="Hexbin-plot alcohol vs malic-acid",
);

<img src='./plots/hexbin_plot-2.png'>

### modify line properties

In [5]:
# White outlines (`edgecolors`) of width 1.5 (`linewidths`) separate the hexagons.
outline = {'linewidths': 1.5, 'edgecolors': 'white'}
plt.hexbin(X['alcohol'], X['malic_acid'], gridsize=(15, 10), cmap='magma', **outline)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
for set_label, text in ((plt.xlabel, 'Alcohol'), (plt.ylabel, 'Malic acid')):
    set_label(text, fontsize=16, fontweight='bold')

ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)  # drop the frame around the plot

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-3.png'>

In [6]:
# Black dotted hexagon borders on a green sequential colormap.
hexbin_options = dict(
    gridsize=(15, 10),
    cmap='Greens',
    linewidths=1.5,
    edgecolors='k',
    linestyle='dotted',
)
plt.hexbin(X['alcohol'], X['malic_acid'], **hexbin_options)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.ylabel('Malic acid', fontweight='bold', fontsize=16)
plt.xlabel('Alcohol', fontweight='bold', fontsize=16)

# Remove the axes frame so only the hexagons remain.
ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-4.png'>

In [7]:
# Black dashed hexagon borders on the yellow-orange-red colormap.
plt.hexbin(
    X['alcohol'],
    X['malic_acid'],
    gridsize=(15, 10),
    cmap='YlOrRd',
    edgecolors='k',
    linewidths=1.5,
    linestyle='dashed',
)

bold_16 = {'fontsize': 16, 'fontweight': 'bold'}
plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.xlabel('Alcohol', **bold_16)
plt.ylabel('Malic acid', **bold_16)

ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)  # no frame around the plot

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-5.png'>

In [8]:
# Semi-transparent hexagons (`alpha=0.5`) with black dashed borders.
border_style = {'linewidths': 1.5, 'edgecolors': 'k', 'linestyle': 'dashed'}
plt.hexbin(
    X['alcohol'], X['malic_acid'],
    gridsize=(15, 10), cmap='gist_heat', alpha=0.5, **border_style
)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
for set_label, text in ((plt.xlabel, 'Alcohol'), (plt.ylabel, 'Malic acid')):
    set_label(text, fontsize=16, fontweight='bold')

# Hide all four sides of the axes frame.
ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-6.png'>

### Show only hexbins that contain at least one data point | use `mincnt`

In [9]:
# `mincnt=1` draws only the hexagons that contain at least one data point.
plt.hexbin(
    X['alcohol'], X['malic_acid'],
    mincnt=1,
    gridsize=(15, 10), cmap='magma',
    linewidths=1.5, edgecolors='white',
)

label_kwargs = dict(fontsize=16, fontweight='bold')
plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.xlabel('Alcohol', **label_kwargs)
plt.ylabel('Malic acid', **label_kwargs)

ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)  # frameless plot

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-7.png'>

### Use `vmin` and `vmax` to control the range of counts covered by the colormap

In [10]:
# `vmin` / `vmax` limit the colormap range to counts between 1 and 3.
color_limits = {'vmin': 1, 'vmax': 3}
plt.hexbin(
    X['alcohol'], X['malic_acid'],
    gridsize=(15, 10), cmap='magma',
    linewidths=1.5, edgecolors='white',
    **color_limits
)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.ylabel('Malic acid', fontweight='bold', fontsize=16)
plt.xlabel('Alcohol', fontweight='bold', fontsize=16)

# Strip the axes frame.
ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-8.png'>

### Use `mincnt`, `vmin` and `vmax` together to zero in on the region of interest

In [11]:
# Combine both controls: skip empty hexagons (`mincnt`) and restrict the
# colormap range to counts 1..3 (`vmin` / `vmax`).
focus = dict(mincnt=1, vmin=1, vmax=3)
appearance = dict(gridsize=(15, 10), cmap='magma', linewidths=1.5, edgecolors='white')
plt.hexbin(X['alcohol'], X['malic_acid'], **appearance, **focus)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
for set_label, text in ((plt.xlabel, 'Alcohol'), (plt.ylabel, 'Malic acid')):
    set_label(text, fontsize=16, fontweight='bold')

ax = plt.gca()
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_visible(False)  # hide the frame

plt.colorbar()
plt.show()

<img src='./plots/hexbin_plot-9.png'>

### Distribution of a third variable based on selected variables

use the `C` parameter to pass the third variable

I want to see the distribution of `wine-type` based on `alcohol` & `malic_acid`: each hexagon is colored by the mean `wine-type` of the points that fall inside it.

In [12]:
# Color each hexagon by a third variable: with `C=y` hexbin aggregates the wine
# class of the points in each bin (mean by default) instead of counting them.
plt.hexbin(
    x=X['alcohol'], y=X['malic_acid'], gridsize=(15, 10),
    linewidths=1.5, edgecolors='white', C=y
)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.xlabel('Alcohol', fontsize=16, fontweight='bold')
plt.ylabel('Malic acid', fontsize=16, fontweight='bold')

# Hide the frame around the plot area.
ax = plt.gca()
ax.spines[['bottom', 'top', 'left', 'right']].set_visible(False)

# Label the colorbar: unlike the earlier plots, the color scale here is the
# mean wine class per hexagon, not a point count.
plt.colorbar(label='Mean wine class')
plt.show()

<img src='./plots/hexbin_plot-10.png'>

### Change the aggregation method from `mean` to `median`

Use `reduce_C_function=np.median` (the default aggregation is `np.mean`).

In [13]:
# Same third-variable plot, but each hexagon now shows the median (instead of
# the default mean) wine class of the points it contains.
plt.hexbin(
    x=X['alcohol'], y=X['malic_acid'], gridsize=(15, 10),
    linewidths=1.5, edgecolors='white', C=y, reduce_C_function=np.median
)

plt.title('Hexbin plot alcohol vs malic acid', fontsize=20)
plt.xlabel('Alcohol', fontsize=16, fontweight='bold')
plt.ylabel('Malic acid', fontsize=16, fontweight='bold')

# Hide the frame around the plot area.
ax = plt.gca()
ax.spines[['bottom', 'top', 'left', 'right']].set_visible(False)

# Label the colorbar so the aggregation in use is visible on the figure itself.
plt.colorbar(label='Median wine class')
plt.show()

<img src='./plots/hexbin_plot-11.png'>

In [9]:
# Value range (min / max) of the two features used in the plots below.
X.loc[:, ['alcohol', 'proline']].agg(['min', 'max'])

Unnamed: 0,alcohol,proline
min,11.03,278.0
max,14.83,1680.0


In [23]:
# Alcohol vs proline through the pandas accessor; `mincnt=1` hides empty hexagons.
ax = X[['alcohol', 'proline']].plot.hexbin(
    x='alcohol',
    y='proline',
    gridsize=(15, 16),
    mincnt=1,
    edgecolor='k',
    title='Hexbin plot Alcohol vs Proline',
)
# Remove the frame around the plot area.
for side in ('top', 'left', 'bottom', 'right'):
    ax.spines[side].set_visible(False)

<img src='./plots/hexbin_plot-12.png'>

### Distribution of wine class based on alcohol vs proline

In [55]:
# Work on a copy so the feature frame X stays untouched, and attach the
# wine-class target as an extra column for plotting.
df = X.copy()
df['wine-type'] = y

# Reference scatter plot colored by wine class. The title matches the other
# figures in this notebook, and the trailing `;` keeps the Axes repr out of the
# cell output.
df.plot(
    kind='scatter', x='alcohol', y='proline', c='wine-type', cmap='winter',
    title='Scatter plot Alcohol vs Proline'
);

<img src='./plots/scatter_plot-13.png'>

In [60]:
# Wine class per hexagon for alcohol vs proline: with `C='wine-type'` the
# hexagons are colored by the aggregated class value rather than by point count.
wine_cols = df[['alcohol', 'proline', 'wine-type']]
ax = wine_cols.plot.hexbin(
    x='alcohol',
    y='proline',
    C='wine-type',
    cmap='winter',
    gridsize=(15, 16),
    edgecolor='k',
    title='Hexbin plot Alcohol vs Proline',
)
# Remove the frame around the plot area.
for side in ('top', 'left', 'bottom', 'right'):
    ax.spines[side].set_visible(False)

<img src='./plots/hexbin_plot-14.png'>

In [61]:
# Wine class per hexagon for alcohol vs proline, with `mincnt=1` passed
# so that hexagons without any data point are not drawn.
wine_cols = df[['alcohol', 'proline', 'wine-type']]
ax = wine_cols.plot.hexbin(
    x='alcohol',
    y='proline',
    C='wine-type',
    mincnt=1,
    cmap='winter',
    gridsize=(15, 16),
    edgecolor='k',
    title='Hexbin plot Alcohol vs Proline',
)
# Remove the frame around the plot area.
for side in ('top', 'left', 'bottom', 'right'):
    ax.spines[side].set_visible(False)

<img src='./plots/hexbin_plot-15.png'>