In [118]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np

**A Brief matplotlib API Primer**

In [6]:
data = np.arange(10)

In [7]:
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1ffa9c5e610>]

In [30]:
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
plt.plot(np.random.randn(50).cumsum(), 'k--')
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
fig.subplots_adjust(left=None, bottom=None, right=None, top=None,
wspace=None, hspace=None)

<IPython.core.display.Javascript object>

In [27]:
fig, axes = plt.subplots(2, 3)
fig.tight_layout(pad=1.0)
axes
fig.subplots_adjust(left=None, bottom=None, right=None, top=None,
wspace=None, hspace=None)

<IPython.core.display.Javascript object>

**Adjusting the spacing around subplots**

In [43]:
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0.2, hspace=0.2)

<IPython.core.display.Javascript object>

**Colors, Markers, and Line Styles**

Matplotlib’s main plot function accepts arrays of x and y coordinates and optionally a
string abbreviation indicating color and line style. For example, to plot x versus y
with green dashes, you would execute:
    
ax.plot(x, y, 'g--')

This way of specifying both color and line style in a string is provided as a conve‐
nience; in practice if you were creating plots programmatically you might prefer not
to have to munge strings together to create plots with the desired style. The same plot
could also have been expressed more explicitly as:
    
ax.plot(x, y, linestyle='--', color='g')

There are a number of color abbreviations provided for commonly used colors, but
you can use any color on the spectrum by specifying its hex code (e.g., '#CECECE').
You can see the full set of line styles by looking at the docstring for plot (use plot? in
IPython or Jupyter).



In [39]:
from numpy.random import randn

In [47]:
fig = plt.figure()
plt.plot(randn(30).cumsum(), 'ko--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1ffab187eb0>]

In [51]:
fig = plt.figure()
plt.plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1ffab36adc0>]

In [54]:
data = np.random.randn(30).cumsum()

In [55]:
fig = plt.figure()
plt.plot(data, 'k--', label='Default')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1ffab101d60>]

In [60]:
fig = plt.figure()
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1ffab08bb50>

**Ticks, Labels, and Legends**

**Setting the title, axis labels, ticks, and ticklabels**

In [69]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(np.random.randn(1000).cumsum())
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small')
# ax.set_title('My first matplotlib plot')
# ax.set_xlabel('Stages')
props = {
 'title': 'My first matplotlib plot',
 'xlabel': 'Stages'
}
ax.set(**props)


<IPython.core.display.Javascript object>

[Text(0.5, 0, 'Stages'), Text(0.5, 1.0, 'My first matplotlib plot')]

**Adding legends**

In [70]:
from numpy.random import randn

In [77]:
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')
ax.legend(loc='best')


<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1ffadbc7e80>

**Annotations and Drawing on a Subplot**

In [78]:
# Annotations and Drawing on a Subplot
ax.text(x, y, 'Hello world!', family='monospace', fontsize=10)

NameError: name 'x' is not defined

In [81]:
from datetime import datetime
import pandas as pd

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
data = pd.read_csv('./book-support/examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax, style='k-')
crisis_data = [
 (datetime(2007, 10, 11), 'Peak of bull market'),
 (datetime(2008, 3, 12), 'Bear Stearns Fails'),
 (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
    xytext=(date, spx.asof(date) + 225),
    arrowprops=dict(facecolor='black', headwidth=4, width=2,
    headlength=4),
    horizontalalignment='left', verticalalignment='top')
# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])
ax.set_title('Important dates in the 2008-2009 financial crisis')


<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [82]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
         color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x1ffadc34b20>

**Saving Plots to File**

In [84]:
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')

In [85]:
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()

**matplotlib Configuration**

One way to modify the
configuration programmatically from Python is to use the rc method

In [88]:
plt.rc('figure', figsize=(10, 10))

font_options = {'family' : 'monospace',
                'weight' : 'bold',
                'size' : 'small'}
plt.rc('font', **font_options)

For more extensive customization and to see a list of all the options, matplotlib comes
with a configuration file matplotlibrc in the matplotlib/mpl-data directory. If you cus‐
tomize this file and place it in your home directory titled .matplotlibrc, it will be
loaded each time you use matplotlib.


**Plotting with pandas and seaborn**

In [122]:
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot(figsize=(5,5))


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffbbee6700>

In [123]:
df = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
        columns=['A', 'B', 'C', 'D'],
        index=np.arange(0, 100, 10))

In [125]:
df.plot(figsize=(5,5))

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffbbf9d760>

**Bar Plots**

In [133]:
fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0], color='k', alpha=0.7, figsize=(5,5))
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffc2d89fa0>

In [134]:
df = pd.DataFrame(np.random.rand(6, 4),
     index=['one', 'two', 'three', 'four', 'five', 'six'],
     columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))

In [135]:
df

Genus,A,B,C,D
one,0.117148,0.680674,0.110838,0.190514
two,0.41663,0.542309,0.739417,0.627524
three,0.333411,0.01177,0.960502,0.648293
four,0.147167,0.913639,0.401888,0.902756
five,0.601807,0.055058,0.921588,0.351982
six,0.598773,0.781502,0.832277,0.926921


In [138]:
df.plot.bar(figsize=(5,5))

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffc45ce220>

In [144]:
df.plot.barh(stacked=True, alpha=0.5, figsize=(5,5))

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffc9426070>

In [148]:
fig, axes = plt.subplots(1, 1)
s.value_counts().plot.barh(figsize=(5,5))

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffbb1bb2b0>

In [151]:
tips = pd.read_csv('./book-support/examples/tips.csv')
party_counts = pd.crosstab(tips['day'], tips['size'])

In [152]:
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [154]:
party_counts = party_counts.loc[:, 2:5]

In [155]:
party_counts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,16,1,1,0
Sat,53,18,13,1
Sun,39,15,18,3
Thur,48,4,5,1


Then, normalize so that each row sums to 1 and make the plot

In [156]:
party_pcts = party_counts.div(party_counts.sum(1), axis=0)

In [157]:
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [170]:
party_pcts.plot.bar(figsize=(5,5))

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffd695a7c0>

In [163]:
!pip install seaborn


Collecting seaborn
  Downloading seaborn-0.10.1-py3-none-any.whl (215 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.10.1


In [164]:
import seaborn as sns

In [166]:
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

In [178]:
import matplotlib.pyplot as plt

fig= plt.figure(figsize=(6,3))
sns.barplot(x='tip_pct', y='day', data=tips, orient='h');

<IPython.core.display.Javascript object>

In [179]:
fig= plt.figure(figsize=(6,3))
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffdea74970>

**Histograms and Density Plots**

In [181]:
fig = plt.figure(figsize=(6,3))
tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffdfa046d0>

A related plot type is a density plot, which is formed by computing an estimate of a
continuous probability distribution that might have generated the observed data. The
usual procedure is to approximate this distribution as a mixture of “kernels”—that is,
simpler distributions like the normal distribution. Thus, density plots are also known
as kernel density estimate (KDE) plots. Using plot.kde makes a density plot using
the conventional mixture-of-normals estimate

In [182]:
fig = plt.figure(figsize=(6,3))
tips['tip_pct'].plot.density()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffdfad3700>

In [184]:
fig = plt.figure(figsize=(6,3))
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = pd.Series(np.concatenate([comp1, comp2]))
sns.distplot(values, bins=100, color='k')

# Seaborn makes histograms and density plots even easier through its distplot
# method, which can plot both a histogram and a continuous density estimate simulta‐
# neously. As an example, consider a bimodal distribution consisting of draws from
# two different standard normal distributions

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1ffd6a512b0>

**Scatter or Point Plots**

In [185]:
macro = pd.read_csv('./book-support/examples/macrodata.csv')

In [186]:
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]

In [187]:
trans_data = np.log(data).diff().dropna()

In [194]:
trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


We can then use seaborn’s regplot method, which makes a scatter plot and fits a linear regression line

In [203]:
fig = plt.figure(figsize=(6,3))
sns.regplot('m1', 'unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))


<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Changes in log m1 versus log unemp')

In [210]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2}, height=2)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1fff40cb580>

**Facet Grids and Categorical Data**

In [222]:
sns.catplot(x='day', y='tip_pct',
            hue='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1],
            height=3)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1ffdbc79970>

In [223]:
sns.catplot(x='day', y='tip_pct',
            row='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1],
            height=3)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1ffdc5dcd90>

In [226]:
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct < 0.5]);

<IPython.core.display.Javascript object>

**Other Python Visualization Tools**

With tools like Bokeh and
Plotly, it’s now possible to specify dynamic, interactive graphics in Python that are
destined for a web browser.


For creating static graphics for print or web, I recommend defaulting to matplotlib
and add-on libraries like pandas and seaborn for your needs. For other data visualiza‐
tion requirements, it may be useful to learn one of the other available tools out there.
I encourage you to explore the ecosystem as it continues to involve and innovate into
the future.