In [1]:
%matplotlib notebook

# Brief matplotlib API Primer

In [2]:
import matplotlib.pyplot as plt

In [3]:
import numpy as np
import pandas as pd

In [4]:
data = np.arange(10)

In [5]:
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
plt.plot(data);

<IPython.core.display.Javascript object>

## Figures and Subplots

In [146]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [147]:
ax1 = fig.add_subplot(2, 2, 1)

In [148]:
ax2 = fig.add_subplot(2, 2, 2)

In [149]:
ax3 = fig.add_subplot(2, 2, 3)

In [151]:
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

<IPython.core.display.Javascript object>

In [152]:
plt.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x225458cb400>]

In [153]:
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)

In [154]:
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x225458f2a70>

In [155]:
fig, axes = plt.subplots(2, 3)

<IPython.core.display.Javascript object>

In [156]:
axes

array([[<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>],
       [<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>]], dtype=object)

### Adjusting the spacing around subplots

In [157]:
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

## Colors, Markers, and Line Styles

In [46]:
ax.plot(x, y, 'g--')

NameError: name 'x' is not defined

In [47]:
ax.plot(x, y, linestyle='--', color='g')

NameError: name 'x' is not defined

In [48]:
plt.plot?

In [158]:
from numpy.random import randn

In [159]:
plt.plot(randn(30).cumsum(), 'ko--');

<IPython.core.display.Javascript object>

In [160]:
plt.plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2253203a860>]

In [161]:
data = np.random.randn(30).cumsum()

In [162]:
plt.plot(data, 'k--', label='Default')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x22544e40490>]

In [163]:
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')

[<matplotlib.lines.Line2D at 0x22544ec6170>]

In [164]:
plt.legend(loc='best')

<matplotlib.legend.Legend at 0x22544ec6620>

## Ticks, Labels, and Legends

### Setting the title, axis labels, ticks, and ticklabels

In [165]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [166]:
ax = fig.add_subplot(1, 1, 1)

In [167]:
ax.plot(np.random.randn(1000).cumsum())

[<matplotlib.lines.Line2D at 0x22543244b80>]

In [168]:
ticks = ax.set_xticks([0, 250, 500, 750, 1000])

In [169]:
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                           rotation=30, fontsize='small')

In [170]:
ax.set_title('My first matplotlib plot')

Text(0.5, 1.0, 'My first matplotlib plot')

In [171]:
ax.set_xlabel('Stages')

Text(0.5, 50.1224380326067, 'Stages')

In [172]:
props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}

In [173]:
ax.set(**props)

[Text(0.5, 1.0, 'My first matplotlib plot'),
 Text(0.5, 50.1224380326067, 'Stages')]

### Adding legends

In [174]:
from numpy.random import randn

In [175]:
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)

<IPython.core.display.Javascript object>

In [176]:
ax.plot(randn(1000).cumsum(), 'k', label='one')

[<matplotlib.lines.Line2D at 0x22544380b20>]

In [177]:
ax.plot(randn(1000).cumsum(), 'k--', label='two')

[<matplotlib.lines.Line2D at 0x22544382f20>]

In [178]:
ax.plot(randn(1000).cumsum(), 'k.', label='three')

[<matplotlib.lines.Line2D at 0x225443829e0>]

In [179]:
ax.legend(loc='best')

<matplotlib.legend.Legend at 0x225443804c0>

## Annotations and Drawing on a Subplot

In [180]:
ax.text(-12, -50, 'Hello World!',
       family='monospace', fontsize=10)

Text(-12, -50, 'Hello World!')

In [181]:
import pandas as pd
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
               xytext=(date, spx.asof(date) + 225),
               arrowprops=dict(facecolor='black', headwidth=4, width=2,
                              headlength=4),
               horizontalalignment='left', verticalalignment='top')

ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [182]:
data['SPX']

Date
1990-02-01     328.79
1990-02-02     330.92
1990-02-05     331.85
1990-02-06     329.66
1990-02-07     333.75
               ...   
2011-10-10    1194.89
2011-10-11    1195.54
2011-10-12    1207.25
2011-10-13    1203.66
2011-10-14    1224.58
Name: SPX, Length: 5472, dtype: float64

In [183]:
data.head()

Unnamed: 0_level_0,SPX
Date,Unnamed: 1_level_1
1990-02-01,328.79
1990-02-02,330.92
1990-02-05,331.85
1990-02-06,329.66
1990-02-07,333.75


In [184]:
spx

Date
1990-02-01     328.79
1990-02-02     330.92
1990-02-05     331.85
1990-02-06     329.66
1990-02-07     333.75
               ...   
2011-10-10    1194.89
2011-10-11    1195.54
2011-10-12    1207.25
2011-10-13    1203.66
2011-10-14    1224.58
Name: SPX, Length: 5472, dtype: float64

In [185]:
for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75))

In [186]:
for date, label in crisis_data:
    print(spx.asof(date))

1554.41
1308.77
1192.7


In [190]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6], [0.5, 0.5]],
                  color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x2254412fa30>

## Saving Plots to File

In [84]:
plt.savefig('figpath.svg')

In [85]:
plt.savefig('filepath.png', dpi=400, bbox_inches='tight')

In [86]:
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()

## matplotlib Configuration

In [191]:
plt.rc('figure', figsize=(10, 10))

In [192]:
font_options = {'family': 'monospace',
                'weight': 'bold',
                'size': 'small'}

plt.rc('font', **font_options)

ValueError: Key font.size: Could not convert 'small' to float

# Plotting with pandas and seaborn

## Line Plots

In [193]:
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))

In [194]:
s

0    -0.811731
10   -2.768735
20   -3.615747
30   -3.016017
40   -4.583802
50   -5.510103
60   -4.521414
70   -4.696907
80   -6.622096
90   -7.420160
dtype: float64

In [198]:
s.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [199]:
df = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
                 columns=['A', 'B', 'C', 'D'],
                 index=np.arange(0, 100, 10))

In [200]:
df

Unnamed: 0,A,B,C,D
0,-1.592551,-1.510167,1.287062,-0.060207
10,-2.670326,-2.580284,-1.724427,1.162643
20,-3.160703,-3.261433,-0.62414,1.097536
30,-3.831255,-3.828741,0.570275,1.077229
40,-5.003693,-4.475141,0.125021,0.073086
50,-4.980163,-3.543942,-2.112205,-0.198096
60,-4.797449,-2.796585,-2.781429,-0.348609
70,-3.888624,-3.150621,-4.104412,1.191411
80,-3.992611,-1.59783,-4.644686,1.93353
90,-4.545142,-2.075555,-5.804859,2.764845


In [201]:
df.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

## Bar Plots

In [202]:
fig, axes = plt.subplots(2, 1)

<IPython.core.display.Javascript object>

In [203]:
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))

In [204]:
data.plot.bar(ax=axes[0], color='k', alpha=0.7)

<AxesSubplot:>

In [205]:
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<AxesSubplot:>

In [50]:
data.value_counts().plot.bar()

<AxesSubplot:>

In [206]:
df = pd.DataFrame(np.random.rand(6, 4),
                 index=['one', 'two', 'three', 'four', 'five', 'six'],
                 columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))

In [207]:
df

Genus,A,B,C,D
one,0.616907,0.622745,0.778151,0.589335
two,0.837298,0.488164,0.499621,0.467748
three,0.453818,0.597211,0.734891,0.096221
four,0.121646,0.551118,0.314592,0.544515
five,0.873734,0.89092,0.053371,0.633022
six,0.813288,0.904217,0.227717,0.419802


In [208]:
df.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [209]:
df.plot.bar(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [210]:
df.plot.barh()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [211]:
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [212]:
tips = pd.read_csv('examples/tips.csv')

In [213]:
party_counts = pd.crosstab(tips['day'], tips['size'])

In [214]:
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [215]:
party_counts = party_counts.loc[:, 2:5]

In [216]:
party_counts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,16,1,1,0
Sat,53,18,13,1
Sun,39,15,18,3
Thur,48,4,5,1


In [217]:
party_pcts = party_counts.div(party_counts.sum(1), axis=0)

In [218]:
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [219]:
party_counts.sum(1)

day
Fri     18
Sat     85
Sun     75
Thur    58
dtype: int64

In [220]:
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='day'>

In [221]:
import seaborn as sns

In [222]:
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])

In [223]:
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [224]:
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [225]:
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [226]:
sns.set(style='whitegrid')

## Histograms and Density Plots

In [227]:
tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Frequency'>

In [228]:
tips['tip_pct'].plot.density()

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Density'>

In [229]:
comp1 = np.random.normal(0, 1, size=200)

In [230]:
comp2 = np.random.normal(10, 2, size=200)

In [231]:
values = pd.Series(np.concatenate([comp1, comp2]))

In [241]:
sns.distplot(values, bins=100, color='k');



<IPython.core.display.Javascript object>

## Scatter or Point Plots

In [242]:
macro = pd.read_csv('examples/macrodata.csv')

In [243]:
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]

In [245]:
data.head()

Unnamed: 0,cpi,m1,tbilrate,unemp
0,28.98,139.7,2.82,5.8
1,29.15,141.7,3.08,5.1
2,29.35,140.5,3.82,5.3
3,29.37,140.0,4.33,5.6
4,29.54,139.6,3.5,5.2


In [250]:
trans_data = np.log(data).diff().dropna()

In [247]:
trans_data = np.log(data)
trans_data.head()

Unnamed: 0,cpi,m1,tbilrate,unemp
0,3.366606,4.939497,1.036737,1.757858
1,3.372455,4.953712,1.12493,1.629241
2,3.379293,4.945207,1.34025,1.667707
3,3.379974,4.941642,1.465568,1.722767
4,3.385745,4.938781,1.252763,1.648659


In [248]:
trans_data = np.log(data).diff()
trans_data.head()

Unnamed: 0,cpi,m1,tbilrate,unemp
0,,,,
1,0.005849,0.014215,0.088193,-0.128617
2,0.006838,-0.008505,0.215321,0.038466
3,0.000681,-0.003565,0.125317,0.05506
4,0.005772,-0.002861,-0.212805,-0.074108


In [249]:
trans_data = np.log(data).diff().dropna()
trans_data.head()

Unnamed: 0,cpi,m1,tbilrate,unemp
1,0.005849,0.014215,0.088193,-0.128617
2,0.006838,-0.008505,0.215321,0.038466
3,0.000681,-0.003565,0.125317,0.05506
4,0.005772,-0.002861,-0.212805,-0.074108
5,0.000338,0.004289,-0.266946,0.0


In [252]:
sns.regplot(x='m1', y='unemp', data=trans_data)

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='m1', ylabel='unemp'>

In [253]:
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5, 1.0, 'Changes in log m1 versus log unemp')

In [254]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x2254d8f7400>

## Facet Grids and Categorical Data

In [256]:
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
              kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2254e70bcd0>

In [257]:
sns.catplot(x='day', y='tip_pct', row='time', col='smoker',
           kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2254f1857b0>

In [258]:
sns.catplot(x='tip_pct', y='day', kind='box',
               data=tips[tips.tip_pct < 0.5])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2254f1ea050>

# Other Python Visualization Tools