In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook

## 9.1 matplotlib API入门

In [12]:
# Minimal first plot: the integers 0..9 drawn as a line on pyplot's current axes.
data = np.arange(10)
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1e5320711d0>]

### Figure和Subplot

In [17]:
# Start from an empty figure and occupy slots 1-3 of a 2x2 grid;
# slot 4 is never created, so that cell of the grid stays empty.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

<IPython.core.display.Javascript object>

In [18]:
# Black dashed random walk (cumulative sum of 50 standard-normal steps).
# Drawn on ax3 explicitly: plt.plot would target whichever axes happens to be
# current, which depends on the order in which cells were executed.
ax3.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x2e6daac4438>]

In [19]:
# Histogram of 100 standard-normal draws in 20 bins on ax1; black bars at 30% opacity.
ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)

(array([ 2.,  0.,  0.,  3.,  7.,  6.,  6., 11., 14.,  9., 11.,  9.,  8.,
         4.,  2.,  3.,  3.,  0.,  1.,  1.]),
 array([-2.33489081, -2.06848787, -1.80208492, -1.53568197, -1.26927903,
        -1.00287608, -0.73647313, -0.47007019, -0.20366724,  0.06273571,
         0.32913865,  0.5955416 ,  0.86194455,  1.12834749,  1.39475044,
         1.66115339,  1.92755633,  2.19395928,  2.46036223,  2.72676517,
         2.99316812]),
 <a list of 20 Patch objects>)

In [20]:
# Scatter on ax2: x = 0..29, y = x plus Gaussian noise scaled by 3.
ax2.scatter(np.arange(30), np.arange(30) + 3*np.random.randn(30))

<matplotlib.collections.PathCollection at 0x2e6d9ebbd30>

In [21]:
fig, axes = plt.subplots(2, 3)
# Creates a new figure together with a 2x3 grid of subplots

<IPython.core.display.Javascript object>

In [23]:
# Display the axes container returned by plt.subplots (a 2x3 object array of axes).
axes

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000002E6DAB132B0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002E6DAB42438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002E6DAB65898>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000002E6DAB8FEB8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002E6DABBF5C0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002E6DABE8C18>]],
      dtype=object)

### 调整subplot周围的间距

In [30]:
# 2x2 grid of histograms whose panels share both the x and the y axis.
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
for i, j in np.ndindex(2, 2):
    # Each panel gets its own 500 standard-normal samples.
    axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)

# Remove the horizontal and vertical padding between the subplots.
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### 颜色、标记和线型

In [33]:
# Single full-figure axes holding a 30-step random walk.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# 'ko--' = black, circle markers, dashed line. Plot through the axes object
# created on the previous line instead of pyplot's implicit current axes.
ax.plot(np.random.randn(30).cumsum(), 'ko--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2e6db9ff4e0>]

In [38]:
# 30-step random walk: running total of standard-normal increments.
data = np.cumsum(np.random.randn(30))

In [39]:
# New figure with one axes filling it.
# NOTE(review): `axe` is not referenced by name in the cells visible here; the
# plt.* calls that follow presumably reach it as the current axes — confirm.
fig = plt.figure()
axe = fig.add_subplot(1,1,1)

<IPython.core.display.Javascript object>

In [40]:
# Black dashed line through the points; the label is what a legend will show.
plt.plot(data, 'k--', label='Default')

[<matplotlib.lines.Line2D at 0x2e6de1d9ef0>]

In [41]:
# Same data drawn as a solid black step function ('steps-post' draw style).
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')

[<matplotlib.lines.Line2D at 0x2e6de764a90>]

In [42]:
# Build the legend from the labelled lines; 'best' lets matplotlib choose the location.
plt.legend(loc='best')

<matplotlib.legend.Legend at 0x2e6de772160>

### 刻度、标签和图例

In [43]:
# Fresh figure and axes with a 1000-step random walk, used below to
# demonstrate ticks, tick labels and titles.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(np.random.randn(1000).cumsum())

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2e6de7b3f28>]

In [44]:
# Put x-axis ticks at these five data positions; the return value is kept in `ticks`.
ticks = ax.set_xticks([0, 250, 500, 750, 1000])

In [45]:
# Replace the numeric tick text with five names (one per tick set above),
# rotated 30 degrees and rendered in a small font.
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                           rotation=30, fontsize='small')

In [46]:
# Title for the axes.
ax.set_title('My first matplotlib plot')

Text(0.5,1,'My first matplotlib plot')

In [48]:
# Label for the x axis.
ax.set_xlabel('Stages')

Text(0.5,17.3199,'Stages')

In [49]:
# Axes.set applies several properties in one call; the keys name the
# properties (title, xlabel) and the values are what they are set to.
props = dict(title='My first matplotlib plot', xlabel='Stages')
ax.set(**props)

[Text(0.5,17.3199,'Stages'), Text(0.5,1,'My first matplotlib plot')]

In [59]:
# New figure with a single axes for the legend example.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

<IPython.core.display.Javascript object>

In [52]:
# First random walk: solid black line, legend entry 'one'.
ax.plot(np.random.randn(1000).cumsum(), 'k', label='one')

[<matplotlib.lines.Line2D at 0x2e6de7c4390>]

In [60]:
# Second random walk: dashed black line, legend entry 'two'.
ax.plot(np.random.randn(1000).cumsum(), 'k--', label='two')

[<matplotlib.lines.Line2D at 0x2e6db6ba3c8>]

In [61]:
# Third random walk: black point markers, legend entry 'three'.
ax.plot(np.random.randn(1000).cumsum(), 'k.', label='three')

[<matplotlib.lines.Line2D at 0x2e6dfe7da90>]

In [62]:
# Legend built from the labels passed to ax.plot; 'best' lets matplotlib choose the location.
ax.legend(loc='best')

<matplotlib.legend.Legend at 0x2e6dfe88518>

### 注解以及在Subplot上绘图

In [63]:
# Draw a text label at coordinates (1, 20) on the axes, in a 10pt monospace font.
ax.text(1, 20, 'Hello world!',
       family='monospace', fontsize=10)

Text(1,20,'Hello world!')

In [65]:
from datetime import datetime

# Plot the SPX column of spx.csv and mark three dated events with arrows.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Column 0 of the CSV becomes the index and is parsed as dates.
data = pd.read_csv('spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    # Series value in effect at this date (asof looks backwards from `date`).
    level = spx.asof(date)
    ax.annotate(
        label,
        xy=(date, level + 75),        # arrow tip, 75 above the series
        xytext=(date, level + 225),   # label position, 225 above the series
        arrowprops={'facecolor': 'black', 'headwidth': 4, 'width': 2,
                    'headlength': 4},
        horizontalalignment='left',
        verticalalignment='top',
    )

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

<IPython.core.display.Javascript object>

Text(0.5,1,'Important dates in the 2008-2009 financial crisis')

In [61]:
# Empty figure and axes to draw shape patches on.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

<IPython.core.display.Javascript object>

In [62]:
# Three patch objects; nothing is drawn until they are added to an axes.
# Rectangle: anchor corner, width, height.
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15,
                     color='k', alpha=0.3)
# Circle: centre and radius.
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)
# Polygon: list of vertices (here a triangle).
pgon = plt.Polygon(xy=[[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

In [63]:
# Attach each shape to the axes so that it is rendered.
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<matplotlib.patches.Polygon at 0x1e53560b898>

### 将图表保存到文件

In [38]:
# Save the active figure; the output format is taken from the file extension (.svg).
plt.savefig('figpath.svg')

In [39]:
# PNG at 400 dots per inch; bbox_inches='tight' trims the whitespace around the figure.
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')

In [41]:
# savefig is given an in-memory binary buffer instead of a path, and the
# encoded figure is then read back out of the buffer as bytes.
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()

### matplotlib配置

In [60]:
# Set the figure.figsize configuration entry to 5x5 for figures created from now on.
plt.rc('figure', figsize=(5, 5))

In [56]:
# font_options = {'family': 'monospace',
#                'weight': 'bold'}
# plt.rc('font', **font_options)

## 9.2 使用pandas和seaborn绘图

### 线型图

In [94]:
# Random-walk Series indexed 0, 10, ..., 90; Series.plot puts the index on the x axis.
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
fig  = plt.figure()
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e537c68860>

In [65]:
# Four independent random walks (columns A-D): standard-normal steps
# accumulated down the rows, indexed 0, 10, ..., 90.
df = pd.DataFrame(
    np.cumsum(np.random.randn(10, 4), axis=0),
    index=np.arange(0, 100, 10),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,1.693549,0.199797,-0.112784,2.529148
10,2.677092,0.567456,0.70577,2.381915
20,1.919898,-0.148148,1.497821,0.864898
30,1.406286,0.596006,3.004201,-0.132478
40,0.966036,-0.126714,3.62392,-0.202129
50,2.207243,1.377926,4.86857,-0.25796
60,2.290599,0.581003,4.287118,-1.991537
70,3.273204,-0.095309,3.591276,-0.935431
80,5.571869,-0.570414,4.845144,-1.3213
90,5.83046,-0.0891,6.761717,-1.989515


In [95]:
# DataFrame.plot draws every column as its own line on one axes.
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e537cbc320>

### 柱状图

In [96]:
# Two stacked subplots (2 rows, 1 column) for the vertical and horizontal bar examples.
fig, axes = plt.subplots(2, 1)

<IPython.core.display.Javascript object>

In [70]:
# 16 uniform random values in [0, 1), labelled with the letters a..p.
data = pd.Series(np.random.rand(16), index=[*'abcdefghijklmnop'])

In [97]:
# Vertical bars on the first subplot; black at 70% opacity.
data.plot.bar(ax=axes[0], color='k', alpha=0.7)

<matplotlib.axes._subplots.AxesSubplot at 0x1e53a4ea860>

In [98]:
# Same data as horizontal bars on the second subplot.
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<matplotlib.axes._subplots.AxesSubplot at 0x1e53a513e48>

In [99]:
# 6x4 frame of uniform random values; the column index carries the name 'Genus'.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    columns=pd.Index(data=['A', 'B', 'C', 'D'], name='Genus'),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
df

Genus,A,B,C,D
one,0.073348,0.18054,0.544636,0.585461
two,0.811264,0.448901,0.042972,0.601629
three,0.479605,0.68287,0.513647,0.586454
four,0.256263,0.25174,0.733777,0.825629
five,0.93552,0.543791,0.180149,0.886089
six,0.646024,0.574813,0.216651,0.344008


In [115]:
# Grouped bar chart: one group of bars per row of df, one bar per column.
df.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e53f1302b0>

In [79]:
# Horizontal bars with each row's column values stacked end to end, at 50% opacity.
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e5325a4a90>

In [101]:
# Load the tipping dataset from tips.csv in the working directory.
tips = pd.read_csv('tips.csv')

In [102]:
# Preview the first rows of the table.
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size
0,16.99,1.01,No,Sun,Dinner,2
1,10.34,1.66,No,Sun,Dinner,3
2,21.01,3.5,No,Sun,Dinner,3
3,23.68,3.31,No,Sun,Dinner,2
4,24.59,3.61,No,Sun,Dinner,4


In [103]:
party_counts = pd.crosstab(tips['day'], tips['size'])
# crosstab counts how many rows fall into each (day, size) combination
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [104]:
# Label-based column slice: keep party sizes 2 through 5 (both ends included).
party_counts = party_counts.loc[:, 2:5]

In [105]:
# Show the trimmed table.
party_counts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,16,1,1,0
Sat,53,18,13,1
Sun,39,15,18,3
Thur,48,4,5,1


In [106]:
# Divide every row by its own total so that each row sums to 1.
party_pcts = party_counts.div(party_counts.sum(axis=1), axis='index')

In [107]:
# Show the row-normalised shares.
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [90]:
# Grouped bars: one group per day, one bar per party size.
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e537a4e630>

In [111]:
import seaborn as sns

In [113]:
# Tip as a fraction of the bill before the tip (total_bill minus tip),
# stored as a new tip_pct column on the tips frame.
tips['tip_pct'] = tips['tip'].div(tips['total_bill'].sub(tips['tip']))
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [120]:
# Horizontal seaborn bar plot of tip_pct, with one bar per day.
fig = plt.figure()
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<matplotlib.axes._subplots.AxesSubplot at 0x1e53f398cf8>

In [121]:
# Same plot, with each day's bar split by the `time` column via hue.
fig = plt.figure()
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<matplotlib.axes._subplots.AxesSubplot at 0x1e53f4459b0>

### 直方图和密度图

In [122]:
# Pull the tip_pct column out as a standalone Series.
tip_pct = tips['tip_pct']

In [124]:
# Preview the first values.
tip_pct.head()

0    0.063204
1    0.191244
2    0.199886
3    0.162494
4    0.172069
Name: tip_pct, dtype: float64

In [125]:
# Histogram of tip_pct with 50 bins, on a new figure.
fig = plt.figure()

tip_pct.plot.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e540b42c50>

In [126]:
# Density plot of tip_pct on a new figure.
fig = plt.figure()
tip_pct.plot.density()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1e54105a160>

In [127]:
# First mixture component: 200 draws from a normal with mean 0 and std 1.
comp1 = np.random.normal(loc=0, scale=1, size=200)

In [128]:
# Second mixture component: 200 draws from a normal with mean 10 and std 2.
comp2 = np.random.normal(loc=10, scale=2, size=200)

In [129]:
# Join the two samples end to end into one Series (a bimodal mixture).
values = pd.Series(np.concatenate([comp1, comp2]))

In [130]:
# Distribution plot of the mixture with 100 histogram bins, drawn in black.
# NOTE(review): distplot is deprecated in recent seaborn releases in favour of
# histplot/displot — confirm against the installed version.
fig = plt.figure()
sns.distplot(values, bins=100, color='k')

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<matplotlib.axes._subplots.AxesSubplot at 0x1e5415199b0>

### 散布图或点图

In [133]:
# Load the macroeconomic dataset from macrodata.csv in the working directory.
macro = pd.read_csv('macrodata.csv')

In [134]:
# Preview the first rows.
macro.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [135]:
# Keep only the four series compared below.
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]

In [136]:
# Preview the selected columns.
data.head()

Unnamed: 0,cpi,m1,tbilrate,unemp
0,28.98,139.7,2.82,5.8
1,29.15,141.7,3.08,5.1
2,29.35,140.5,3.82,5.3
3,29.37,140.0,4.33,5.6
4,29.54,139.6,3.5,5.2


In [144]:
# Log differences: change in the natural log of each column between consecutive rows.
trans_data = (
    np.log(data)
    .diff()
    .dropna()  # diff() leaves NaNs in the first row; drop rows containing NaN
)

In [145]:
# Show the last five rows.
trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [146]:
# Scatter plot of the m1 column against the unemp column of trans_data,
# with seaborn's fitted regression line overlaid.
fig = plt.figure()
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally, while the keyword form also works on older releases.
sns.regplot(x='m1', y='unemp', data=trans_data)

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<matplotlib.axes._subplots.AxesSubplot at 0x1e5420840f0>

In [147]:
# Title the current axes; the two column names are substituted into the %s slots.
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5,1,'Changes in log m1 versus log unemp')

In [152]:
# Scatter-plot matrix over all columns of trans_data; diag_kind='kde' puts density
# estimates on the diagonal, and plot_kws passes alpha=0.2 to the off-diagonal plots.
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<seaborn.axisgrid.PairGrid at 0x1e5476be6a0>

### 分面网格和类型数据

In [153]:
 sns.factorplot(x='day', y='tip_pct', hue='time', col='smoker',
                kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<seaborn.axisgrid.FacetGrid at 0x1e549157b00>

In [154]:
# Facet grid of bar charts: one row of panels per `time` value, one column
# per `smoker` value; only rows with tip_pct below 1 are plotted.
sns.factorplot(
    data=tips.loc[tips['tip_pct'] < 1],
    x='day', y='tip_pct',
    row='time', col='smoker',
    kind='bar',
)

<IPython.core.display.Javascript object>

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


<seaborn.axisgrid.FacetGrid at 0x1e5494aa550>

In [155]:
# Box plots of tip_pct for each day, restricted to rows with tip_pct below 0.5.
sns.factorplot(
    data=tips.loc[tips['tip_pct'] < 0.5],
    x='tip_pct', y='day',
    kind='box',
)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1e54ab0a128>