## 绘图与可视化

In [1]:
# Imports and notebook-wide display / plotting configuration.
import numpy as np
import pandas as pd
# Remember the pandas row limit that was in effect before it is overridden below.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
# Seed NumPy's global RNG so the random data drawn in later cells is repeatable.
np.random.seed(12345)
import matplotlib.pyplot as plt
import matplotlib
# Default size for figures created from here on.
plt.rc('figure', figsize=(10, 6))
# Print arrays with 4 decimals and suppress scientific notation for small values.
np.set_printoptions(precision=4, suppress=True)

In [2]:
# Select matplotlib's interactive "notebook" backend for this session.
# NOTE(review): this backend may be unavailable in newer Jupyter front ends — confirm before re-running.
%matplotlib notebook

### matplotlib API 入门

In [3]:
import matplotlib.pyplot as plt

In [4]:
import numpy as np

# Simple line plot: the integers 0-9 drawn against their positions.
data = np.arange(10)
plt.plot(data)
plt.show()

<IPython.core.display.Javascript object>

#### 图片与子图

In [7]:
# Everything matplotlib draws lives inside a Figure object; plt.figure() creates a new one.
fig = plt.figure()

<IPython.core.display.Javascript object>

In [8]:
# Add a subplot in slot 1 of a 2x2 layout on the figure created above.
ax1 = fig.add_subplot(2, 2, 1)

In [12]:
# Add subplots in slots 2 and 3 of the same 2x2 layout.
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)



In [13]:
# Blank figure with three subplots on a 2x2 layout (slots 1-3; slot 4 is not created).
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

<IPython.core.display.Javascript object>

In [15]:
# Cumulative sum of 50 standard-normal draws, drawn as a black dashed line
# on pyplot's current axes.
walk = np.random.randn(50).cumsum()
plt.plot(walk, 'k--')
plt.show()

In [17]:
# Put data into the subplots: a histogram on ax1 and a noisy linear scatter on ax2.
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
xs = np.arange(30)
ax2.scatter(xs, xs + 3 * np.random.randn(30))
plt.show()

In [18]:
# Close every open figure before moving on.
plt.close('all')

In [19]:
# Create the figure and a 2x3 grid of subplots in a single call;
# `axes` holds the subplot objects and is indexed as axes[row, col].
fig, axes = plt.subplots(2, 3)
plt.show()

<IPython.core.display.Javascript object>

#### 调整子图周围的间距

##### 使用subplots_adjust更改间距
subplots_adjust(left=None, bottom=None, right=None, top=None,
                wspace=None, hspace=None)

In [21]:
# Four histograms sharing both axes, with no spacing left between the subplots.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for panel in axes.flat:  # row-major order: [0, 0], [0, 1], [1, 0], [1, 1]
    panel.hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### 颜色、标记和线型

##### plot 接受 x、y 坐标数组，以及可选的表示颜色和线型的字符串缩写
ax.plot(x, y, 'g--')

ax.plot(x, y, linestyle='--', color='g')

In [23]:
# Start a new, empty figure.
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [25]:
from numpy.random import randn

# Keyword spelling of the 'ko--' format string: black, circle markers, dashed line.
plt.plot(randn(30).cumsum(), color='k', marker='o', linestyle='--')
plt.show()

In [27]:
# Close every open figure before moving on.
plt.close('all')

In [28]:
# One random walk drawn twice so the two drawing styles can be compared.
data = np.random.randn(30).cumsum()
# Dashed black line with the default drawing style.
plt.plot(data, 'k--', label='Default')
# Solid black line drawn with drawstyle='steps-post'.
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
# The legend entries come from the label= values above.
plt.legend(loc='best')
plt.show()

<IPython.core.display.Javascript object>

#### 刻度、标签和图例

##### 设置标题、轴标签、刻度和刻度标签

In [29]:
# One figure with a single subplot, showing a 1000-step random walk.
fig, ax = plt.subplots()
ax.plot(np.random.randn(1000).cumsum())
plt.show()

<IPython.core.display.Javascript object>

In [30]:
# Place x ticks at fixed positions, then replace the numeric tick text with names.
tick_positions = [0, 250, 500, 750, 1000]
tick_names = ['one', 'two', 'three', 'four', 'five']
ticks = ax.set_xticks(tick_positions)
labels = ax.set_xticklabels(tick_names, rotation=30, fontsize='small')

In [31]:
# Give the subplot a title and an x-axis label.
ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

Text(0.5,16.7896,'Stages')

props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}
ax.set(**props)

#### 添加图例

In [33]:
from numpy.random import randn

# Three random walks on one subplot, each with its own line style and legend label.
fig, ax = plt.subplots()
for fmt, name in (('k', 'one'), ('k--', 'two'), ('k.', 'three')):
    ax.plot(randn(1000).cumsum(), fmt, label=name)
plt.show()

<IPython.core.display.Javascript object>

In [35]:
# Add a legend to the subplot; loc='best' leaves the placement to matplotlib.
ax.legend(loc='best')
plt.show()
# Simple chart with three lines and a legend.

#### 注释和子图加工

##### text 在图表上给定的坐标 (x, y) 处，按可选的定制样式绘制文本
ax.text(x, y, 'Hello world!',
        family='monospace', fontsize=10)

In [36]:
from datetime import datetime

# Single-subplot figure for the price series.
fig, ax = plt.subplots()

# Read the CSV using its first column as a parsed date index, then take the 'SPX' column.
data = pd.read_csv('spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax, style='k-')

# (date, caption) pairs to call out on the chart.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]

for date, label in crisis_data:
    # Series value as of `date`; the arrow points 75 above it and the text sits 225 above it.
    level = spx.asof(date)
    ax.annotate(label,
                xy=(date, level + 75),
                xytext=(date, level + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

<IPython.core.display.Javascript object>

Text(0.5,1,'Important dates in the 2008-2009 financial crisis')

In [None]:
# Set the title again, this time with an en dash (–) in the year range.
ax.set_title('Important dates in the 2008–2009 financial crisis')

In [38]:
# Draw three patch shapes (rectangle, circle, polygon) on a single subplot.
fig, ax = plt.subplots()

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

# A shape only appears once it has been added to the subplot.
for shape in (rect, circ, pgon):
    ax.add_patch(shape)
plt.show()

<IPython.core.display.Javascript object>

In [39]:
# Same three shapes as in the previous patches cell, on a wider 12x6 figure.
fig, ax = plt.subplots(figsize=(12, 6))
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)
for shape in (rect, circ):
    ax.add_patch(shape)
# Kept as the final expression so the cell still displays the added Polygon.
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x1cbfe8e7c50>

#### 将图片保存到文件

plt.savefig('figpath.svg')  # 将图片保存为 SVG

plt.savefig('figpath.png', dpi=400, bbox_inches='tight')  # 保存为 PNG 图片，四周留白最小，分辨率为 400 DPI

from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()

#### matplotlib 设置

In [40]:
# Make 10x10 the default figure size for figures created after this cell.
plt.rc('figure', figsize=(10, 10))

# Global font defaults. 'size' must be a number of points: matplotlib validates
# font.size as a float, so a relative name such as 'small' is rejected with a
# ValueError. 8 pt is close to what 'small' means relative to the 10 pt default.
font_options = {'family' : 'monospace',
                'weight' : 'bold',
                'size'   : 8}
plt.rc('font', **font_options)

### 使用 pandas 和 seaborn 绘图

#### 折线图

In [42]:
# Close every open figure before moving on.
plt.close('all')

In [44]:
# Random walk of 10 steps indexed by 0, 10, ..., 90.
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
# Plot the Series with pandas' built-in plotting method.
s.plot()
plt.show()

<IPython.core.display.Javascript object>

In [46]:
# Four random walks (columns A-D, cumulated down the rows), plotted together with one call.
df = pd.DataFrame(np.random.randn(10, 4).cumsum(axis=0),
                  columns=list('ABCD'),
                  index=np.arange(0, 100, 10))
df.plot()
plt.show()

<IPython.core.display.Javascript object>

#### 柱状图

In [47]:
# The same Series as vertical bars (first subplot) and horizontal bars (second subplot).
fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
bar_style = dict(color='k', alpha=0.7)
data.plot.bar(ax=axes[0], **bar_style)
data.plot.barh(ax=axes[1], **bar_style)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1cb83b9ac88>

In [48]:
# Re-seed NumPy's global RNG so the random frame built next is repeatable.
np.random.seed(12348)

In [50]:
# 6x4 frame of uniform random values; the column index carries the name 'Genus'.
df = pd.DataFrame(np.random.rand(6, 4),
                  index=['one', 'two', 'three', 'four', 'five', 'six'],
                  columns=pd.Index(list('ABCD'), name='Genus'))
# DataFrame bar chart: one group of bars per row, one bar per column.
df.plot.bar()
plt.show()

<IPython.core.display.Javascript object>

In [51]:
# Start a new, empty figure.
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
# Horizontal bars with each row's column values stacked into a single bar.
df.plot.barh(stacked=True, alpha=0.5)
plt.show()

<IPython.core.display.Javascript object>

In [53]:
# Close every open figure before moving on.
plt.close('all')

In [55]:
tips = pd.read_csv('tips.csv')
# Cross-tabulate day against party size, keeping only the size columns 2 through 5:
# not many 1- and 6-person parties.
party_counts = pd.crosstab(tips['day'], tips['size']).loc[:, 2:5]

In [57]:
# Normalize to sum to 1
party_pcts = party_counts.div(party_counts.sum(1), axis=0)
party_pcts
party_pcts.plot.bar()
plt.show()

<IPython.core.display.Javascript object>

In [58]:
# Close every open figure before moving on.
plt.close('all')

In [59]:
import seaborn as sns

# Tip as a fraction of the bill with the tip itself subtracted out.
bill_before_tip = tips['total_bill'] - tips['tip']
tips['tip_pct'] = tips['tip'] / bill_before_tip
# Horizontal bar plot of tip_pct for each day.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

  stat_data = remove_na(group_data)


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1cb8ba05860>

In [60]:
# Close every open figure before moving on.
plt.close('all')

In [61]:
# Same bar plot, with hue='time' splitting each day's bar by the 'time' column.
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

  stat_data = remove_na(group_data[hue_mask])


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1cb8babaeb8>

In [None]:
# Close every open figure before moving on.
plt.close('all')

In [62]:
# Switch seaborn to its "whitegrid" plot style for the plots that follow.
sns.set(style="whitegrid")

#### 直方图和密度图

In [63]:
# Start a new, empty figure.
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [65]:
# Histogram of tip_pct using 50 bins.
tips['tip_pct'].plot.hist(bins=50)
plt.show()

In [66]:
# Start a new, empty figure.
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [67]:
# Density plot of tip_pct.
tips['tip_pct'].plot.density()
plt.show()

In [68]:
# Start a new, empty figure.
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [69]:
# Two normal samples of 200 draws each: mean 0 / std 1 and mean 10 / std 2.
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
# Concatenate both samples into a single Series.
values = pd.Series(np.concatenate([comp1, comp2]))
# NOTE(review): distplot is deprecated in newer seaborn releases (histplot/displot
# are the suggested replacements) — confirm against the installed version.
sns.distplot(values, bins=100, color='k')



<matplotlib.axes._subplots.AxesSubplot at 0x1cb8e654c88>

#### 散点图和点图

In [70]:
macro = pd.read_csv('macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Log of each column, differenced row to row; rows containing NaN are dropped.
log_levels = np.log(data)
trans_data = log_levels.diff().dropna()
# Show the last five rows.
trans_data.tail()

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [71]:
# Start a new, empty figure.
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [72]:
# Scatter plot of the m1 and unemp columns with a fitted regression line.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, while the keyword form works on old and new versions.
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5,1,'Changes in log m1 versus log unemp')

In [73]:
# Pairwise scatter plots of every column of trans_data, with KDE curves on the
# diagonal; plot_kws passes alpha=0.2 to the off-diagonal plots.
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1cb810535c0>

#### 分面网格和分类数据

In [74]:
# Tip percentage by day / time / smoker: bars per day, coloured by 'time',
# with one facet column per 'smoker' value. Rows with tip_pct >= 1 are left out.
# `factorplot` was renamed `catplot` in seaborn 0.9 and later removed, so use
# whichever name the installed seaborn exposes.
facet_plot = sns.catplot if hasattr(sns, 'catplot') else sns.factorplot
facet_plot(x='day', y='tip_pct', hue='time', col='smoker',
           kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

  stat_data = remove_na(group_data[hue_mask])


<seaborn.axisgrid.FacetGrid at 0x1cb860bfe10>

In [75]:
# Same data, faceted on both dimensions: one row per 'time' value and one
# column per 'smoker' value. Rows with tip_pct >= 1 are left out.
# `factorplot` was renamed `catplot` in seaborn 0.9 and later removed, so use
# whichever name the installed seaborn exposes.
facet_plot = sns.catplot if hasattr(sns, 'catplot') else sns.factorplot
facet_plot(x='day', y='tip_pct', row='time',
           col='smoker',
           kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

  stat_data = remove_na(group_data)


<seaborn.axisgrid.FacetGrid at 0x1cb866344a8>

In [76]:
# Box plot of tip_pct for each day, restricted to rows with tip_pct < 0.5.
# `factorplot` was renamed `catplot` in seaborn 0.9 and later removed, so use
# whichever name the installed seaborn exposes.
facet_plot = sns.catplot if hasattr(sns, 'catplot') else sns.factorplot
facet_plot(x='tip_pct', y='day', kind='box',
           data=tips[tips.tip_pct < 0.5])

<IPython.core.display.Javascript object>

  box_data = remove_na(group_data)


<seaborn.axisgrid.FacetGrid at 0x1cb866c6eb8>