# Chapter 9: Plotting and Visualization

In [1]:
%matplotlib notebook

## 9.1 A brief matplotlib API primer

In [2]:
import matplotlib.pyplot as plt

In [3]:
import numpy as np

In [4]:
# Sample data: the integers 0 through 9.
data = np.arange(0, 10)

In [5]:
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2ae5d7e2ba8>]

### Figures and Subplots

In [7]:
fig=plt.figure()

<IPython.core.display.Javascript object>

In [8]:
ax1=fig.add_subplot(2,2,1)

In [9]:
ax2=fig.add_subplot(2,2,3)

In [10]:
ax3=fig.add_subplot(2,2,2)

In [11]:
# Start a fresh figure and fill three cells of a 2x2 grid
# (positions 1, 2 and 3; position 4 is left empty).
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, position) for position in (1, 2, 3))

<IPython.core.display.Javascript object>

In [12]:
# Draw the dashed black random walk on the third subplot explicitly.
# The bare plt.plot(...) call targets pyplot's implicit "current axes",
# which is ax3 here only because it is the axes created most recently.
ax3.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x2ae5dd1b668>]

In [13]:
# Histogram of 100 random draws on the first subplot (20 bins).
# Assigning the return value to `_` keeps it from being echoed as the
# cell's output.
_=ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)

In [14]:
# Scatter plot on the second subplot: x = 0..29, y = x plus random
# noise scaled by 3. The result is again assigned to `_` to suppress
# the cell output.
_=ax2.scatter(np.arange(30), np.arange(30)+3*np.random.randn(30))

In [15]:
# Create a figure together with a 2x3 grid of subplots in one call.
fig, axes = plt.subplots(nrows=2, ncols=3)

<IPython.core.display.Javascript object>

In [16]:
axes

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000002AE5D648E80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002AE5DB912B0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002AE5DBB7828>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000002AE5DA60DA0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002AE5DAD2358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002AE5DAF88D0>]],
      dtype=object)

In [17]:
axes[0,1]

<matplotlib.axes._subplots.AxesSubplot at 0x2ae5db912b0>

### Adjusting the spacing around subplots

In [18]:
# 2x2 grid of histograms whose panels share both axes; afterwards the
# horizontal and vertical padding between panels is set to zero.
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
for i, j in np.ndindex(2, 2):  # row-major: (0,0), (0,1), (1,0), (1,1)
    axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### Colors, Markers, and Line Styles

In [19]:
plot?

Object `plot` not found.


In [20]:
plt.figure()
plt.plot(np.random.randn(30).cumsum(),'ko--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2ae5e1c7cc0>]

In [21]:
# Same kind of line as the 'ko--' format string used in the previous
# cell, but with colour, line style and marker spelled out as keyword
# arguments.
plt.plot(np.random.randn(30).cumsum(),color='k',linestyle='dashed',marker='o')

[<matplotlib.lines.Line2D at 0x2ae5e1d4668>]

In [22]:
# A 30-step random walk: running total of 30 random normal draws.
data = np.cumsum(np.random.randn(30))

In [23]:
# Plot the same series twice on a new figure: once with the default
# drawing style (dashed) and once with drawstyle='steps-post' (solid).
# The labels are picked up by the legend added in the next cell.
plt.figure()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', label='steps-post', drawstyle='steps-post')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2ae5e1d4160>]

In [24]:
plt.legend(loc='best')

<matplotlib.legend.Legend at 0x2ae5d648940>

### Ticks, Labels, and Legends

#### Setting the title, axis labels, ticks, and ticklabels

In [25]:
fig=plt.figure()

<IPython.core.display.Javascript object>

In [26]:
ax=fig.add_subplot(1,1,1)

In [27]:
ax.plot(np.random.randn(1000).cumsum())

[<matplotlib.lines.Line2D at 0x2ae5e24a7f0>]

In [28]:
ticks=ax.set_xticks([0,250,500,750,1000])

In [29]:
# Replace the numeric tick labels with names, rotated by 30 degrees
# and rendered in a small font.
labels = ax.set_xticklabels(
    ['one', 'two', 'three', 'four', 'five'],
    rotation=30,
    fontsize='small',
)

In [30]:
ax.set_title('My first matplotlib plot')

Text(0.5, 1.0, 'My first matplotlib plot')

In [31]:
ax.set_xlabel('Stages')

Text(0.5, 0, 'Stages')

In [32]:
# Set several axes properties at once by unpacking a dict into ax.set().
props = dict(title='My first matplotlib plot', xlabel='Stages')
ax.set(**props)

[Text(0.5, 0, 'Stages'), Text(0.5, 1.0, 'My first matplotlib plot')]

#### Adding legends

In [33]:
from numpy.random import randn

In [34]:
fig=plt.figure();
ax=fig.add_subplot(1,1,1)

<IPython.core.display.Javascript object>

In [35]:
# Three independent random walks, each drawn with its own format string
# and given a label, followed by a legend placed at the 'best' location.
for line_format, line_label in (('k', 'one'), ('k--', 'two'), ('k.', 'three')):
    ax.plot(randn(1000).cumsum(), line_format, label=line_label)
ax.legend(loc='best')

<matplotlib.legend.Legend at 0x2ae5e297550>

In [36]:
# Basic building blocks of a plot: 1. title 2. xlabel 3. ylabel 4. xtick 5. ytick 6. xticklabel 7. yticklabel
# 8. line 9. linelabel 10. legends 11. xlim 12. ylim

In [37]:
ax.legend?

### Annotations and Drawing on a Subplot

In [38]:
from datetime import datetime

In [39]:
fig=plt.figure()
ax=fig.add_subplot(1,1,1)

<IPython.core.display.Javascript object>

In [40]:
import pandas as pd

In [41]:
# Load spx.csv, using the first column as the index and parsing it as dates.
# NOTE(review): the absolute path below exists only on the original
# author's machine -- point it at a local copy of examples/spx.csv
# before re-running this cell.
data=pd.read_csv(r'E:\学习资料\python\pydata-book-2nd-edition\pydata-book-2nd-edition\examples\spx.csv'
                , engine='python',index_col=0,parse_dates=True)

In [42]:
spx=data['SPX']

In [43]:
spx.plot(ax=ax,style='k-')

<matplotlib.axes._subplots.AxesSubplot at 0x2ae5e28e978>

In [44]:
# Dates to annotate on the chart, each paired with its caption.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]

In [45]:
# Annotate every event with an arrow. Both the arrow tip (xy) and the
# text (xytext) are placed relative to spx.asof(date): the tip 75 above
# that value, the text 225 above it.
for date, label in crisis_data:
    ax.annotate(
        label,
        xy=(date, spx.asof(date) + 75),
        xytext=(date, spx.asof(date) + 225),
        arrowprops={
            'facecolor': 'black',
            'headwidth': 4,
            'width': 2,
            'headlength': 4,
        },
        horizontalalignment='left',
        verticalalignment='top',
    )

In [46]:
# Zoom in on 2007-2010
# The limits are given as datetime objects rather than strings such as
# '1/1/2007': a string is only understood as a date when the axis
# happens to have a converter that parses it, whereas datetime values
# are unambiguous date limits.
ax.set_xlim([datetime(2007, 1, 1), datetime(2011, 1, 1)])

(732677.0, 734138.0)

In [47]:
ax.set_ylim([600,1800])

(600, 1800)

In [48]:
ax.set_title('Important dates in the 2008-2009 financial crisis')

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [49]:
fig=plt.figure()

<IPython.core.display.Javascript object>

In [50]:
ax=fig.add_subplot(1,1,1)

In [51]:
# Rectangle patch anchored at (0.2, 0.75), 0.4 wide and 0.15 high,
# drawn with colour 'k' at 30% opacity.
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15, color='k', alpha=0.3)

In [52]:
# Circle patch centred at (0.7, 0.2) with radius 0.15, colour 'b',
# 30% opacity.
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)

In [54]:
# Triangle patch defined by its three vertices, colour 'g', 50% opacity.
pgon = plt.Polygon(
    xy=[[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
    color='g',
    alpha=0.5,
)

In [55]:
ax.add_patch(rect)

<matplotlib.patches.Rectangle at 0x2ae5ef54710>

In [56]:
ax.add_patch(circ)

<matplotlib.patches.Circle at 0x2ae5ef54cc0>

In [57]:
ax.add_patch(pgon)

<matplotlib.patches.Polygon at 0x2ae62d1e4a8>

### Saving Plots to File

In [58]:
# Example of saving a figure to disk. It is left commented out, so
# running the notebook does not write figpath.png.
# plt.savefig('figpath.png',dpi=400,bbox_inches='tight')

## 9.2 Plotting with pandas and seaborn

### Line Plots

In [60]:
# Random walk of 10 steps, indexed by 0, 10, ..., 90.
s = pd.Series(data=np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))

In [62]:
fig=plt.figure()
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae62d02f60>

In [63]:
# Four random walks (columns A-D) of 10 steps each; the cumulative sum
# runs down the rows, and the rows are indexed by 0, 10, ..., 90.
df = pd.DataFrame(
    data=np.random.randn(10, 4).cumsum(axis=0),
    columns=['A', 'B', 'C', 'D'],
    index=np.arange(0, 100, 10),
)

In [64]:
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae5f4bd358>

### Bar Plots

In [66]:
fig,axes=plt.subplots(2,1)

<IPython.core.display.Javascript object>

In [67]:
# Sixteen uniform random values labelled 'a' through 'p'.
data = pd.Series(data=np.random.rand(16), index=list('abcdefghijklmnop'))

In [68]:
data.plot.bar(ax=axes[0], color='k', alpha=0.7)

<matplotlib.axes._subplots.AxesSubplot at 0x2ae62ca2c88>

In [69]:
data.plot.barh(ax=axes[1],color='k',alpha=0.7)

<matplotlib.axes._subplots.AxesSubplot at 0x2ae634e8668>

In [70]:
# 6x4 table of uniform random values; the column index is named
# 'Genus', which is the header shown above the column labels.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
)

In [71]:
df

Genus,A,B,C,D
one,0.041792,0.773608,0.703145,0.80234
two,0.23591,0.896291,0.586661,0.332474
three,0.29295,0.786567,0.655105,0.942324
four,0.547781,0.889356,0.374693,0.153905
five,0.828828,0.067414,0.200655,0.52668
six,0.752958,0.057947,0.919409,0.836677


In [77]:
df.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae64949978>

In [78]:
df.plot.barh(stacked=True,alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae64d4db00>

In [80]:
# Load the tips dataset.
# NOTE(review): the absolute path below exists only on the original
# author's machine -- point it at a local copy of examples/tips.csv
# before re-running this cell.
tips=pd.read_csv(r'E:\学习资料\python\pydata-book-2nd-edition\pydata-book-2nd-edition\examples\tips.csv')

In [81]:
party_counts=pd.crosstab(tips['day'], tips['size'])

In [82]:
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [84]:
# Not many 1- and 6-person parties

In [85]:
# Keep only the columns for party sizes 2 through 5. `.loc` slices by
# label and includes both endpoints, so columns 2, 3, 4 and 5 remain.
party_counts=party_counts.loc[:,2:5]

In [86]:
party_counts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,16,1,1,0
Sat,53,18,13,1
Sun,39,15,18,3
Thur,48,4,5,1


In [89]:
# Normalize each row so its values sum to 1: sum(axis=1) yields the
# per-row totals, and div(..., axis=0) matches those totals to the rows.
# NOTE(review): the name looks like a typo for `party_pcts`; it is kept
# because later cells refer to `party_pacts`.
party_pacts = party_counts.div(party_counts.sum(axis=1), axis=0)

In [90]:
party_pacts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [91]:
party_pacts.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae658edcc0>

In [92]:
import seaborn as sns

In [95]:
# Tip as a fraction of the bill excluding the tip (total_bill - tip),
# stored as a new 'tip_pct' column on `tips`.
tips['tip_pct'] = tips['tip'].div(tips['total_bill'].sub(tips['tip']))

In [96]:
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [98]:
fig=plt.figure()
sns.barplot(x='tip_pct',y='day',data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae683cc4a8>

In [100]:
plt.figure()
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae683ee358>

In [102]:
sns.set(style='whitegrid')

### Histograms and Density Plots

In [104]:
plt.figure()
tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae6819f6d8>

In [105]:
plt.figure()
tips['tip_pct'].plot.density()



<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae68223048>

In [107]:
# Two-component sample: 200 normal draws with mean 0 and scale 1,
# followed by 200 with mean 10 and scale 2, concatenated into one
# Series and passed to seaborn's distplot on a new figure.
plt.figure()
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)
values = pd.Series(np.concatenate((comp1, comp2)))
sns.distplot(values, bins=100, color='k')



<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2ae683246a0>

### Scatter or Point Plots

In [108]:
# Load the macrodata dataset.
# NOTE(review): the absolute path below exists only on the original
# author's machine -- point it at a local copy of examples/macrodata.csv
# before re-running this cell.
macro=pd.read_csv(r'E:\学习资料\python\pydata-book-2nd-edition\pydata-book-2nd-edition\examples\macrodata.csv')

In [109]:
data=macro[['cpi','m1','tbilrate','unemp']]

In [127]:
# Log differences: take the natural log of every column, then the
# difference between consecutive rows. dropna() discards any row that
# contains a NaN, which includes the first row produced by diff().
trans_data=np.log(data).diff().dropna()

In [128]:
# Positional slice of rows -5 up to (but not including) -1: four rows,
# with the very last row left out.
trans_data.iloc[-5:-1]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339


In [132]:
# Scatter plot of 'unemp' against 'm1' with seaborn's regression fit,
# drawn on a new figure.
# x and y are passed by keyword: recent seaborn releases accept only
# `data` positionally, so the positional form regplot('m1', 'unemp', ...)
# fails there, while the keyword form works on old and new versions.
plt.figure()
sns.regplot(x='m1', y='unemp', data=trans_data)
# Title spelling corrected to "versus".
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Changes in log m1 varsus log unemp')

In [133]:
# Grid of pairwise plots for all columns of trans_data. diag_kind='kde'
# requests density estimates on the diagonal, and plot_kws passes
# alpha=0.2 (semi-transparent markers) to the off-diagonal plots.
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha':0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x2ae69fb0a90>

### Facet Grids and Categorical Data

In [137]:
# Bar plot of tip_pct by day, coloured by `time`, with one column of
# panels per `smoker` value; rows with tip_pct >= 1 are filtered out.
# `catplot` is the current name of this function: `factorplot` is its
# deprecated former name and is not available in newer seaborn releases.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2ae6bd3ff60>

In [138]:
# Same data faceted on two variables: one row of panels per `time`
# value and one column per `smoker` value (rows with tip_pct >= 1 are
# filtered out).
# `catplot` replaces the deprecated `factorplot` name, which is not
# available in newer seaborn releases.
sns.catplot(x='day', y='tip_pct', row='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2ae6af0ec88>

In [139]:
# Box plot of tip_pct for each day, restricted to rows with
# tip_pct < 0.5.
# `catplot` replaces the deprecated `factorplot` name, which is not
# available in newer seaborn releases.
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct < 0.5])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x2ae6ea6ec88>

## 9.3 Other Python Visualization Tools