# 9.1 A Brief matplotlib API Primer

In [1]:
%matplotlib notebook

In [4]:
import matplotlib.pyplot as plt 
import numpy as np 

data = np.arange(10)
data


array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x11f80c908>]

## 9.1.1 Figures and Subplots

In [8]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [9]:
ax1 = fig.add_subplot(2,2,1)

In [10]:
ax2 = fig.add_subplot(2,2,2)

In [11]:
ax3 = fig.add_subplot(2,2,3)

In [18]:
fig = plt.figure()
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,3)

<IPython.core.display.Javascript object>

In [19]:
plt.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x120a61550>]

In [20]:
_ = ax1.hist(np.random.randn(100), bins = 20, color = 'k', alpha = 0.3)

In [21]:
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x1208b27b8>

In [23]:
fig, axes = plt.subplots(2,3)

<IPython.core.display.Javascript object>

In [25]:
axes

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x120d42cf8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120f15e48>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120f550b8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x120f84668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120fb6c18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x120ff4208>]],
      dtype=object)

**Adjusting the spacing around subplots**

In [27]:
subplots_adjust(left = None, bottom = None, right = None, top = None, wspace = None, hspace = None)

NameError: name 'subplots_adjust' is not defined

In [35]:
fig, axes = plt.subplots(2,2,sharex = True, sharey = True)
for i in range(2):
    for j in range(2):
        axes[i,j].hist(np.random.randn(500), bins = 50, color = 'k', alpha = 0.5)
plt.subplots_adjust(wspace = 0, hspace = 0)

<IPython.core.display.Javascript object>

## 9.1.2 Colors, Markers, and Line Styles

In [None]:
ax.plot(x,y,'g--')
ax.plot(x,y,linestyle = '--', color = 'g')

In [36]:
from numpy.random import randn
fig = plt.figure()
plt.plot(randn(30).cumsum(), 'ko--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x12265b1d0>]

In [None]:
plot(randn(30).cumsum(), color = 'k', linestyle = 'dashed', marker = 'o')

In [38]:
fig = plt.figure()
data = np.random.randn(30).cumsum()
plt.plot(data, 'k--', label = 'Default')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x12297e668>]

In [42]:
fig = plt.figure()
plt.plot(data, 'k--', drawstyle = 'steps-post', label = 'steps-post')
plt.legend(loc = 'best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x122f70cc0>

## 9.1.3 Ticks, Labels, and Legends

**Setting the title, axis labels, ticks, and ticklabels**

In [43]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(np.random.randn(1000).cumsum())

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1233f4ac8>]

In [44]:
ticks = ax.set_xticks([0,250,500,750,1000])

In [45]:
labels = ax.set_xticklabels(['one','two','three','four','five'], rotation = 30, fontsize = 'small')

In [46]:
ax.set_title('My first matplotlib plot')

Text(0.5, 1, 'My first matplotlib plot')

In [47]:
ax.set_xlabel('Stages')

Text(0.5, 10.763891973024519, 'Stages')

In [48]:
props = {
    'title':'My first matplotlib plot',
    'xlabel':'Stages'
}
ax.set(**props)

[Text(0.5, 10.763891973024519, 'Stages'),
 Text(0.5, 1, 'My first matplotlib plot')]

**Adding legends**

In [52]:
from numpy.random import randn
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(randn(1000).cumsum(), 'k', label = 'one')
ax.plot(randn(1000).cumsum(), 'k--', label = 'two')
ax.plot(randn(1000).cumsum(), 'k.', label = 'three')
ax.legend(loc = 'best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1242fdbe0>

## 9.1.4 Annotations and Drawing on a Subplot

In [59]:
from datetime import datetime 
import pandas as pd

fig = plt.figure()
ax = fig.add_subplot(1,1,1)

data = pd.read_csv('/Users/boyuan/Desktop/OneDrive - Duke University/Data Science Box/Python/Python for data analysis 2nd/examples/spx.csv',
                   index_col = 0,
                   parse_dates = True)
spx = data['SPX']

spx.plot(ax = ax, style = 'k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Leham Bankruptcy')
]

for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
                xytext=(date, spx.asof(date) + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')

<IPython.core.display.Javascript object>

In [60]:
# Zoom in on 2007 - 2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600,1800])

(600, 1800)

In [61]:
ax.set_title('Important dates in the 2008 - 2009 financial crisis')

Text(0.5, 1, 'Important dates in the 2008 - 2009 financial crisis')

In [64]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color = 'k', alpha = 0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color = 'b', alpha = 0.3)
pgon = plt.Polygon([[0.15, 0.15],[0.35, 0.4],[0.2, 0.6]], color = 'g', alpha = 0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x126fbeac8>

## 9.1.5 Saving Plots to File

In [None]:
plt.savefig('figpath.svg')

In [None]:
plt.savefig('figpath.png', dpi = 400, bbox_inches = 'tight')

In [None]:
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()

## 9.1.6 matplotlib Configuration

In [None]:
plt.rc('figure', figsize = (10,10))

In [None]:
font_options = {'family':'monospace', 
                'weight':'bold',
                'size':'small'}
plt.rc('font', **font_options)

# 9.2 Plotting with pandas and seaborn

## 9.2.1 Line Plots

In [65]:
fig = plt.figure()
s = pd.Series(np.random.randn(10).cumsum(), index = np.arange(0,100,10))
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x122d31278>

In [66]:
df = pd.DataFrame(np.random.randn(10,4).cumsum(0),
                  columns = ['A','B','C','D'],
                  index = np.arange(0,100,10))
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x124b6c588>

## 9.2.2 Bar Plots

In [67]:
fig, axes = plt.subplots(2,1)

data = pd.Series(np.random.rand(16), index = list('abcdefghijklmnop'))

data.plot.bar(ax = axes[0], color = 'k', alpha = 0.7)

data.plot.barh(ax = axes[1], color = 'k', alpha = 0.7)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1285d2be0>

In [70]:
df = pd.DataFrame(np.random.rand(6,4),
                  index = ['one','two','three','four','five','six'],
                  columns = pd.Index(['A','B','C','D'], name = 'Genus'))
df

Genus,A,B,C,D
one,0.901813,0.454975,0.028978,0.3828
two,0.667159,0.279347,0.973368,0.488664
three,0.125011,0.506413,0.421096,0.260748
four,0.11249,0.845163,0.057449,0.714492
five,0.97925,0.298943,0.474107,0.898079
six,0.260572,0.928112,0.424887,0.416623


In [71]:
df.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x12858a668>

In [72]:
df.plot.line()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1286d56d8>

In [73]:
df.plot.bar(stacked = True, alpha = 0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x128b782b0>

In [76]:
df.plot.barh(stacked = True, alpha = 0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x124a19b38>

In [77]:
%pwd

'/Users/boyuan/Desktop/OneDrive - Duke University/Data Science Box/Python/Python for data analysis 2nd/Practices'

In [79]:
tips = pd.read_csv('/Users/boyuan/Desktop/OneDrive - Duke University/Data Science Box/Python/Python for data analysis 2nd/examples/tips.csv')

tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size
0,16.99,1.01,No,Sun,Dinner,2
1,10.34,1.66,No,Sun,Dinner,3
2,21.01,3.5,No,Sun,Dinner,3
3,23.68,3.31,No,Sun,Dinner,2
4,24.59,3.61,No,Sun,Dinner,4


In [80]:
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [81]:
# Not many 1- and 6-person parties 
party_counts = party_counts.loc[:,2:5]

In [82]:
# Normalize to sum to 1 
party_pcts = party_counts.div(party_counts.sum(1), axis = 0)
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [86]:
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2bf1f080>

In [84]:
import seaborn as sns

tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [89]:
fig = plt.figure()
sns.barplot(x = 'tip_pct', y = 'day', data = tips, orient = 'h')

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2c439e48>

In [95]:
fig = plt.figure()
sns.barplot(x = 'tip_pct', y = 'day', hue = 'time', data = tips, orient = 'h')

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2d215cf8>

In [91]:
sns.set(style = 'whitegrid')

## 9.2.3 Histograms and Density Plots

In [98]:
fig = plt.figure()
tips['tip_pct'].plot.hist(bins = 50)

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2d7661d0>

In [101]:
fig = plt.figure()
tips['tip_pct'].plot.density()

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2e2b1f98>

In [106]:
fig = plt.figure()

comp1 = np.random.normal(0,1,size = 200)
comp2 = np.random.normal(10,2,size = 200)
values = pd.Series(np.concatenate([comp1, comp2]))

sns.distplot(values, bins = 100, color = 'k')

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2e4cafd0>

## 9.2.4 Scatter or Point Plots

In [104]:
macro = pd.read_csv('/Users/boyuan/Desktop/OneDrive - Duke University/Data Science Box/Python/Python for data analysis 2nd/examples/macrodata.csv')

data = macro[['cpi','m1','tbilrate','unemp']]

trans_data = np.log(data).diff().dropna()

trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [107]:
fig = plt.figure()
sns.regplot('m1','unemp', data = trans_data)

  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2eb5b240>

In [108]:
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5, 1, 'Changes in log m1 versus log unemp')

In [109]:
sns.pairplot(trans_data, diag_kind = 'kde', plot_kws = {'alpha':0.2})

  squeeze=False)


<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1a2ed3a7b8>

## 9.2.5 Facet Grids and Categorical Data 

In [110]:
sns.factorplot(x = 'day', y = 'tip_pct', hue = 'time', col = 'smoker', kind = 'bar', data = tips[tips.tip_pct < 1])

  fig, axes = plt.subplots(nrow, ncol, **kwargs)


<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1a2ff23390>

In [111]:
sns.factorplot(x = 'day', y = 'tip_pct', row = 'time', col = 'smoker', kind = 'bar', data = tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1a3082d5c0>

In [112]:
sns.factorplot(x = 'tip_pct', y = 'day', kind = 'box', data = tips[tips.tip_pct < 0.5])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1a312448d0>

# 9.3 Other Python Visualization Tools

# 9.4 Conclusion