# § Chapter 9  Plotting and Visualization

## §9.1 A Brief matplotlib API Primer
-	Figures and Subplots
-	Colors, Markers, and Line Styles
-	Ticks, Labels, and Legends
-	Annotations and Drawing on a Subplot
-	Saving Plots to File
-	matplotlib Configuration

## §9.2 Plotting with pandas and seaborn
-	Line Plots
-	Bar Plots
-	Histograms and Density Plots
-	Scatter or Point Plots
-	Facet Grids and Categorical Data

## §9.3 Other Python Visualization Tools

In [1]:
import numpy as np
import pandas as pd
PREVIOUS_MAX_ROWS = pd.options.display.max_rows # remember the current max-rows display setting so it can be restored at the end of the notebook
pd.options.display.max_rows = 20
np.random.seed(12345)
import matplotlib.pyplot as plt
import matplotlib
plt.rc('figure', figsize=(10, 6))

# precision: number of decimal digits printed for floats;
# suppress=True prints small values in fixed-point notation instead of scientific notation
np.set_printoptions(precision=4, suppress=True)

plt.rcParams['axes.unicode_minus']=False # use an ASCII hyphen for the minus sign so negative tick labels display correctly

```python
%matplotlib notebook
```

## §9.1 A Brief matplotlib API Primer
-	Figures and Subplots
-	Colors, Markers, and Line Styles
-	Ticks, Labels, and Legends
-	Annotations and Drawing on a Subplot
-	Saving Plots to File
-	matplotlib Configuration

In [2]:
%matplotlib notebook

In [3]:
import matplotlib.pyplot as plt

In [4]:
plt.plot([1,2,3,4])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2637e0c5ca0>]

In [5]:
fig = plt.figure()
plt.plot([4,3,2,2])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2637e4119d0>]

In [6]:
plt.close('all')

In [7]:
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))

In [8]:
import numpy as np
# Ten consecutive integers, 0 through 9, used as sample y-values for the next plot.
data = np.arange(0, 10)
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2637e491460>]

### §9.1.1 Figures and Subplots

In [10]:
# Build an empty figure and reserve the first three slots of a 2 x 2 grid
# (add_subplot numbers the slots starting from 1); the fourth slot stays empty.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

<IPython.core.display.Javascript object>

In [11]:
fig = plt.figure()
ax1, ax2, ax3 = [fig.add_subplot(2, 2, slot) for slot in range(1, 4)]
# cumsum() returns the running total of the 50 normal draws, i.e. a random walk.
# The format string 'k--' is shorthand for color="black", linestyle="dashed".
ax3.plot(np.random.randn(50).cumsum(), 'k--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2637f28b370>]

In [12]:
np.random.seed(12345)
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

# Draw on ax3 explicitly rather than through plt.plot(), which silently targets
# whichever axes happens to be "current" (here the most recently created one).
ax3.plot(np.random.randn(50).cumsum(), 'k--')
# Histogram of 100 normal draws; alpha sets the transparency.
# Assigning to `_` keeps the cell from echoing hist()'s return value.
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
# Scatter of y = x plus Gaussian noise: scatter(x, y).
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x2637f856700>

In [13]:
plt.close('all')

In [14]:
# fig, axes = plt.subplots(2, 3) # fig -> whole figure , axes -> subfigure
# axes[1,1].scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30)) # plt.scatter(x,y)

#### Adjusting the spacing around subplots

```python
subplots_adjust(left=None, bottom=None, right=None, top=None,
                wspace=None, hspace=None)
```

```python
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)
```

In [15]:
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
# axes.flat visits the four panels in row-major order: [0,0], [0,1], [1,0], [1,1].
for panel in axes.flat:
    panel.hist(np.random.randn(500), bins=50, color='k', alpha=0.5)

# wspace / hspace set the padding between subplots as a fraction of the average
# axes width / height; zero removes the gaps. The margins (left, bottom, right,
# top) are left at their current values.
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### §9.1.2 Colors, Markers, and Line Styles

```python
ax.plot(x, y, 'g--') # green line and linestyle='--'
```

```python
ax.plot(x, y, linestyle='--', color='g')
```

In [16]:
from numpy.random import randn
fig = plt.figure()
# Explicit keyword form of the format string 'ko--':
# 'k' -> black, 'o' -> circle markers, '--' -> dashed line.
plt.plot(randn(30).cumsum(), color='k', marker='o', linestyle='--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x263037faeb0>]

The same plot can be written more explicitly with keyword arguments:

```python
plt.plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')
```

In [17]:
plt.close('all')

In [18]:
data = np.random.randn(30).cumsum()
# Same series drawn twice: once with the default linear interpolation (dashed),
# once as a step function that holds each value until the next point.
# Other step variants are drawstyle='steps' and drawstyle='steps-mid'.
plt.plot(data, color='k', linestyle='--', label='Default')
plt.plot(data, color='k', linestyle='-', drawstyle='steps-post', label='steps-post')
# loc='best' lets matplotlib pick the legend position; a fixed one such as
# 'upper left' is also accepted.
plt.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x2637e4ba3d0>

### §9.1.3 Ticks, Labels, and Legends

#### Setting the title, axis labels, ticks, and ticklabels

In [19]:
# One figure holding a single axes, showing a 1000-step random walk.
fig, ax = plt.subplots()
ax.plot(np.random.randn(1000).cumsum())

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2637e491f70>]

In [20]:
fig, ax = plt.subplots()
ax.plot(np.random.randn(1000).cumsum())
# Place a tick every 250 steps along the x axis: 0, 250, 500, 750, 1000.
ticks = ax.set_xticks(list(range(0, 1001, 250)))
# Replace the numeric tick text with names, tilted 30 degrees and set small.
labels = ax.set_xticklabels(
    ['one', 'two', 'three', 'four', 'five'],
    rotation=30,
    fontsize='small',
)

<IPython.core.display.Javascript object>

In [21]:
fig, ax = plt.subplots()
ax.plot(np.random.randn(1000).cumsum())

# Tick positions first, then the text shown at each position.
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(
    ['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small'
)

# Title for the axes and a caption for the x axis.
ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Stages')

```python
props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}
ax.set(**props)
```

In [22]:
fig, ax = plt.subplots()
ax.plot(np.random.randn(1000).cumsum())

ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(
    ['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small'
)

# ax.set() applies several properties in one call; each key maps to the
# matching setter (title -> set_title, xlabel -> set_xlabel).
props = dict(title='My first matplotlib plot', xlabel='Stages')
ax.set(**props)

<IPython.core.display.Javascript object>

[Text(0.5, 1.0, 'My first matplotlib plot'), Text(0.5, 0, 'Stages')]

#### Adding legends

In [23]:
from numpy.random import randn
# Three independent random walks, each tagged with a label.
# No legend is drawn yet: that requires an explicit ax.legend() call.
fig, ax = plt.subplots()
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x26305596100>]

In [24]:
from numpy.random import randn
fig, ax = plt.subplots()
# (format string, legend label) for each of the three random walks.
for fmt, name in [('k', 'one'), ('k--', 'two'), ('k.', 'three')]:
    ax.plot(randn(1000).cumsum(), fmt, label=name)
# Build the legend from the labels passed to plot(); 'best' picks the position.
ax.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x26305098490>

### §9.1.4 Annotations and Drawing on a Subplot

```python
ax.text(x, y, 'Hello world!',
        family='monospace', fontsize=10)
```

In [25]:
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Load the CSV with its first column as a parsed date index, then keep the SPX column.
data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

spx.plot(ax=ax, style='k-')

# (date, caption) pairs to mark on the price curve.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

# For each event: spx.asof(date) looks up the last available value at or before
# `date`. The arrow tip (xy) sits 75 above that value and the caption (xytext)
# 225 above it; arrowprops styles the arrow drawn from the caption to the tip.
# The alignment arguments anchor the caption's top-left corner at xytext.
for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
                xytext=(date, spx.asof(date) + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [26]:
spx

Date
1990-02-01     328.79
1990-02-02     330.92
1990-02-05     331.85
1990-02-06     329.66
1990-02-07     333.75
               ...   
2011-10-10    1194.89
2011-10-11    1195.54
2011-10-12    1207.25
2011-10-13    1203.66
2011-10-14    1224.58
Name: SPX, Length: 5472, dtype: float64

In [27]:
spx.asof(datetime(1990, 2, 1))

328.79

```python
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)
```

In [28]:
fig, ax = plt.subplots(figsize=(12, 6))

# Rectangle: lower-left corner, then width and height.
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15, color='k', alpha=0.3)
# Circle: centre and radius. It renders as an oval because the x and y axes
# are not drawn at the same scale on a 12 x 6 figure.
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)
# Polygon: one [x, y] pair per vertex (three vertices -> a triangle).
pgon = plt.Polygon(xy=[[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

# A patch only becomes visible once it is attached to an axes.
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x263065d3490>

### §9.1.5 Saving Plots to File

```python
plt.savefig('figpath.svg')
```

```python
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')
```

```python
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()
```

### §9.1.6 matplotlib Configuration

```python
plt.rc('figure', figsize=(10, 10))
```

```python
font_options = {'family' : 'monospace',
                'weight' : 'bold',
                'size'   : 'small'}
plt.rc('font', **font_options)
```

## §9.2 Plotting with pandas and seaborn
-	Line Plots
-	Bar Plots
-	Histograms and Density Plots
-	Scatter or Point Plots
-	Facet Grids and Categorical Data

### §9.2.1 Line Plots

In [29]:
plt.close('all')

In [30]:
# 10-step random walk, indexed 0, 10, ..., 90.
s = pd.Series(
    data=np.random.randn(10).cumsum(),
    index=np.arange(start=0, stop=100, step=10),
)
s

0    -0.664223
10   -1.646955
20   -2.750678
30   -2.994528
40   -5.126708
50   -6.901047
60   -6.467840
70   -6.129356
80   -4.859401
90   -3.595103
dtype: float64

In [31]:
# NOTE(review): this draws a new random series, so the plot does not show the
# values displayed by the previous cell -- confirm whether reusing the existing
# `s` was intended.
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
# With no `ax` argument the series is plotted against its index as a line plot.
s.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [32]:
# Four independent random walks, one per column: cumsum(axis=0) accumulates
# down the rows within each column.
df = pd.DataFrame(
    data=np.random.randn(10, 4).cumsum(axis=0),
    index=np.arange(0, 100, 10),
    columns=['A', 'B', 'C', 'D'],
)

# The 3rd edition of the book additionally calls plt.style.use('grayscale') here.

df

Unnamed: 0,A,B,C,D
0,1.041537,1.276397,-0.066026,0.659542
10,0.729247,2.320109,0.591515,0.733334
20,-0.031255,2.677122,1.790869,0.688508
30,0.669098,1.498836,0.486164,1.183776
40,0.432898,1.404024,-0.014061,1.615359
50,1.520455,1.089641,0.80845,-1.263222
60,1.977574,2.728372,3.96921,-1.982107
70,0.261017,3.403217,3.652359,-3.012927
80,-0.628076,3.626984,3.299177,-3.762952
90,0.12932,2.754734,1.337943,-4.160943


In [33]:
df.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

### §9.2.2 Bar Plots

In [34]:
# Two stacked panels: vertical bars on top, horizontal bars below.
fig, axes = plt.subplots(nrows=2, ncols=1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
# plot(kind='bar') / plot(kind='barh') are the same as plot.bar() / plot.barh().
data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [35]:
np.random.seed(12348)

In [36]:
# 6 x 4 frame of uniform random values. Naming the column index 'Genus' gives
# the later bar plots a legend title.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
df

Genus,A,B,C,D
one,0.37067,0.602792,0.229159,0.486744
two,0.420082,0.571653,0.049024,0.880592
three,0.814568,0.27716,0.880316,0.431326
four,0.37402,0.89942,0.460304,0.100843
five,0.43327,0.125107,0.494675,0.961825
six,0.601648,0.478576,0.20569,0.560547


In [37]:
df.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [38]:
# DataFrame.plot opens its own figure when no `ax` is passed, so a preceding
# plt.figure() call would only leave an empty extra figure behind.
# stacked=True stacks each row's values into a single bar.
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [39]:
plt.close('all')

In [40]:
tips = pd.read_csv('examples/tips.csv')
tips

Unnamed: 0,total_bill,tip,smoker,day,time,size
0,16.99,1.01,No,Sun,Dinner,2
1,10.34,1.66,No,Sun,Dinner,3
2,21.01,3.50,No,Sun,Dinner,3
3,23.68,3.31,No,Sun,Dinner,2
4,24.59,3.61,No,Sun,Dinner,4
...,...,...,...,...,...,...
239,29.03,5.92,No,Sat,Dinner,3
240,27.18,2.00,Yes,Sat,Dinner,2
241,22.67,2.00,Yes,Sat,Dinner,2
242,17.82,1.75,No,Sat,Dinner,2


In [41]:
tips = pd.read_csv('examples/tips.csv')
# Frequency table: rows are days, columns are party sizes, cells are counts.
party_counts = pd.crosstab(index=tips['day'], columns=tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [42]:
# Parties of 1 and 6 are rare, so keep only sizes 2 through 5.
# Label-based slicing with .loc includes both endpoints.
party_counts = party_counts.loc[:, slice(2, 5)]
party_counts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,16,1,1,0
Sat,53,18,13,1
Sun,39,15,18,3
Thur,48,4,5,1


In [43]:
party_counts.sum(1)

day
Fri     18
Sat     85
Sun     75
Thur    58
dtype: int64

In [44]:
# Divide each row by its own total so every day's shares sum to 1.
# sum(axis=1) gives one total per day; axis='index' aligns those totals with the rows.
party_pcts = party_counts.div(party_counts.sum(axis=1), axis='index')
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [45]:
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='day'>

In [46]:
party_pcts.plot.bar(stacked=True) # new content in 3rd edition

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='day'>

In [47]:
plt.close('all')

In [48]:
import seaborn as sns
# Tip as a fraction of the bill before the tip: tip / (total_bill - tip).
tips['tip_pct'] = tips['tip'].div(tips['total_bill'].sub(tips['tip']))
tips

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.50,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069
...,...,...,...,...,...,...,...
239,29.03,5.92,No,Sat,Dinner,3,0.256166
240,27.18,2.00,Yes,Sat,Dinner,2,0.079428
241,22.67,2.00,Yes,Sat,Dinner,2,0.096759
242,17.82,1.75,No,Sat,Dinner,2,0.108899


In [49]:
# Mean tip_pct per day, drawn horizontally. The error bars are seaborn's default
# 95% confidence interval, so the explicit `ci=95` argument (deprecated in
# seaborn >= 0.12 in favour of `errorbar`) is not needed.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [50]:
plt.close('all')

In [51]:
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [52]:
plt.close('all')

In [53]:
# Notice that seaborn has automatically changed the aesthetics of plots: the default color palette, plot background,
# and grid line colors. You can switch between different plot appearances using seaborn.set_style.
# NOTE(review): sns.set(style=...) applies seaborn's whole default theme with the given style,
# not only the style; confirm whether sns.set_style("whitegrid") was intended.
sns.set(style="whitegrid")

### §9.2.3 Histograms and Density Plots

In [54]:
# Series plots draw on the current axes, so open a fresh figure first.
plt.figure()
# Histogram of tip percentages split into 50 equal-width bins.
tips['tip_pct'].plot(kind='hist', bins=50)

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Frequency'>

In [55]:
# Series plots draw on the current axes, so open a fresh figure first.
plt.figure()
# Kernel density estimate of the tip percentages.
tips['tip_pct'].plot(kind='density')

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Density'>

In [56]:
plt.figure()
# Bimodal sample: 200 draws from N(mean=0, sd=1) and 200 from N(mean=10, sd=2).
# np.random.normal(loc, scale, size): loc is the mean, scale the standard deviation.
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = pd.Series(np.concatenate([comp1, comp2]))
# sns.distplot is deprecated; histplot with kde=True and stat='density' draws the
# same combination of a density-normalised histogram and a KDE curve.
sns.histplot(values, bins=100, color='k', kde=True, stat='density')

<IPython.core.display.Javascript object>



<AxesSubplot:ylabel='Density'>

### §9.2.4 Scatter or Point Plots

In [57]:
macro = pd.read_csv('examples/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Log differences: row t holds log(x[t]) - log(x[t-1]). The first row has no
# predecessor and becomes NaN, so dropna() removes it.
trans_data = np.log(data).diff().dropna()
# Show the last five rows.
trans_data.tail(5)

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [58]:
trans_data[:5]

Unnamed: 0,cpi,m1,tbilrate,unemp
1,0.005849,0.014215,0.088193,-0.128617
2,0.006838,-0.008505,0.215321,0.038466
3,0.000681,-0.003565,0.125317,0.05506
4,0.005772,-0.002861,-0.212805,-0.074108
5,0.000338,0.004289,-0.266946,0.0


In [59]:
plt.figure()
# Scatter plot with a fitted linear regression line. x and y are passed by
# keyword: positional use is deprecated in seaborn 0.11 and no longer accepted
# from 0.12 on, where the first positional argument is `data`.
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

<IPython.core.display.Javascript object>



Text(0.5, 1.0, 'Changes in log m1 versus log unemp')

In [60]:
sns.pairplot(trans_data, plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x263322555e0>

### §9.2.5 Facet Grids and Categorical Data

In [61]:
tips

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.50,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069
...,...,...,...,...,...,...,...
239,29.03,5.92,No,Sat,Dinner,3,0.256166
240,27.18,2.00,Yes,Sat,Dinner,2,0.079428
241,22.67,2.00,Yes,Sat,Dinner,2,0.096759
242,17.82,1.75,No,Sat,Dinner,2,0.108899


In [62]:
# factorplot was renamed to catplot (seaborn 0.9) and the old name is deprecated.
# One bar chart per smoker value (columns), bars coloured by time of day;
# rows with tip_pct >= 1 are excluded from the data passed in.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x26333518160>

In [63]:
# factorplot was renamed to catplot (seaborn 0.9) and the old name is deprecated.
# Facet grid with one row per time value and one column per smoker value;
# rows with tip_pct >= 1 are excluded from the data passed in.
sns.catplot(x='day', y='tip_pct', row='time',
            col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x26333b5a790>

In [64]:
# factorplot was renamed to catplot (seaborn 0.9) and the old name is deprecated.
# Horizontal box plot of tip_pct per day, restricted to rows with tip_pct < 0.5.
sns.catplot(x='tip_pct', y='day', kind='box',
            data=tips[tips.tip_pct < 0.5])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x26333a580d0>

## §9.3 Other Python Visualization Tools

In [None]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS

## Conclusion