# Basic Plotting with matplotlib

You can show matplotlib figures directly in the notebook by using the `%matplotlib notebook` and `%matplotlib inline` magic commands. 

`%matplotlib notebook` provides an interactive environment.

In [3]:
%matplotlib notebook

### Matplotlib Architecture
#### Backend
     -- Deals with the rendering of plots to screen or files
     -- In Jupyter notebooks we use the inline backend
#### Artist Layer
     -- Contains containers such as Figure, Subplot, and Axes
     -- Contains primitives, such as a Line2D and Rectangle, and collections, such as a PathCollection
#### Scripting Layer
     -- More effective, use in this course caller pyplot
     -- Simplifies access to the Artist and Backend layers

### visualization methods:
     -- eg, D3.js -- a declarative information visualization method(SVG, HTML)
     -- eg, matplotlib.pyplot --  a procedure information visualization method

In [4]:
import matplotlib as mpl
mpl.get_backend()

'nbAgg'

In [5]:
import matplotlib.pyplot as plt
plt.plot?

In [14]:
# because the default is the line style '-', 
# nothing will be shown if we only pass in one point (3,2)
plt.plot(3, 2)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x19b22518>]

In [15]:
# we can pass in '.' to plt.plot to indicate that we want
# the point (3,2) to be indicated with a marker '.'
plt.plot(3, 2, '.')
## %matplotlib inline is not interactive, however, $matplotlib notebook is interactive

[<matplotlib.lines.Line2D at 0x19594128>]

Let's see how to make a plot without using the scripting layer.

In [16]:
# First let's set the backend without using mpl.use() from the scripting layer, mpl.use() calling from scripting API
## directly interfacing with the artist layer
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure

# create a new figure
fig = Figure()

# associate fig with the backend
canvas = FigureCanvasAgg(fig)

# add a subplot to the fig
ax = fig.add_subplot(111)

# plot the point (3,2)
ax.plot(3, 2, '.')

# save the figure to test.png
# you can see this figure in your Jupyter workspace afterwards by going to
# https://hub.coursera-notebooks.org/
canvas.print_png('test.png')

We can use html cell magic to display the image.

In [17]:
%%html   ## use HTML to see the image
<img src='test.png' />

In [31]:
# create a new figure
plt.figure()

# plot the point (3,2) using the circle marker
plt.plot(3, 2, 'o')
## plt.gcf() can get access to the current figure -- gcf: get current figure
# get the current axes
ax = plt.gca()
print(ax)
# Set axis properties [xmin, xmax, ymin, ymax]
ax.axis([0,6,0,10])


<IPython.core.display.Javascript object>

AxesSubplot(0.125,0.11;0.775x0.77)


[0, 6, 0, 10]

In [27]:
# create a new figure
plt.figure()

# plot the point (1.5, 1.5) using the circle marker
plt.plot(1.5, 1.5, 'o')
# plot the point (2, 2) using the circle marker
plt.plot(2, 2, 'o')
# plot the point (2.5, 2.5) using the circle marker
plt.plot(2.5, 2.5, 'o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1b2acb38>]

In [32]:
# get current axes
ax = plt.gca()
print(ax)
# get all the child objects the axes contains
ax.get_children()


AxesSubplot(0.125,0.11;0.775x0.77)


[<matplotlib.lines.Line2D at 0x19441cf8>,
 <matplotlib.spines.Spine at 0x1bcfe5c0>,
 <matplotlib.spines.Spine at 0x19b34b38>,
 <matplotlib.spines.Spine at 0x1a261860>,
 <matplotlib.spines.Spine at 0x1a261358>,
 <matplotlib.axis.XAxis at 0x195d4d68>,
 <matplotlib.axis.YAxis at 0x1a0bca58>,
 Text(0.5, 1, ''),
 Text(0.0, 1, ''),
 Text(1.0, 1, ''),
 <matplotlib.patches.Rectangle at 0x194297b8>]

# Scatterplots

### pyplot properties:
    -- pyplot is going to retrieve the current figure the function plt.gcf() and get the current axes plt.gca(), pyplot is keeping track of the axes objects for us.
    -- pyplot mirrors the API of the axes objects.
    -- function decoration of the functions in matplotlib end with an open set of keywords arguments.

In [35]:
import numpy as np

x = np.array([1,2,3,4,5,6,7,8])
y = x

plt.figure()
plt.scatter(x, y) # similar to plt.plot(x, y, '.'), but the underlying child objects in the axes are not Line2D
## plt.plot(x, y, '.') is the same as above

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1b26c9b0>]

In [36]:
import numpy as np

x = np.array([1,2,3,4,5,6,7,8])
y = x

# create a list of colors for each point to have
# ['green', 'green', 'green', 'green', 'green', 'green', 'green', 'red']
colors = ['green']*(len(x)-1)
colors.append('red')

plt.figure()

# plot the point with size 100 and chosen colors, size =  set the size of the datapoints
plt.scatter(x, y, s=100, c=colors)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x192a9160>

In [37]:
# convert the two lists into a list of pairwise tuples based on index positions
zip_generator = zip([1,2,3,4,5], [6,7,8,9,10])

print(list(zip_generator))
# the above prints:
# [(1, 6), (2, 7), (3, 8), (4, 9), (5, 10)]

zip_generator = zip([1,2,3,4,5], [6,7,8,9,10])
# The single star * unpacks a collection into positional arguments
print(*zip_generator)
# the above prints:
# (1, 6) (2, 7) (3, 8) (4, 9) (5, 10)

[(1, 6), (2, 7), (3, 8), (4, 9), (5, 10)]
(1, 6) (2, 7) (3, 8) (4, 9) (5, 10)


In [40]:
zip_generator = zip([1,2,3,4,5], [6,7,8,9, 10], [11, 12, 13, 14, 15])

print(list(zip_generator))

[(1, 6, 11), (2, 7, 12), (3, 8, 13), (4, 9, 14), (5, 10, 15)]


In [41]:
zip_generator = zip([1,2,3,4,5], [6,7,8,9])

print(list(zip_generator))

[(1, 6), (2, 7), (3, 8), (4, 9)]


In [38]:
# use zip to convert 5 tuples with 2 elements each to 2 tuples with 5 elements each
print(list(zip((1, 6), (2, 7), (3, 8), (4, 9), (5, 10))))
# the above prints:
# [(1, 2, 3, 4, 5), (6, 7, 8, 9, 10)]


zip_generator = zip([1,2,3,4,5], [6,7,8,9,10])
# let's turn the data back into 2 lists
x, y = zip(*zip_generator) # This is like calling zip((1, 6), (2, 7), (3, 8), (4, 9), (5, 10))
print(x)
print(y)
# the above prints:
# (1, 2, 3, 4, 5)
# (6, 7, 8, 9, 10)

[(1, 2, 3, 4, 5), (6, 7, 8, 9, 10)]
(1, 2, 3, 4, 5)
(6, 7, 8, 9, 10)


In [42]:
plt.figure()
# plot a data series 'Tall students' in red using the first two elements of x and y
plt.scatter(x[:2], y[:2], s=100, c='red', label='Tall students')
# plot a second data series 'Short students' in blue using the last three elements of x and y 
plt.scatter(x[2:], y[2:], s=100, c='blue', label='Short students')

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x193148d0>

In [43]:
# add a label to the x axis
plt.xlabel('The number of times the child kicked a ball')
# add a label to the y axis
plt.ylabel('The grade of the student')
# add a title
plt.title('Relationship between ball kicking and grades')

Text(0.5, 1, 'Relationship between ball kicking and grades')

In [44]:
# add a legend (uses the labels from plt.scatter)
plt.legend()

<matplotlib.legend.Legend at 0x1935b5c0>

In [45]:
# add the legend to loc=4 (the lower right hand corner), also gets rid of the frame and adds a title
plt.legend(loc=4, frameon=False, title='Legend')

<matplotlib.legend.Legend at 0x1935bbe0>

In [46]:
# get children from current axes (the legend is the second to last item in this list)
plt.gca().get_children()

[<matplotlib.collections.PathCollection at 0x19314400>,
 <matplotlib.collections.PathCollection at 0x193148d0>,
 <matplotlib.spines.Spine at 0x192f32b0>,
 <matplotlib.spines.Spine at 0x192cdac8>,
 <matplotlib.spines.Spine at 0x192c66d8>,
 <matplotlib.spines.Spine at 0x192c60f0>,
 <matplotlib.axis.XAxis at 0x192f31d0>,
 <matplotlib.axis.YAxis at 0x1c2ae4a8>,
 Text(0.5, 1, 'Relationship between ball kicking and grades'),
 Text(0.0, 1, ''),
 Text(1.0, 1, ''),
 <matplotlib.legend.Legend at 0x1935bbe0>,
 <matplotlib.patches.Rectangle at 0x192bb828>]

In [49]:
# get the legend from the current axes
legend = plt.gca().get_children()[-2]

In [48]:
# you can use get_children to navigate through the child artists
legend.get_children()[0].get_children()[1].get_children()[0].get_children()

[<matplotlib.offsetbox.HPacker at 0x1937f080>,
 <matplotlib.offsetbox.HPacker at 0x1937f048>]

In [52]:
# import the artist class from matplotlib
from matplotlib.artist import Artist

def rec_gc(art, depth=0):
    if isinstance(art, Artist):
        # increase the depth for pretty printing
        print("  " * depth + str(art))
        for child in art.get_children():
            rec_gc(child, depth+2)

# Call this function on the legend artist to see what the legend is made up of
rec_gc(plt.legend())
rec_gc(legend) ## the same as above command

Legend
    <matplotlib.offsetbox.VPacker object at 0x0000000019483C50>
        <matplotlib.offsetbox.TextArea object at 0x0000000019483B00>
            Text(0, 0, '')
        <matplotlib.offsetbox.HPacker object at 0x0000000019483AC8>
            <matplotlib.offsetbox.VPacker object at 0x0000000019483A20>
                <matplotlib.offsetbox.HPacker object at 0x0000000019483A58>
                    <matplotlib.offsetbox.DrawingArea object at 0x0000000019483668>
                        <matplotlib.collections.PathCollection object at 0x0000000019483748>
                    <matplotlib.offsetbox.TextArea object at 0x0000000019483550>
                        Text(0, 0, 'Tall students')
                <matplotlib.offsetbox.HPacker object at 0x0000000019483A90>
                    <matplotlib.offsetbox.DrawingArea object at 0x00000000194838D0>
                        <matplotlib.collections.PathCollection object at 0x00000000194839B0>
                    <matplotlib.offsetbox.TextArea obj

# Line Plots

In [82]:

linear_data = np.array([1,2,3,4,5,6,7,8])  ## 8 data points
list_linear = list(linear_data**2) + [10]  ## nine data points
plt.figure()
# plot the linear data and the exponential data
plt.plot(linear_data, '-o', list_linear, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1d1c4208>,
 <matplotlib.lines.Line2D at 0x1d1c4320>]

In [34]:
import numpy as np

linear_data = np.array([1,2,3,4,5,6,7,8])
exponential_data = linear_data ** 2

plt.figure()
# plot the linear data and the exponential data
plt.plot(linear_data, '-o', exponential_data, '-o')
## The x-axis is the number of data points
## New things in line charts: 1. We only gave y-axis values to our plot call,  no x-axis values(they will use index positions)
## 2. Different from scatter plot which required us to label the lines directly

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1c03b048>,
 <matplotlib.lines.Line2D at 0x1bfd66d8>]

In [30]:
# plot another series with a dashed red line
plt.plot([22,44,55], '--r')

[<matplotlib.lines.Line2D at 0x1bfb8a90>]

In [35]:
plt.xlabel('Some data')
plt.ylabel('Some other data')
plt.title('A title')
# add a legend with legend entries (because we didn't have labels when we plotted the data series)
plt.legend(['Baseline', 'Competition', 'Us'])

<matplotlib.legend.Legend at 0x1c06a7f0>

In [36]:
# fill the area between the linear data and exponential data, .fill_between() function is not specific for line plots
plt.gca().fill_between(range(len(linear_data)), 
                       linear_data, exponential_data, 
                       facecolor='blue',
                       alpha = 0.25
                       )
## alpha = 0.25 is the transparancy parameter

<matplotlib.collections.PolyCollection at 0x1c079048>

Let's try working with dates!

In [19]:
plt.figure()

observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
## start, stop(not include), in one day intervals
## we can use the function in pandas pd.to_datetime() to let matplotlib use
## Previous version, matplotlib does not recognize the numpy datetime, so we use pandas datetime, now it works on both
plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x17bfaf98>,
 <matplotlib.lines.Line2D at 0x17c468d0>]

Let's try using pandas

In [24]:
import pandas as pd

plt.figure()
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
observation_dates = map(pd.to_datetime, observation_dates) # trying to plot a map will result in an error
plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')
## The error is because map() function returns a iterator

<IPython.core.display.Javascript object>

RuntimeError: matplotlib does not support generators as input

In [42]:
plt.figure()
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')
observation_dates = list(map(pd.to_datetime, observation_dates)) # convert the map to a list to get rid of the error
plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')
## the dates overlap heavily

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1c722470>,
 <matplotlib.lines.Line2D at 0x1993e978>]

In [45]:
x = plt.gca().xaxis
## .wreck_gc function to explore what kind of artists the x-axis object actually contains
# rotate the tick labels for the x axis
for item in x.get_ticklabels():
    ## Each of the tick labels are a text object which itself is an artist
    item.set_rotation(45)

In [46]:
# adjust the subplot so the text doesn't run off the image
plt.subplots_adjust(bottom=0.25)

In [47]:
## Matplotlib has a fairly strong connection to LaTex(setting language)
ax = plt.gca()
ax.set_xlabel('Date')
ax.set_ylabel('Units')
ax.set_title('Exponential vs. Linear performance')

Text(0.5, 1, 'Exponential vs. Linear performance')

In [48]:
# you can add mathematical expressions in any text element
ax.set_title("Exponential ($x^2$) vs. Linear ($x$) performance")

Text(0.5, 1, 'Exponential ($x^2$) vs. Linear ($x$) performance')

# Bar Charts

In [62]:
plt.figure()
xvals = range(len(linear_data))
plt.bar(xvals, linear_data, width = 0.3)
## pass the x-var and the height of each bar to plt.bar()

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

In [63]:
new_xvals = []

# plot another set of bars, adjusting the new xvals to make up for the first set of bars plotted
for item in xvals:
    new_xvals.append(item+0.3)

plt.bar(new_xvals, exponential_data, width = 0.3 ,color='red')
## This looks good, but all of the x labels are to the far left of the items being described, and not centered.
## We can center them by using the align parameter, but inconvenient when making multiple bars

<BarContainer object of 8 artists>

In [64]:
from random import randint
linear_err = [randint(0,15) for x in range(len(linear_data))] 
print(linear_err)
# This will plot a new set of bars with errorbars using the list of random error values
plt.bar(xvals, linear_data, width = 0.3, yerr=linear_err)

[12, 8, 1, 4, 10, 9, 14, 15]


<BarContainer object of 8 artists>

In [65]:
# stacked bar charts are also possible
plt.figure()
xvals = range(len(linear_data))
plt.bar(xvals, linear_data, width = 0.3, color='b')
plt.bar(xvals, exponential_data, width = 0.3, bottom=linear_data, color='r')

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

In [66]:
# or use barh for horizontal bar charts, but we need to change the 'bottom' to the 'left' and 'width' to the 'height'
plt.figure()
xvals = range(len(linear_data))
plt.barh(xvals, linear_data, height = 0.3, color='b')
plt.barh(xvals, exponential_data, height = 0.3, left=linear_data, color='r')

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

### Dejunkifying a Plot

In [74]:
import matplotlib.pyplot as plt
import numpy as np
## Create a bar chart based on rank and popularity, then add x and y ticks and set a title, then call plot.show(), the figure will render.
plt.figure()

languages = ['Python', 'SQL', 'Java', 'C++', 'Javascript']
pos = np.arange(len(languages))
popularity = [56, 39, 34, 34, 29]

plt.bar(pos, popularity, align = 'center')
plt.xticks(pos, languages)
plt.ylabel('% Popularity')
plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Flow', alpha = 0.8)

## Task: remove all the ticks (both axes), and tick labels on the Y axis
## Solution in Python 2.7: plt.tick_params(top='off', bottom='off', left='off', right='off', labelleft='off', labelbottom='on')
## plt.tick_params(axis = 'both', which = 'both', bottom = False, labelbottom = True, labelleft = True)
plt.tick_params(top = False, bottom = False, left = False, right = False, labelbottom = True, labelleft = False)
plt.show()



<IPython.core.display.Javascript object>

In [71]:
plt.figure()
plt.bar(languages, popularity, align = 'center')

<IPython.core.display.Javascript object>

<BarContainer object of 5 artists>

In [75]:
## Remove the frame of bar chart
plt.figure()

languages = ['Python', 'SQL', 'Java', 'C++', 'Javascript']
pos = np.arange(len(languages))
popularity = [56, 39, 34, 34, 29]

plt.bar(pos, popularity, align = 'center')
plt.xticks(pos, languages)
plt.ylabel('% Popularity')
plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Flow', alpha = 0.8)

## Task: remove all the ticks (both axes), and tick labels on the Y axis
## Solution in Python 2.7: plt.tick_params(top='off', bottom='off', left='off', right='off', labelleft='off', labelbottom='on')
## plt.tick_params(axis = 'both', which = 'both', bottom = False, labelbottom = True, labelleft = True)
plt.tick_params(top = False, bottom = False, left = False, right = False, labelbottom = True, labelleft = False)
## Task: remove frame
for spine in plt.gca().spines.values():
    spine.set_visible(False)
plt.show()


<IPython.core.display.Javascript object>

In [86]:
## Task: Change the bar colors to be less bright blue, make one bar, the python bar, a contrasting color, 
## soften all labels by turning grey.
plt.figure()

languages =['Python', 'SQL', 'Java', 'C++', 'JavaScript']
pos = np.arange(len(languages))
popularity = [56, 39, 34, 34, 29]
# TODO: change the bar colors to be less bright blue
# TODO: make one bar, the python bar, a contrasting color

bars = plt.bar(pos, popularity, align='center', color = 'lightslategrey', linewidth = 0)
bars[0].set_color('#1F77B4')
# TODO: soften all labels by turning grey
plt.xticks(pos, languages, alpha = 0.8)
plt.ylabel('% Popularity', alpha = 0.8)
plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Overflow', alpha = 0.8)

# remove all the ticks (both axes), and tick labels on the Y axis
plt.tick_params(top = False, bottom = False, left = False, right = False, labelbottom = True, labelleft = False)

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)
plt.show()

<IPython.core.display.Javascript object>

In [90]:
## Task: Directly label each bar with Y axis values, and remove the Y label since bars are directly labeled
plt.figure()

languages =['Python', 'SQL', 'Java', 'C++', 'JavaScript']
pos = np.arange(len(languages))
popularity = [56, 39, 34, 34, 29]

# change the bar colors to be less bright blue
bars = plt.bar(pos, popularity, align='center', linewidth=0, color='lightslategrey')
# make one bar, the python bar, a contrasting color
bars[0].set_color('#1F77B4')

# soften all labels by turning grey
plt.xticks(pos, languages, alpha=0.8)

# TODO: remove the Y label since bars are directly labeled
# plt.ylabel('% Popularity', alpha=0.8)
plt.title('Top 5 Languages for Math & Data \nby % popularity on Stack Overflow', alpha=0.8)

# remove all the ticks (both axes), and tick labels on the Y axis
plt.tick_params(top = False, bottom = False, left = False, right = False, labelbottom = True, labelleft = False)

# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)
    
# TODO: direct label each bar with Y axis values
## Set up the x location and y location for each bar and needed in plt.test() function
for bar in bars:
    plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 5, str(int(bar.get_height())) + '%', 
                 ha='center', color='w', fontsize=11)

plt.show()

<IPython.core.display.Javascript object>