# Introduction
Based on stuff by Michael Droettboom (Space Telescope Science Institute)
- http://github.com/dmcdougall/strata-mpl-tutorial 

Matplotlib is probably the single most used Python package for 2D-graphics. It provides both a very quick way to visualize data from Python and publication-quality figures in many formats.

![Parts of a figure](matplotlib-anatomy.png)

Most of the terms are straightforward but the main thing to remember is that the `Figure` is the final image that may contain 1 or more axes. The `Axes` represent an individual plot.

In [1]:
# Use the 'tk' backend so plots come up in their own windows.
# NOTE: `%matplotlib` is an IPython magic, so this cell only runs under IPython/Jupyter.
%matplotlib tk
# Reset matplotlib's rc parameters to the library defaults.
from matplotlib import rcdefaults
rcdefaults()

# Basic Plots

In [2]:
import numpy as np
import matplotlib.pyplot as plt

def f(x, noise_amount):
    """Return sqrt(x) * sin(x) evaluated on ``x`` plus scaled Gaussian noise.

    One standard-normal value is drawn per element of ``x`` (``len(x)``
    draws in total) and multiplied by ``noise_amount`` before being added,
    so ``noise_amount=0`` gives back the noise-free curve.
    """
    signal = np.sin(x) * np.sqrt(x)
    gaussian_noise = np.random.normal(loc=0, scale=1, size=len(x))
    return signal + gaussian_noise * noise_amount

# Noisy samples: 25 evenly spaced points on [0, 4*pi].
X_data = np.linspace(0, 4. * np.pi, num=25)
y_data = f(X_data, noise_amount=0.5)

# The same function without noise, on a finer grid of 250 points.
X_plot = np.linspace(0, 4. * np.pi, num=250)
y_plot = f(X_plot, noise_amount=0.)

# Basic figure: the samples, a copy of them shifted up by 2, and the
# noise-free curve drawn as a line.
plt.figure()
for y_values in (y_data, y_data + 2.):
    plt.scatter(X_data, y_values)
plt.plot(X_plot, y_plot)

plt.show()

Let's make our x axis a bit more meaningful, and add some labels:

In [3]:
plt.figure()
for y_values in (y_data, y_data + 2.):
    plt.scatter(X_data, y_values)
# Put ticks exactly where we want them: at 0, pi/2, pi, 3*pi/2 and 2*pi.
tick_positions = [k * np.pi / 2 for k in range(5)]
plt.xticks(tick_positions)
# Set the limits of the x axis - plt.ylim does the same for the y axis.
plt.xlim(0., 2*np.pi)
# Axis labels accept text properties such as size and colour.
label_size = 20
plt.xlabel('Theta', fontsize=label_size, color='blue')
plt.ylabel('Phi', fontsize=label_size, color='red')
plt.plot(X_plot, y_plot)

plt.show()

We can use LaTeX inside matplotlib! Just add an r before the string and enclose your maths in dollar signs, i.e. ```r'$\omega$'```

In [4]:
plt.figure()
for y_values in (y_data, y_data + 2.):
    plt.scatter(X_data, y_values)
# Custom tick labels: each position is paired with the LaTeX string shown at it.
tick_positions = [0., np.pi/2, np.pi, 3*np.pi/2, 2*np.pi]
tick_labels = [r'$0$', r'$\pi/2$', r'$\pi$', r'$3\pi/2$', r'$2\pi$']
plt.xticks(tick_positions, tick_labels)
# Restrict the x axis to [0, 2*pi].
plt.xlim(0., 2*np.pi)
# Raw strings (r'...') keep the backslashes of the LaTeX commands intact.
plt.xlabel(r'$\Theta$', fontsize=20, color='blue')
plt.ylabel(r'$\Phi$', fontsize=20, color='red')
plt.plot(X_plot, y_plot)

plt.show()


And let's annotate some stuff, and make something transparent (I almost always use this somewhere):

In [7]:
plt.figure()
plt.scatter(X_data, y_data)
# alpha runs from 0 (completely transparent) to 1 (unchanged); fade the shifted copy.
plt.scatter(X_data, y_data + 2., alpha=0.2)
tick_positions = [0., np.pi/2, np.pi, 3*np.pi/2, 2*np.pi]
tick_labels = [r'$0$', r'$\pi/2$', r'$\pi$', r'$3\pi/2$', r'$2\pi$']
plt.xticks(tick_positions, tick_labels)
plt.xlim(0., 2*np.pi)
plt.xlabel(r'$\Theta$', fontsize=20, color='blue')
plt.ylabel(r'$\Phi$', fontsize=20, color='red')
plt.plot(X_plot, y_plot)
# The documentation lists many arrow styles; the two key arguments are
# 'xy' - the x and y coordinates of the point the arrow points at - and
# 'xytext' - where to place the annotation text (here an offset in points).
arrow_style = dict(arrowstyle="->", connectionstyle="arc3,rad=.2")
plt.annotate(
    r'$\mathrm{Middle}$',
    xy=(np.pi, 0.),
    xycoords='data',
    xytext=(-60, -60),
    textcoords='offset points',
    fontsize=16,
    arrowprops=arrow_style,
)

plt.show()

In data science we often want Histograms - just use plt.hist()!

In [39]:
# 5000 draws from a standard normal distribution.
samples = np.random.normal(size=5000)
plt.figure()
# alpha below 1 makes the bars slightly see-through.
hist_options = dict(bins=30, alpha=0.6)
plt.hist(samples, **hist_options)
plt.show()

We can even do a 2d Histogram!

In [59]:
# This isn't needed unless we use a log scale.
from matplotlib.colors import LogNorm

# Draw 10000 points from a bivariate Gaussian whose two variables are
# correlated (non-zero off-diagonal covariance).
gaussian_mean = [6., 12.0]
gaussian_cov = [[0.7, 0.3], [0.3, 0.7]]
samples_2d = np.random.multivariate_normal(mean=gaussian_mean, cov=gaussian_cov, size=10000)

# First column is the x coordinate, second column the y coordinate.
x, y = samples_2d[:, 0], samples_2d[:, 1]

plt.figure()
# For a logarithmic colour scale pass norm=LogNorm() to hist2d as well.
plt.hist2d(x, y, bins=20)
plt.colorbar()
plt.show()

# Subplots!

More useful stuff here: https://matplotlib.org/examples/pylab_examples/subplots_demo.html, and here: https://matplotlib.org/examples/pylab_examples/demo_tight_layout.html

The format is ```plt.subplot(#rows, #columns, index of the subplot to draw into, counting from 1)```

In [21]:
# Two rows, one column: the same scatter plot in each panel.
plt.figure()
for panel in (1, 2):
    plt.subplot(2, 1, panel)
    plt.scatter(X_data, y_data)
plt.show()

In [20]:
# Three rows, one column: the samples, then shifted up by 1, then down by 1.
plt.figure()
panels = ((1, y_data), (2, y_data + 1.), (3, y_data - 1.))
for panel, y_values in panels:
    plt.subplot(3, 1, panel)
    plt.scatter(X_data, y_values)
plt.show()

In [22]:
# One row, two columns: the same scatter plot side by side.
plt.figure()
for panel in (1, 2):
    plt.subplot(1, 2, panel)
    plt.scatter(X_data, y_data)
plt.show()

Get even more control with the axes command. The format here is ```plt.axes([bottom-left corner x, bottom-left corner y, width, height])```, with every value given as a fraction of the figure size. Probably the easiest way to figure it out is to just play around with it.

In [33]:
plt.figure()
# Large main axes.
main_rect = [0.1, 0.1, 0.8, 0.8]
plt.axes(main_rect)
plt.scatter(X_data, y_data)
# Smaller axes placed inside the area of the main one.
inset_rect = [0.5, 0.3, 0.3, 0.5]
plt.axes(inset_rect)
plt.scatter(X_data, y_data)
# The small plot only shows the x range from pi to 2*pi.
plt.xlim(np.pi, 2*np.pi)
plt.show()

There's a complicated, but cool zoom example in the matplotlib examples: http://matplotlib.org/examples/pylab_examples/axes_zoom_effect.html

Matplotlib has two APIs: pyplot (we used that above - plt.plot() and so on) and the object-oriented API.

pyplot is convenient for interactive plotting, since it remembers "state", such as the last Axes that was plotted to, in order to plot the next item to it again. Useful for exploratory data analysis. But best practice is to use the object-oriented API, as I do below.

In [3]:
from sklearn.linear_model import BayesianRidge, LinearRegression



# Number of columns requested from np.vander for the polynomial features.
degree = 10

# Noisy samples on [0, 4*pi].
X_data = np.linspace(0, 4. * np.pi, num=25)
y_data = f(X_data, noise_amount=0.5)

# NOTE: this first evaluation grid is replaced further down before it is used.
X_plot = np.linspace(0, 4. * np.pi, num=250)
y_plot = f(X_plot, noise_amount=0.)

# Fit a Bayesian ridge regression on the polynomial features of the samples.
train_features = np.vander(X_data, degree)
clf_poly = BayesianRidge()
clf_poly.fit(train_features, y_data)

# Evaluate the noise-free function and the model on 25 points in [0, 11];
# return_std=True also returns the predictive standard deviation.
X_plot = np.linspace(0, 11, num=25)
y_plot = f(X_plot, noise_amount=0)
plot_features = np.vander(X_plot, degree)
y_mean, y_std = clf_poly.predict(plot_features, return_std=True)

In [23]:
import numpy as np
from matplotlib import style
from matplotlib import rc
# Use a sans-serif font family, with Helvetica as the sans-serif face.
# ('sans-serif' is not a valid Python identifier, so it is passed via a dict.)
rc('font', family='sans-serif', **{'sans-serif': ['Helvetica']})
# Render all text with LaTeX.
rc('text', usetex=True)
import matplotlib.pyplot as plt
from sklearn.linear_model import BayesianRidge, LinearRegression

def f(x, noise_amount):
    """Return sin(x) evaluated on ``x`` plus scaled Gaussian noise.

    One standard-normal value is drawn per element of ``x`` (``len(x)``
    draws in total) and multiplied by ``noise_amount`` before being added,
    so ``noise_amount=0`` gives back the noise-free sine.
    """
    signal = np.sin(x)
    gaussian_noise = np.random.normal(loc=0, scale=1, size=len(x))
    return signal + gaussian_noise * noise_amount

# Noisy samples of the sine over one period.
X_data = np.linspace(0, 2. * np.pi, num=25)
y_data = f(X_data, noise_amount=0.5)

# Noise-free curve on a grid that extends 0.9 beyond the samples on each side.
X_plot = np.linspace(-0.9, 2. * np.pi + 0.9, num=250)
y_plot = f(X_plot, noise_amount=0.)

# One (mean, standard deviation) pair of predictions per fitted model.
ys_store = []

for degree in range(1, 7):
    # np.vander(x, degree) has `degree` columns of polynomial features.
    train_features = np.vander(X_data, degree)
    plot_features = np.vander(X_plot, degree)
    clf_poly = BayesianRidge(alpha_1=3.5)
    clf_poly.fit(train_features, y_data)
    y_mean, y_std = clf_poly.predict(plot_features, return_std=True)
    ys_store.append((y_mean, y_std))

# The seaborn style sheets were renamed with a 'seaborn-v0_8' prefix in
# matplotlib 3.6 and the old names were later removed, so use whichever
# name this matplotlib installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in style.available else 'seaborn'
style.use(seaborn_style)
# Qualitative colormap used to give each panel's fitted curve its own colour.
palette = plt.get_cmap('Set1')
fig = plt.figure()

# One panel per fitted model, on a grid of 3 rows and 2 columns.
for i in range(6):
    ax = fig.add_subplot(3, 2, i+1)
    y_mean, y_std = ys_store[i]
    ax.scatter(X_data, y_data, color='blue', alpha=0.7)
    ax.plot(X_plot, y_plot, 'b--', alpha=0.3, label=r'$\mathrm{Data}$')
    # ys_store[i] was fitted on np.vander(X, i+1), i.e. on powers 0..i,
    # so the polynomial shown in panel i has degree i.
    degree_text = str(i)
    label = ' '.join([degree_text, r'$\mathrm{degree}$', r'$\mathrm{poly}$'])
    ax.plot(X_plot, y_mean, color=palette(i+2), alpha=0.9, label=label)
    # Fill between is a very cool function - useful for showing standard deviation.
    ax.fill_between(X_plot, y_mean + y_std, y_mean - y_std, alpha=0.3)
    # This would be plt.xticks in the pyplot API.
    # Often we have to use 'ax.set_something' in the object-oriented API
    ax.set_xticks([0., np.pi/2, np.pi, 3*np.pi/2, 2*np.pi])
    ax.set_xticklabels([r'$0$', r'$\pi/2$', r'$\pi$', r'$3\pi/2$', r'$2\pi$'])
    ax.set_yticks([-1, 0, +1])
    # Again - set_ylim rather than plt.ylim
    ax.set_ylim(-2.5, 2.5)
    ax.set_xlim(-0.9, 2.*np.pi + 0.9)
    # tick_params expects booleans here: the string 'off' is truthy, so
    # current matplotlib versions would keep the labels visible.
    if i < 4:
        # Only the bottom row keeps its x tick labels.
        ax.tick_params(labelbottom=False)
    if i % 2 == 1:
        # Only the left column (panels 0, 2, 4) keeps its y tick labels.
        ax.tick_params(labelleft=False)
    if i == 5:
        # Point at the fitted curve near the right-hand end of the last panel.
        ax.annotate(r'$\mathrm{Overfitting}$',
                    xy=(X_plot[-10], y_mean[-10]), xycoords='data',
                    xytext=(-50, +20), textcoords='offset points', fontsize=16,
                    arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
    ax.legend(loc='lower left')

plt.show()

You can use your own styles if you want - look at the format of styles/halloween.mplstyle, or the default ones at https://github.com/matplotlib/matplotlib/tree/master/lib/matplotlib/mpl-data/stylelib. List of options at https://matplotlib.org/users/customizing.html.

If you're viewing this notebook in the future - it was nearly Halloween when I made it...

In [61]:
import numpy as np
from matplotlib import style
from matplotlib import rc
import matplotlib.pyplot as plt

style.use('styles/halloween.mplstyle')

# 5000 Poisson-distributed integer counts with mean 5.
skeletons = np.random.poisson(lam=5.0, size = 5000)

# The bin range must come from the data being plotted (skeletons), not from
# the unrelated `samples` array created for the earlier histogram.
middle_of_first_bin = skeletons.min()
middle_of_last_bin = skeletons.max()

# One unit-wide bin centred on every integer count. np.arange leaves out its
# stop value, so stopping at max + 1.5 puts the last edge at max + 0.5 and
# keeps the largest count inside the final bin.
bin_edges = np.arange(middle_of_first_bin - 0.5, middle_of_last_bin + 1.5, 1)

plt.figure()
plt.hist(skeletons, bin_edges)
# One tick in the middle of every bin.
plt.xticks(np.arange(middle_of_first_bin, middle_of_last_bin + 1, 1))
plt.ylabel('Number of skeletons!')
plt.tight_layout()
plt.show()