In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import FuncFormatter


# Plotting

Matplotlib has two interfaces.

1. The first is based on MATLAB and uses a state-based interface. This is encapsulated in the `pyplot` module. See the pyplot tutorials for a more in-depth look at the pyplot interface.

   - `matplotlib.pyplot` is a collection of command-style functions that make Matplotlib work like MATLAB. Each pyplot function makes some change to a figure: e.g., creates a figure, creates a plotting area in a figure, plots some lines in a plotting area, decorates the plot with labels, etc.

2. The second is an object-oriented (OO) interface. In this case, we use an instance of `axes.Axes` to render visualizations on an instance of `figure.Figure`.

   - We recommend working directly with these objects if you need more control and customization of your plots.





**In general, try to use the object-oriented interface over the pyplot interface.**



pyplot is mainly intended for interactive plots and simple cases of programmatic plot generation.

**API Overview:** https://matplotlib.org/stable/api/index.html


Here is a link to the pyplot tutorial (state-based) :  https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py

## PyPlot Quickstart

In [None]:
# pyplot is mainly intended for interactive plots and simple cases of
# programmatic plot generation.

# Sample sin(x) on [0, 5) in steps of 0.1.
x = np.arange(0, 5, 0.1)
y = np.sin(x)

# State-based call: no figure or axes object is created explicitly here.
plt.plot(x, y)
plt.show()

## OO API
- Axes: https://matplotlib.org/stable/api/axes_api.html#matplotlib.axes.Axes
- Figures: https://matplotlib.org/stable/api/figure_api.html#matplotlib.figure.Figure

## OO Examples

In [None]:
# Data for plotting: 1 + sin(2*pi*t), sampled every 0.01 on [0, 2)
t = np.arange(0.0, 2.0, 0.01)
s = 1 + np.sin(2 * np.pi * t)

# Object-oriented interface: create the Figure and Axes explicitly, then
# call plotting and labelling methods on the Axes.
fig, ax = plt.subplots()
ax.plot(t, s)

ax.set(xlabel='time (s)', ylabel='voltage (mV)',
       title='About as simple as it gets, folks')
ax.grid()

# fig.savefig("test.png")  # uncomment to also write the figure to disk
plt.show()

In [None]:
# Two x-ranges: [0, 5] for the damped curve, [0, 2] for the undamped one.
x1 = np.linspace(0.0, 5.0)
x2 = np.linspace(0.0, 2.0)

# Cosine with and without an exponential decay envelope.
y2 = np.cos(2 * np.pi * x2)
y1 = np.exp(-x1) * np.cos(2 * np.pi * x1)

# One figure holding two vertically stacked Axes.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
fig.suptitle('A tale of 2 subplots')

# Top panel: damped oscillation.
ax1.plot(x1, y1, 'o-')
ax1.set_ylabel('Damped oscillation')

# Bottom panel: undamped oscillation; only this panel gets an x label.
ax2.plot(x2, y2, '.-')
ax2.set_xlabel('time (s)')
ax2.set_ylabel('Undamped')

plt.show()

In [None]:
# Evenly sampled time at 200 ms intervals
t = np.arange(0., 5., 0.2)

# Three series in a single pyplot call, each given as (x, y, format string):
# red dashes, blue squares and green triangles
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')
plt.show()

In [None]:
# Build the columns that the scatter call below refers to by key name.
data = {
    'a': np.arange(50),
    'c': np.random.randint(0, 50, 50),
    'd': np.abs(np.random.randn(50)) * 100,
}
# 'b' follows 'a' with Gaussian noise added.
data['b'] = data['a'] + 10 * np.random.randn(50)

# With data=..., the string arguments are looked up as keys of `data`:
# x='a', y='b', colour from 'c', marker size from 'd'.
plt.scatter('a', 'b', c='c', s='d', data=data)
plt.xlabel('entry a')
plt.ylabel('entry b')
plt.show()

**Working with multiple figures and axes:** MATLAB, and pyplot, have the concept of the current figure and the current axes. All plotting functions apply to the current axes.

The function `gca` returns the current axes (a matplotlib.axes.Axes instance), 
and `gcf` returns the current figure (a matplotlib.figure.Figure instance). 



### references : https://matplotlib.org/stable/tutorials/introductory/pyplot.html#sphx-glr-tutorials-introductory-pyplot-py

# Working with Data

In [None]:

# Read the sample sales workbook and rename the columns used in the rest of
# the notebook in the same expression.
df = pd.read_excel(
    "https://github.com/chris1610/pbpython/blob/master/data/sample-salesv3.xlsx?raw=true"
).rename(
    columns={
        'name': 'Name',
        'ext price': 'Sales',
        'quantity': 'Purchases',
    }
)

df.head()

In [None]:
# IPython introspection: a leading `?` shows the help for the object that
# follows it. This is IPython syntax, not plain Python.
#? df
? df.head

## Let's do some calculations

Let's get the total number of purchases and sales for the top 10 customers

In [None]:
# Group the rows by customer name. No aggregation is applied here, so the
# displayed value is the GroupBy object itself rather than a table.
group_by_name = df.groupby('Name')
group_by_name

In [None]:
# Average each numeric column per customer. numeric_only=True is required on
# pandas >= 2.0, where the default no longer drops non-numeric columns and
# taking the mean of a text column raises a TypeError.
group_by_name.mean(numeric_only=True)

In [None]:
# Narrow the grouped data to the two columns of interest; still no
# aggregation, so this displays a GroupBy object.
df.groupby('Name')[['Sales', 'Purchases']]

In [None]:
# Per customer: sum the Sales values and count the non-null Purchases entries.
df.groupby('Name')[['Sales', 'Purchases']].agg({'Sales': 'sum', 'Purchases': 'count'})

In [None]:
# Per-customer sales total and purchase count, ordered by sales, keeping the
# ten largest; reset_index turns 'Name' back into a regular column.
top_10 = (
    df.groupby('Name')[['Sales', 'Purchases']]
    .agg({'Sales': 'sum', 'Purchases': 'count'})
    .sort_values(by='Sales', ascending=False)
    .head(10)
    .reset_index()
)


In [None]:
# Display the ten-row summary table built in the previous cell.
top_10

# Let's plot

In [None]:
# Evaluate `plt.style.available` to list the style names that can be used here.
plt.style.use('ggplot')


In [None]:
# Horizontal bars via the pandas plotting accessor: one bar per customer,
# bar length taken from Sales.
top_10.plot.barh(x="Name", y="Sales")


## Let's do some plotting customizations


In [None]:
fig, ax = plt.subplots()
# Remember: `fig` is the whole figure, and `ax` is the Axes (subplot) inside it.
# Passing ax=ax hands that Axes to the pandas plot call.
top_10.plot(kind='barh', y="Sales", x="Name", ax=ax)

In [None]:
# figsize is (width, height) in inches.
fig, ax = plt.subplots(figsize=(5, 6))
top_10.plot(kind='barh', y="Sales", x="Name", ax=ax)
# Fix the x-axis range explicitly.
ax.set_xlim([-10000, 140000])
# ax.set applies several Axes properties in a single call.
ax.set(title='2014 Revenue', xlabel='Total Revenue', ylabel='Customer')


In [None]:
def currency(x, pos):
    """Format a tick value as a dollar amount in thousands or millions.

    Takes the ``(value, position)`` pair of arguments so that it can be
    wrapped in a tick formatter.

    Args:
        x: The tick value.
        pos: The tick position; not used in the formatting.

    Returns:
        ``'$<n.n>M'`` when the magnitude of ``x`` is at least one million,
        otherwise ``'$<n>K'`` rounded to whole thousands.
    """
    # Compare magnitudes so that large negative values are also abbreviated
    # in millions instead of falling through to the thousands branch.
    if abs(x) >= 1e6:
        return '${:1.1f}M'.format(x * 1e-6)
    return '${:1.0f}K'.format(x * 1e-3)

# Wrap `currency` in a FuncFormatter so it can be installed on an axis below.
formatter = FuncFormatter(currency)



In [None]:
fig, ax = plt.subplots()
top_10.plot.barh(x="Name", y="Sales", ax=ax)
ax.set_xlim(-10000, 140000)
ax.set(
    title='2014 Revenue',
    xlabel='Total Revenue',
    ylabel='Customer',
)
# Render the x tick labels through the currency formatter.
ax.xaxis.set_major_formatter(formatter)
# Create the legend, then hide it.
ax.legend().set_visible(False)


## Let's insert some extras

In [None]:
fig, ax = plt.subplots()

# Mark the mean of the top-10 sales with a dashed vertical line.
avg = top_10['Sales'].mean()
ax.axvline(x=avg, color='b', label='Average', linestyle='--', linewidth=1)

# Draw the horizontal bars on the same Axes.
top_10.plot.barh(x="Name", y="Sales", ax=ax)
ax.set_xlim(-10000, 140000)
ax.set(
    title='2014 Revenue',
    xlabel='Total Revenue',
    ylabel='Customer',
)

# Render the x tick labels through the currency formatter.
ax.xaxis.set_major_formatter(formatter)
# Create the legend, then hide it.
ax.legend().set_visible(False)


## Multiple Plots

In [None]:
# One row, two columns of Axes with a shared y-axis; nothing is plotted on
# them in this cell.
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(7, 4))


In [None]:
fig, (ax0, ax1) = plt.subplots(
    nrows=1, ncols=2, sharey=True, figsize=(7, 4)
)

# Left panel: sales per customer.
top_10.plot.barh(x="Name", y="Sales", ax=ax0)
ax0.set_xlim(-10000, 140000)
ax0.set(
    title='Revenue',
    xlabel='Total Revenue',
    ylabel='Customers',
)

# Dashed vertical line at the mean of the Sales column.
avg = top_10['Sales'].mean()
ax0.axvline(x=avg, color='b', label='Average', linestyle='--', linewidth=1)

# Right panel: the Purchases column, with its own mean line.
top_10.plot.barh(x="Name", y="Purchases", ax=ax1)
avg = top_10['Purchases'].mean()
ax1.set(
    title='Units',
    xlabel='Total Units',
    ylabel='',
)
ax1.axvline(x=avg, color='b', label='Average', linestyle='--', linewidth=1)

# Figure-level title above both panels.
fig.suptitle('2014 Sales Analysis', fontsize=14, fontweight='bold')

# Create each panel's legend, then hide it.
ax1.legend().set_visible(False)
ax0.legend().set_visible(False)

## References

### Matplotlib tutorials : https://matplotlib.org/stable/tutorials/index.html
### Practical Business Python : https://pbpython.com/effective-matplotlib.html