In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import colorcet as cc

In [None]:
# Load seaborn's "mpg" example dataset and show it as this cell's output.
mpg = sns.load_dataset("mpg")
mpg

<h1>Basic Plotting</h1>
<h3>Pyplot interface</h3>  

- Quick and dirty interface  
- Less explicit about where you are plotting
- Not my preference
<h3>Matplotlib Axis Interface</h3>

- The basis that many of the other interfaces are building on top of
- Fine control over all of the parameters of the plot
- Can become very cumbersome as plots become more elaborate
<h3>Seaborn Interface</h3>

- Similar to pyplot interface, but provides key conveniences
    - declare specific axes to plot on 
    - automatically map values to colors, sizes, or markers  
    - non-insane heatmap plotting  
    - advanced plots such as clustermaps, regression plots, etc. 
<h3>Seaborn Object Oriented Interface</h3>

- Again a bit more gritty than the normal seaborn interface
- Useful for more elaborate and statistically oriented plots

<h1>Pyplot interface</h1>

Call a function -> get a plot.  
It can be difficult to alter specific parts of the plot, so I generally avoid this interface.  
Mostly this is just a wrapper over the axes interface.

<h3>Call functions that modify an implied axes</h3>

In [None]:
# Pyplot-style call: no figure or axes is named, the plot lands on the implied axes.
# The two strings are column names looked up in `data`.
plt.scatter("displacement", "mpg", data=mpg)
plt.show()

<h1>Matplotlib Axes Interface</h1>

- Create a figure
    - can hold multiple plots/axes
    - has figure level labels, legends, etc.
- Add an axes to the figure
    - Axes level labels, titles, legends, etc.
- Plot data onto an axes
    - You can plot multiple times on the same Axes to build up
- Render the figure -> receive a plot

<h3>Use methods of the objects that make up the plot to mutate those objects</h3>

In [None]:
# Build a two-panel figure by hand: each add_subplot call returns an Axes,
# and labels/data are then set through methods on that Axes.
fig = plt.figure()

# Left panel (1 row x 2 columns, slot 1): MPG against engine displacement.
ax = fig.add_subplot(121)
ax.scatter("displacement", "mpg", data=mpg)
# Displacement in the Auto MPG data is recorded in cubic inches, not cubic centimetres.
ax.set_xlabel("Displacement (cu in)")
ax.set_ylabel("MPG")

# Right panel (slot 2): MPG against horsepower, sharing the left panel's y-axis.
ax = fig.add_subplot(122, sharey=ax)
ax.scatter("horsepower", "mpg", data=mpg)
ax.set_xlabel("Horsepower")


In [None]:
# Same two panels, created in one call; sharey="row" ties the y-axes together.
fig, axes = plt.subplots(1, 2, sharey="row")
axes[0].set_ylabel("MPG")

# Unit suffix per column for the x-label. Displacement in the Auto MPG data is
# recorded in cubic inches, not cubic centimetres.
units = {"displacement": " (cu in)"}

for ax, c_name in zip(axes, ["displacement", "horsepower"]):
    ax.scatter(c_name, "mpg", data=mpg)
    # capitalize() only upper-cases the first character, so the unit suffix stays lower-case.
    ax.set_xlabel((c_name + units.get(c_name, "")).capitalize())
fig.tight_layout()

<h1>When to move on from using axes directly?</h1>

Matplotlib will not automatically map categories onto colors or markers, nor will it scale numeric values to sizes or colormaps.  
Nor will it automatically generate a colorbar when you map values to colors.  
No direct function to generate heatmap or clustermap plots.  
This functionality can be found in seaborn. 

In [None]:
# Reminder of the available columns; the first few rows are enough for that.
mpg.head()

In [None]:
# Demonstration: matplotlib does not translate category strings into colors, so
# passing the "origin" column as `c` is expected to fail. The error is caught and
# printed so that "Restart & Run All" can continue past this cell.
fig, ax = plt.subplots()
try:
    ax.scatter("horsepower", "mpg", c="origin", data=mpg)
except (ValueError, TypeError) as err:
    print(f"{type(err).__name__}: {err}")

In [None]:
# Demonstration: matplotlib does not map a category column onto markers, so
# marker="origin" is expected to be rejected. The error is caught and printed so
# that "Restart & Run All" can continue past this cell.
fig, ax = plt.subplots()
try:
    ax.scatter("horsepower", "mpg", marker="origin", data=mpg)
except (ValueError, TypeError) as err:
    print(f"{type(err).__name__}: {err}")

In [None]:
# A numeric column can be handed to `s` directly: the displacement values are
# used as marker sizes as-is, with no rescaling.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", s="displacement", data=mpg)

In [None]:
# Same plot with the cylinder count as marker size; again the raw values are
# used directly as sizes.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", s="cylinders", data=mpg)

In [None]:
# Try a pandas categorical version of "origin" and display the new column.
origin_as_category = mpg["origin"].astype("category")
mpg["origin_c"] = origin_as_category
mpg["origin_c"]

In [None]:
# Demonstration: converting "origin" to a pandas categorical does not help,
# because the values handed to `c` are still category strings. The expected error
# is caught and printed so that "Restart & Run All" can continue past this cell.
fig, ax = plt.subplots()
try:
    ax.scatter("horsepower", "mpg", c="origin_c", data=mpg)
except (ValueError, TypeError) as err:
    print(f"{type(err).__name__}: {err}")

In [None]:
# Replace each origin with an integer code; `uniques` holds the label for each code.
origin_codes, uniques = pd.factorize(mpg["origin"])
mpg["origin_c"] = origin_codes
mpg["origin_c"], uniques

In [None]:
# With "origin_c" now holding integer codes, the column can be passed to `c`.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", c="origin_c", data=mpg)

In [None]:
# Pair each distinct origin (in order of first appearance) with one entry of the
# colorcet glasbey_category10 palette, then store the per-row color in "origin_c".
origins = mpg["origin"].unique()
mapping = dict(zip(origins, map(np.array, cc.glasbey_category10)))
mpg["origin_c"] = mpg["origin"].map(mapping)
mapping

In [None]:
# Color every point from the per-row "origin_c" values, then request a legend.
# Only one scatter call is made, so there is no separate artist per origin.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", c="origin_c", data=mpg)
ax.legend()

In [None]:
# One scatter call per origin, so each group gets its own color and its own
# legend entry. `mapping` (origin -> color) comes from an earlier cell.
fig, ax = plt.subplots()
for origin, color in mapping.items():
    ax.scatter(
        "displacement",
        "mpg",
        color=color,
        label=origin.capitalize(),
        data=mpg.loc[mpg["origin"] == origin],  # rows for this origin only
    )
ax.legend(title="Origin")
# Displacement in the Auto MPG data is recorded in cubic inches, not cubic centimetres.
ax.set_xlabel("Displacement (cu in)")
ax.set_ylabel("MPG")

In [None]:
# Seaborn does the category -> color mapping itself through `hue` and returns
# the Axes it drew on, which is then labelled with the usual Axes methods.
ax = sns.scatterplot(data=mpg, x="displacement", y="mpg", hue="origin")
# Displacement in the Auto MPG data is recorded in cubic inches, not cubic centimetres.
ax.set_xlabel("Displacement (cu in)")
ax.set_ylabel("MPG")
ax.legend(title="Origin")

In [None]:
# Two seaborn scatter panels on a shared MPG axis, colored by capitalized origin.
fig, axes = plt.subplots(nrows=1, ncols=2, sharey="row", figsize=(10, 5))

# The hue labels are the same for both panels, so build them once.
origin_labels = mpg["origin"].str.capitalize()

for metric, ax in zip(["displacement", "horsepower"], axes):
    sns.scatterplot(data=mpg, x=metric, y="mpg", hue=origin_labels, ax=ax, alpha=0.8)
    ax.legend(title="Origin")
    ax.set_xlabel(metric.capitalize())

axes[0].set_ylabel("MPG")
fig.tight_layout()

In [None]:
# Mean horsepower for each origin.
hp_by_origin = mpg.groupby("origin")["horsepower"]
hp_by_origin.mean()

In [None]:
# Matplotlib needs the aggregation done up front: compute mean horsepower per
# origin, turn the index back into a column, then draw one bar per row.
mean_hp = mpg.groupby("origin")["horsepower"].mean().reset_index()
fig, ax = plt.subplots()
ax.bar(x="origin", height="horsepower", data=mean_hp)

In [None]:
# Summary table indexed by origin: mean horsepower plus its standard deviation,
# the latter stored as "err" for use as error-bar length.
hp_groups = mpg.groupby("origin")["horsepower"]
bar_w_err = pd.DataFrame({"horsepower": hp_groups.mean(), "err": hp_groups.std()})
bar_w_err

In [None]:
# Bars for the mean, with standard-deviation error bars layered on top.
# Both layers read from the same flattened summary table.
summary = bar_w_err.reset_index()
fig, ax = plt.subplots()
ax.bar(x="origin", height="horsepower", data=summary)
ax.errorbar(
    x="origin", y="horsepower", yerr="err", data=summary,
    fmt="o", capsize=5, color="k",
)

In [None]:
# Seaborn takes the raw rows and does the aggregation itself;
# errorbar="sd" requests standard-deviation error bars.
sns.barplot(data=mpg, x="origin", y="horsepower", errorbar="sd", capsize=0.2)

In [None]:
# Horsepower per cylinder count, split by origin via `hue`, with a 94%
# confidence-interval error bar on every bar.
ci_errorbar = ("ci", 94)
sns.barplot(
    data=mpg, x="cylinders", y="horsepower", hue="origin",
    errorbar=ci_errorbar, capsize=0.2,
)

In [None]:
# Scatter of MPG against horsepower with seaborn's default regression fit;
# the fitted line is drawn in translucent red.
fit_line_style = {"color": "r", "alpha": 0.7}
sns.regplot(data=mpg, x="horsepower", y="mpg", line_kws=fit_line_style)

In [None]:
# Residuals of the default regression of MPG on horsepower.
sns.residplot(data=mpg, x="horsepower", y="mpg")

In [None]:
# Same regression plot with order=2 (second-order polynomial fit);
# the fitted curve is drawn in translucent red.
fit_line_style = {"color": "r", "alpha": 0.7}
sns.regplot(data=mpg, x="horsepower", y="mpg", order=2, line_kws=fit_line_style)

In [None]:
# Residuals of the order=2 fit of MPG on horsepower.
sns.residplot(data=mpg, x="horsepower", y="mpg", order=2)

In [None]:
# Add square-root and natural-log versions of MPG as new columns.
mpg_values = mpg["mpg"]
mpg["mpg_sqrt"] = mpg_values.pow(1 / 2)
mpg["mpg_log"] = np.log(mpg_values)

In [None]:
# Residuals of the order=2 fit for raw, square-root and log MPG, one panel each.
# Each suffix is appended to "mpg" to select the matching column.
transforms = ["", "_sqrt", "_log"]
fig, axs = plt.subplots(ncols=len(transforms), figsize=(15, 5))
for panel, suffix in zip(axs, transforms):
    sns.residplot(data=mpg, x="horsepower", y=f"mpg{suffix}", order=2, ax=panel)
fig.tight_layout()

In [None]:
# Order=2 fit of log-MPG on horsepower, fitted curve in translucent red.
fit_line_style = {"color": "r", "alpha": 0.7}
sns.regplot(data=mpg, x="horsepower", y="mpg_log", order=2, line_kws=fit_line_style)

In [None]:
# lmplot: the figure-level counterpart of the regression plot above.
sns.lmplot(data=mpg, x="horsepower", y="mpg")

In [None]:
# Faceted regression grid: one column per origin, one row per cylinder count,
# with origin also mapped to color.
facet_spec = {"col": "origin", "row": "cylinders", "hue": "origin"}
sns.lmplot(data=mpg, x="horsepower", y="mpg", **facet_spec)

In [None]:
# Cylinder count takes only a few distinct values, so x_jitter=0.3 spreads the
# points horizontally to reduce overplotting.
sns.lmplot(data=mpg, x="cylinders", y="mpg", x_jitter=0.3)

In [None]:
# Earlier cells attach helper columns to `mpg` (origin_c, mpg_sqrt, mpg_log).
# Leave them out so the grid only compares the dataset's own variables.
# errors="ignore" keeps this cell working when those cells were not run.
helper_cols = ["origin_c", "mpg_sqrt", "mpg_log"]
sns.pairplot(
    mpg.drop(columns=helper_cols, errors="ignore"),
    hue="origin",
)