In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import seaborn.objects as so
from seaborn import axes_style
import colorcet as cc

In [None]:
# Load seaborn's example "mpg" dataset; the plots in this notebook draw from it.
mpg = sns.load_dataset("mpg")
# Bare last expression so the notebook renders the frame as a table.
mpg

<h1>Basic Plotting</h1>
<h3>Pyplot interface</h3>  

- Quick and dirty interface  
- Less explicit about where you are plotting
- Not my preference
<h3>Matplotlib Axis Interface</h3>

- The basis that many of the other interfaces are building on top of
- Fine control over all of the parameters of the plot
- Can become very cumbersome as plots become more elaborate
<h3>Seaborn Interface</h3>

- Similar to pyplot interface, but provides key conveniences
    - declare specific axes to plot on 
    - automatically map values to colors, sizes, or markers  
    - non-insane heatmap plotting  
    - advanced plots such as clustermaps, regression plots, etc. 
<h3>Seaborn Object Oriented Interface</h3>

- Again a bit more gritty than the normal seaborn interface
- Useful for more elaborate and statistically oriented plots

<h1>Pyplot interface</h1>

Call a function -> get a plot
It can be difficult to alter the specific parts of the plot, so I generally avoid this interface
Mostly this is just a wrapper over the axes interface

<h3>Call functions that modify an implied axes</h3>

In [None]:
# pyplot style: no figure or axes is named, the call draws on an implied one.
# The two strings are column names looked up in the frame passed as `data`.
plt.scatter("displacement", "mpg", data=mpg)
plt.show()

<h1>Make a plot of acceleration vs mpg below</h1>

<h1>Matplotlib Axes Interface</h1>

- Create a figure
    - can hold multiple plots/axes
    - has figure level labels, legends, etc.
- Add an axes to the figure
    - Axes level labels, titles, legends, etc.
- Plot data onto an axes
    - You can plot multiple times on the same Axes to build up
- Render the figure -> receive a plot

<h3>Use methods of the objects that make up the plot to mutate those objects</h3>

In [None]:
# Build the figure by hand: create it, then add one axes at a time.
fig = plt.figure()

# Left panel (slot 1 of a 1 x 2 grid): displacement vs. mpg.
ax = fig.add_subplot(121)
ax.scatter("displacement", "mpg", data=mpg)
ax.set(xlabel="Displacement (cc)", ylabel="MPG")

# Right panel (slot 2): horsepower vs. mpg, sharing its y axis with the left
# panel. `ax` is rebound here, so the left axes is only reachable through `fig`.
ax = fig.add_subplot(1, 2, 2, sharey=ax)
ax.scatter("horsepower", "mpg", data=mpg)
ax.set_xlabel("Horsepower")


In [None]:
# Same two panels, but created in one call and filled in a loop.
# Each plotted column is paired with the x-axis label it should get.
x_labels = {
    "displacement": "Displacement (cc)",
    "horsepower": "Horsepower",
}
fig, axes = plt.subplots(nrows=1, ncols=2, sharey="row")
for ax, (column, x_label) in zip(axes, x_labels.items()):
    ax.scatter(column, "mpg", data=mpg)
    ax.set_xlabel(x_label)
# Only the first axes gets a y label; the y axis is shared across the row.
axes[0].set_ylabel("MPG")
fig.tight_layout()

<h1>Make a figure with plots of acceleration and weight vs mpg below</h1>

For extra credit:
- make the plots stacked rather than side-by-side
- label the axis correctly
- make the figure taller than the default (a parameter in plt.subplots())  

you will have to use the documentation for these:
- add a figure title (suptitle)
- make the figure title bold
- place the figure legend in the top left corner
- use one label for both y axes

<h1>When to move on from using Axes directly?</h1>

Matplotlib will not automatically map categories onto colors, or markers, nor will it scale number values to sizes or colormaps.  
Nor will it automatically generate a colorbar when you map values to colors.  
No direct function to generate heatmap or clustermap plots.  
This functionality can be found in seaborn. 

In [None]:
# Show the mpg frame again ahead of the mapping examples that follow.
mpg

In [None]:
# Deliberate failure: "origin" holds strings and matplotlib does not map
# categories onto colors. The error is caught so that "Restart & Run All" can
# continue past this cell, and its message is printed because the message is
# what this cell is meant to show.
fig, ax = plt.subplots()
try:
    ax.scatter(
        "horsepower",
        "mpg",
        c="origin",
        data=mpg,
    )
except ValueError as err:
    plt.close(fig)  # discard the empty axes left behind by the failed call
    print(f"{type(err).__name__}: {err}")

In [None]:
# Deliberate failure: matplotlib does not map a column onto marker styles, so
# passing the column name as `marker` is rejected. The error is caught so that
# "Restart & Run All" can continue past this cell, and its message is printed
# because the message is what this cell is meant to show.
fig, ax = plt.subplots()
try:
    ax.scatter(
        "horsepower",
        "mpg",
        marker="origin",
        data=mpg,
    )
except ValueError as err:
    plt.close(fig)  # discard the empty axes left behind by the failed call
    print(f"{type(err).__name__}: {err}")

In [None]:
# Marker size read from a numeric column. Matplotlib does not rescale the
# numbers, so the column's values are what `s` receives.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", s="displacement", data=mpg)

In [None]:
# Same plot with a different numeric column driving the marker size;
# again the values are passed to `s` without any rescaling.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", s="cylinders", data=mpg)

In [None]:
# Store a pandas-categorical copy of "origin" in a new "origin_c" column.
# Note: this mutates `mpg` in place; later cells overwrite "origin_c" again.
mpg["origin_c"] = mpg["origin"].astype("category")
# Display the new column (the output includes its dtype).
mpg["origin_c"]

In [None]:
# Deliberate failure: "origin_c" still holds the origin strings at this point
# (as a pandas categorical when the notebook is run top to bottom), and
# matplotlib does not map categories onto colors. The error is caught so that
# "Restart & Run All" can continue, and its message is printed because the
# message is what this cell is meant to show.
fig, ax = plt.subplots()
try:
    ax.scatter(
        "horsepower",
        "mpg",
        c="origin_c",
        data=mpg,
    )
except ValueError as err:
    plt.close(fig)  # discard the empty axes left behind by the failed call
    print(f"{type(err).__name__}: {err}")

In [None]:
# Replace "origin_c" with integer codes for the origins; `uniques` holds the
# original label belonging to each code.
origin_codes, uniques = pd.factorize(mpg["origin"])
mpg["origin_c"] = origin_codes
# Show the coded column next to the labels it was built from.
(mpg["origin_c"], uniques)

In [None]:
# Color the points by the "origin_c" column.
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", c="origin_c", data=mpg)

<h3>"origin_c" has been "color mapped" and the default color map "viridis" has been scaled to the values [0, 1, 2]</h3>

In [None]:
fig, ax = plt.subplots()
# Keep the object returned by scatter: the colorbar is built from it.
scatter_hdl = ax.scatter("horsepower", "mpg", c="origin_c", data=mpg)
# The colorbar has to be requested explicitly and told which axes to sit next to.
fig.colorbar(scatter_hdl, ax=ax)

In [None]:
fig, ax = plt.subplots()
# Same plot as before, with the colormap chosen explicitly via `cmap`.
scatter_hdl = ax.scatter(
    "horsepower", "mpg", c="origin_c", cmap="plasma", data=mpg
)
fig.colorbar(scatter_hdl, ax=ax)

In [None]:
# Pair each distinct origin, in order of first appearance, with the next color
# of colorcet's glasbey_category10 palette.
mapping = {
    origin: color
    for origin, color in zip(mpg["origin"].unique(), cc.glasbey_category10)
}
# Overwrite "origin_c" with the color assigned to each row's origin.
mpg["origin_c"] = mpg["origin"].map(mapping)
mapping

In [None]:
# One scatter call colored by "origin_c" (the per-row colors when the notebook
# is run top to bottom).
fig, ax = plt.subplots()
ax.scatter("horsepower", "mpg", c="origin_c", data=mpg)
# Only a single scatter artist exists, so the legend has no per-origin entries
# to list; that limitation is what this cell illustrates.
ax.legend()

<h3>Matplotlib is not aware of the connection between "origin" and "origin_c"</h3>  
<h3>Plotting to the same axes multiple times gives the expected result</h3>

In [None]:
# One scatter call per origin: each call carries its own color and label, so
# the legend ends up with one entry per origin.
fig, ax = plt.subplots()
for origin, color in mapping.items():
    origin_rows = mpg.loc[mpg["origin"] == origin]
    ax.scatter(
        "displacement",
        "mpg",
        color=color,
        label=origin.capitalize(),
        data=origin_rows,
    )
ax.legend(title="Origin")
ax.set_xlabel("Displacement (cc)")
ax.set_ylabel("MPG")

<h1>Using the axes interface, make a plot of acceleration vs MPG, where the size of the dot indicates the weight</h1>

for extra credit:
- fix the marker size
- use a different marker for each country of origin (https://matplotlib.org/stable/api/markers_api.html#module-matplotlib.markers)
- label each country of origin in the legend
- use colorcet to make all the markers the default blue color
- set both axes to start at 0


<h1>Seaborn Functional Interface</h1>

Similar to the pyplot interface, but returns an Axes object, and can accept an Axes object to plot onto.  
Will automatically map rational values.

In [None]:
# seaborn maps the columns onto visual properties itself:
# "origin" drives the color (hue) and "cylinders" the marker size.
ax = sns.scatterplot(
    data=mpg, x="displacement", y="mpg", hue="origin", size="cylinders"
)
# The call returns the Axes it drew on, so labels are set the matplotlib way.
ax.set(xlabel="Displacement (cc)", ylabel="MPG")
ax.legend()

In [None]:
# seaborn plotting onto axes we created ourselves: pass each one via `ax=`.
fig, axes = plt.subplots(nrows=1, ncols=2, sharey="row", figsize=(10, 5))
# Capitalized origin names are handed to `hue` directly as a Series, so the
# legend entries read "Usa", "Japan", ... without touching the frame.
origin_labels = mpg["origin"].str.capitalize()
for ax, metric in zip(axes, ["displacement", "horsepower"]):
    sns.scatterplot(
        data=mpg, x=metric, y="mpg", hue=origin_labels, alpha=0.8, ax=ax
    )
    ax.legend(title="Origin")
    ax.set_xlabel(metric.capitalize())
axes[0].set_ylabel("MPG")
fig.tight_layout()

<h1>Using the seaborn functional interface, make a plot of acceleration vs MPG, where the size of the dot indicates the weight</h1>

for extra credit:
- use a different marker for each country of origin 
- label each country of origin in the legend
- set both axes to start at 0

<h1>A FacetGrid can make separate plots using data grouped by a third variable</h1>  

 - Requires long form data  
 - Columns and rows of graphs can each be specified with a dataframe column  

 1. Create a FacetGrid object with a dataframe and indicate column, row, and color indicators.
 2. Pass your seaborn plotting function, followed by its arguments, to FacetGrid.map() or FacetGrid.map_dataframe()
    - use FacetGrid.map_dataframe() if you want to pass column names as plotting function arguments
 3. The FacetGrid object provides some of the interfaces of the Figure and Axes as convenience functions.

In [None]:
# Load seaborn's example "tips" dataset, used for the FacetGrid examples.
tips = sns.load_dataset("tips")
# Bare last expression so the notebook renders the frame as a table.
tips

In [None]:
# Step 1: describe the grid. One row per "sex", one column per "time",
# and "smoker" as the color grouping.
g = sns.FacetGrid(tips, row="sex", col="time", hue="smoker")
# Step 2: name the plotting function and the columns it should receive.
g.map(sns.scatterplot, "total_bill", "tip")
# Step 3: the legend is requested from the grid itself.
g.add_legend(title="Smoker")

In [None]:
# Reshape three measurement columns into long form: one row per
# (origin, variable, value), which is the layout FacetGrid expects.
cols = ["mpg", "displacement", "horsepower"]
melt = mpg[[*cols, "origin"]].melt(id_vars="origin", value_vars=cols)
melt

In [None]:
# One histogram panel per melted variable, each with its own x axis.
# Here `hue` is given to the FacetGrid, so the grid splits the data by origin
# and histplot is called once per origin subset.
g = sns.FacetGrid(melt, col="variable", hue="origin", sharex=False)
g.map_dataframe(sns.histplot, "value", multiple="stack")

In [None]:
# Same grid, but `hue` is now forwarded to histplot instead of the FacetGrid,
# so each histplot call receives every origin of its panel at once.
g = sns.FacetGrid(melt, col="variable", sharex=False)
g.map_dataframe(sns.histplot, "value", hue="origin", multiple="stack")

<h1>Use a FacetGrid to plot the total bill against the tip data, grouped by the day</h1>  

You may get better results using map or map_dataframe  

For extra credit:
 - Wrap the graphs onto a new row after 2 columns
 - Color the dots by the sex column
 - Set the size of the dots based off the size of the table
 - Add a legend
 - Set the x and y labels to be "Total Bill" and "Tip" respectively
 - Set the title of each graph to be just the appropriate day of the week (the column name)
    - https://seaborn.pydata.org/generated/seaborn.FacetGrid.set_titles.html

<h3>Seaborn interface will aggregate when appropriate, while matplotlib requires manual calculations.</h3>

In [None]:
# The aggregation matplotlib needs done by hand: mean horsepower per origin.
(
    mpg
    .groupby("origin")["horsepower"]
    .mean()
)

In [None]:
# Pre-compute the per-origin mean as a frame with "origin" kept as a regular
# column, so both bar arguments can be given as column names.
mean_hp = mpg.groupby("origin", as_index=False)["horsepower"].mean()
fig, ax = plt.subplots()
ax.bar(x="origin", height="horsepower", data=mean_hp)

In [None]:
# Per-origin summary indexed by origin: "horsepower" is the group mean and
# "err" the group standard deviation, computed in a single aggregation.
bar_w_err = mpg.groupby("origin")["horsepower"].agg(
    horsepower="mean",
    err="std",
)
bar_w_err

In [None]:
# Move "origin" out of the index so it can be referenced by name below.
bar_data = bar_w_err.reset_index()
fig, ax = plt.subplots()
# Bars show the mean ...
ax.bar(x="origin", height="horsepower", data=bar_data)
# ... and a second call on the same axes overlays the error bars from "err".
ax.errorbar(
    x="origin",
    y="horsepower",
    yerr="err",
    fmt="o",
    color="k",
    capsize=5,
    data=bar_data,
)

In [None]:
# seaborn takes the raw frame and aggregates per origin itself;
# errorbar="sd" requests standard-deviation error bars.
sns.barplot(data=mpg, x="origin", y="horsepower", errorbar="sd", capsize=0.2)

In [None]:
# Bars per cylinder count, split by origin via `hue`, with the error bars
# requested as a 95% confidence interval.
sns.barplot(
    data=mpg,
    x="cylinders", y="horsepower", hue="origin",
    errorbar=("ci", 95), capsize=0.2,
)

<h1>Seaborn Object Oriented Interface</h1>

In [None]:
# The objects interface builds the same bar chart from layers: each .add()
# supplies a mark, followed by the stat / move transforms applied to it.
(
    so.Plot(mpg, x="cylinders", y="horsepower", color="origin")
    # Layer 1: aggregated bars, dodged so the origins sit side by side.
    .add(so.Bar(), so.Agg(), so.Dodge())
    # Layer 2: a range mark from the "mean" estimate with a ("ci", 95) error
    # bar, dodged the same way as the bars.
    .add(so.Range(linewidth=2.5), so.Est("mean", ("ci", 95)), so.Dodge())
    .theme(axes_style("ticks"))
)


<h1>Using Seaborn Objects interface, create a swarmplot of horsepower by number of cylinders</h1>  

You will have to use so.Jitter to spread out the dots.  

extra credit:
 - make cylinders a categorical variable
 - divide up countries of origin by color 
   - The order of your transforms, like jitter and dodge matters.
   - drop the empty values in each category
 - add an error bar next to each swarm plot of the mean +/- the standard deviation
   - use the Shift transformation to move the errorbar sideways
   - fix the order by scaling the color variable
   - because the color variable is a categorical the scale is Nominal
   - the order must be a list type
 - add a bargraph with the mean for each swarm

<h3>Matplotlib treats heatmaps as low resolution images</h3> 

Heatmaps are created with ax.imshow() function, colorbars must be added manually.

In [None]:
# Load seaborn's example "iris" dataset, used for the heatmap examples.
iris = sns.load_dataset("iris")
# Bare last expression so the notebook renders the frame as a table.
iris

In [None]:
# Draw the iris columns other than "species" as an image, with imshow's
# default settings.
fig, ax = plt.subplots()
ax.imshow(iris.drop(columns="species"))

In [None]:
# Same image, now with aspect="auto" set explicitly.
fig, ax = plt.subplots()
ax.imshow(iris.drop(columns="species"), aspect="auto")

In [None]:
# Everything except the "species" column goes into the image.
measurements = iris.drop(columns="species")
fig, ax = plt.subplots()
hdl = ax.imshow(measurements, aspect="auto", interpolation="none")
# Colorbar and tick labels are both added by hand: the bar is built from the
# image handle, and the four x ticks are labeled with the column names.
fig.colorbar(hdl, ax=ax)
ax.set_xticks(list(range(4)), labels=measurements.columns)

In [None]:
# The seaborn counterpart is a single call on the same data.
ax = sns.heatmap(iris.drop(columns="species"))

In [None]:
# Give each species a color from colorcet's glasbey_category10 palette.
# A dedicated name is used so the origin -> color `mapping` built earlier for
# the mpg plots is not overwritten; re-running those cells after this one
# would otherwise iterate over species names instead of origins.
species_colors = dict(zip(iris["species"].unique(), cc.glasbey_category10))
# clustermap returns a ClusterGrid (a figure-level object), not an Axes, so
# the result is named accordingly. `row_colors` annotates each row with the
# color of its species.
cluster_grid = sns.clustermap(
    data=iris.drop(columns="species"),
    row_colors=iris["species"].map(species_colors),
)

<h1>Using the brain dataset below, make a clustermap of the correlation between the columns</h1>  

for extra credit:
 - use the vlag colormap
 - map the unique network levels to colors and then use that to set the column colors

In [None]:
# Load the "brain_networks" example data with a three-level column header.
brain = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
# Keep only the columns whose "network" level is one of these ids.
used_networks = [1, 5, 6, 7, 8, 12, 13, 17]
network_ids = brain.columns.get_level_values("network").astype(int)
used_columns = network_ids.isin(used_networks)
brain = brain.loc[:, used_columns]
brain

<h1>Various example plots</h1>

In [None]:
# Scatter of mpg against horsepower with a fitted regression line;
# line_kws styles the line only (red, slightly transparent).
sns.regplot(
    data=mpg, x="horsepower", y="mpg",
    line_kws={"color": "r", "alpha": 0.7},
)

In [None]:
# Residuals of the same horsepower -> mpg regression.
sns.residplot(data=mpg, x="horsepower", y="mpg")

In [None]:
# Same regression plot with order=2, i.e. a second-order polynomial fit.
sns.regplot(
    data=mpg, x="horsepower", y="mpg",
    order=2,
    line_kws={"color": "r", "alpha": 0.7},
)

In [None]:
# Residuals of the order=2 fit.
sns.residplot(data=mpg, x="horsepower", y="mpg", order=2)

In [None]:
# Add two transformed versions of the response as new columns of `mpg`:
# its square root and its natural log. Note that this mutates `mpg` in place.
mpg["mpg_sqrt"] = mpg["mpg"] ** 0.5
mpg["mpg_log"] = np.log(mpg["mpg"])

In [None]:
# One residual panel per response column: "mpg", "mpg_sqrt" and "mpg_log".
# Each suffix is appended to "mpg" to form the column name.
transforms = ["", "_sqrt", "_log"]
fig, axs = plt.subplots(ncols=len(transforms), figsize=(15, 5))
for ax, transform in zip(axs, transforms):
    sns.residplot(
        data=mpg, x="horsepower", y=f"mpg{transform}", order=2, ax=ax
    )
fig.tight_layout()

In [None]:
# Order-2 regression plot against the log-transformed response column.
sns.regplot(
    data=mpg, x="horsepower", y="mpg_log",
    order=2,
    line_kws={"color": "r", "alpha": 0.7},
)

In [None]:
# lmplot with only x and y given: horsepower against mpg.
sns.lmplot(data=mpg, x="horsepower", y="mpg")

In [None]:
# Faceted version: one column of panels per origin, one row per cylinder
# count, with origin also used for the color.
sns.lmplot(
    data=mpg, x="horsepower", y="mpg",
    row="cylinders", col="origin", hue="origin",
)

In [None]:
# "cylinders" takes only a few distinct values, so x_jitter is set to spread
# the points horizontally.
sns.lmplot(data=mpg, x="cylinders", y="mpg", x_jitter=0.3)

In [None]:
# Grid of pairwise plots over the columns of mpg, colored by origin.
# NOTE(review): by this point mpg also carries the derived "mpg_sqrt" and
# "mpg_log" columns added above, so they presumably appear in the grid as
# well; confirm that is intended.
sns.pairplot(mpg, hue="origin")