In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import colorcet as cc

In [None]:
# Load seaborn's bundled "mpg" example dataset and show it as the cell output.
mpg = sns.load_dataset("mpg")
mpg

In [None]:
# Scatter MPG against displacement through the pyplot interface; the two
# strings are resolved as column names of the frame passed via `data`.
plt.scatter(x="displacement", y="mpg", data=mpg)
plt.show()

In [None]:
# Build a one-row, two-column figure by hand, adding each subplot explicitly.
fig = plt.figure()

# Left panel: MPG against displacement.
# NOTE(review): the Auto MPG data is usually documented with displacement in
# cubic inches, so the "(cc)" unit in this label may be wrong -- confirm.
ax_left = fig.add_subplot(1, 2, 1)
ax_left.scatter(x="displacement", y="mpg", data=mpg)
ax_left.set_ylabel("MPG")
ax_left.set_xlabel("Displacement (cc)")

# Right panel: MPG against horsepower, sharing the left panel's y-axis.
ax = fig.add_subplot(1, 2, 2, sharey=ax_left)
ax.scatter(x="horsepower", y="mpg", data=mpg)
ax.set_xlabel("Horsepower")


In [None]:
# The same two-panel figure, created in one call with a shared y-axis.
# Each plotted column is paired with the x-axis label it should display.
x_labels = {
    "displacement": "Displacement (cc)",
    "horsepower": "Horsepower",
}
fig, axes = plt.subplots(nrows=1, ncols=2, sharey="row")
for ax, (column, x_label) in zip(axes, x_labels.items()):
    ax.scatter(x=column, y="mpg", data=mpg)
    ax.set_xlabel(x_label)
# Only the left-most panel carries the y-axis label.
axes[0].set_ylabel("MPG")
fig.tight_layout()

In [None]:
# Pair each distinct origin with a colour from colorcet's Glasbey
# category10 palette, then display the resulting lookup.
mapping = dict(zip(mpg["origin"].unique(), cc.glasbey_category10))
mapping

In [None]:
# Colour the displacement/MPG scatter by origin: one scatter call per origin,
# so every group gets its own colour and its own legend entry.
fig, ax = plt.subplots()
for origin_name, origin_color in mapping.items():
    origin_rows = mpg.loc[mpg["origin"] == origin_name]
    ax.scatter(
        x="displacement",
        y="mpg",
        data=origin_rows,
        label=origin_name.capitalize(),
        color=origin_color,
    )
ax.legend(title="Origin")
ax.set_xlabel("Displacement (cc)")
ax.set_ylabel("MPG")

In [None]:
# seaborn does the per-origin colouring itself when given `hue`.
ax = sns.scatterplot(x="displacement", y="mpg", hue="origin", data=mpg)
ax.set_ylabel("MPG")
ax.set_xlabel("Displacement (cc)")
# Re-create the legend so that its title is capitalised.
ax.legend(title="Origin")

In [None]:
# Two seaborn scatter panels (displacement and horsepower against MPG)
# laid out side by side with a shared y-axis.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), sharey="row")

# Capitalised origin names are passed as the hue values, so the legend
# entries are capitalised too.
origin_labels = mpg["origin"].str.capitalize()

for ax, metric in zip(axes, ["displacement", "horsepower"]):
    sns.scatterplot(
        x=metric,
        y="mpg",
        hue=origin_labels,
        alpha=0.8,
        data=mpg,
        ax=ax,
    )
    ax.set_xlabel(metric.capitalize())
    ax.legend(title="Origin")
axes[0].set_ylabel("MPG")
fig.tight_layout()

In [None]:
# Regress MPG on horsepower and draw the fit as a semi-transparent red line.
sns.regplot(
    x="horsepower",
    y="mpg",
    data=mpg,
    line_kws=dict(color="r", alpha=0.7),
)

In [None]:
# Residuals of the regression of MPG on horsepower.
sns.residplot(x="horsepower", y="mpg", data=mpg)

In [None]:
# Same regression plot as the linear one, but fitted with order=2.
sns.regplot(
    x="horsepower",
    y="mpg",
    order=2,
    data=mpg,
    line_kws=dict(color="r", alpha=0.7),
)

In [None]:
# Residuals of the order=2 fit of MPG on horsepower.
sns.residplot(x="horsepower", y="mpg", order=2, data=mpg)

In [None]:
# Add square-root and natural-log transforms of MPG as new columns on `mpg`
# (the frame is extended in place; later cells read these columns by name).
mpg_values = mpg["mpg"]
mpg["mpg_sqrt"] = mpg_values ** 0.5
mpg["mpg_log"] = np.log(mpg_values)

In [None]:
# Compare the order=2 residual plots for raw, square-root and log MPG,
# one panel per transform. Each suffix selects the matching "mpg*" column.
transforms = ["", "_sqrt", "_log"]
fig, axs = plt.subplots(ncols=len(transforms), figsize=(15, 5))
for panel, suffix in zip(axs, transforms):
    target_column = "mpg" + suffix
    sns.residplot(
        data=mpg,
        x="horsepower",
        y=target_column,
        order=2,
        ax=panel,
    )
fig.tight_layout()

In [None]:
# order=2 regression of log-transformed MPG on horsepower, with the fit
# drawn as a semi-transparent red line.
sns.regplot(
    x="horsepower",
    y="mpg_log",
    order=2,
    data=mpg,
    line_kws=dict(color="r", alpha=0.7),
)

In [None]:
# Figure-level counterpart of the regression plot: MPG against horsepower.
sns.lmplot(x="horsepower", y="mpg", data=mpg)

In [None]:
# Facet the regression: one column per origin and one row per cylinder
# count, with each origin additionally drawn in its own hue.
facet_options = dict(col="origin", row="cylinders", hue="origin")
sns.lmplot(x="horsepower", y="mpg", data=mpg, **facet_options)

In [None]:
# Regress MPG on cylinder count; x_jitter spreads the points horizontally
# so that cars with the same cylinder count do not sit on one vertical line.
sns.lmplot(x="cylinders", y="mpg", x_jitter=0.3, data=mpg)

In [None]:
# Pairwise relationships between the dataset's numeric columns, coloured by
# origin.
# The derived "mpg_sqrt" / "mpg_log" columns that this notebook adds to `mpg`
# are left out: they are transforms of "mpg", so they only add redundant
# panels, and including them made the grid depend on whether the
# transform cell had already been run. errors="ignore" keeps this cell
# working when those columns are absent.
sns.pairplot(
    mpg.drop(columns=["mpg_sqrt", "mpg_log"], errors="ignore"),
    hue="origin",
)

In [None]:
# Mean horsepower for each origin.
horsepower_by_origin = mpg.groupby("origin")["horsepower"]
horsepower_by_origin.mean()

In [None]:
# Bar chart of mean horsepower per origin, drawn with plain matplotlib.
# reset_index() turns the "origin" group keys back into a regular column so
# that it can be referenced by name through `data`.
mean_hp = mpg.groupby("origin")["horsepower"].mean().reset_index()
fig, ax = plt.subplots()
ax.bar(x="origin", height="horsepower", data=mean_hp)

In [None]:
# One row per origin: mean horsepower ("horsepower") and its standard
# deviation ("err"), computed in a single named aggregation.
bar_w_err = mpg.groupby("origin")["horsepower"].agg(horsepower="mean", err="std")
bar_w_err

In [None]:
# Mean-horsepower bars per origin with the "err" column drawn on top as
# vertical error bars.
bar_data = bar_w_err.reset_index()  # bring "origin" back as a regular column
fig, ax = plt.subplots()
ax.bar(x="origin", height="horsepower", data=bar_data)
ax.errorbar(
    x="origin",
    y="horsepower",
    yerr="err",
    data=bar_data,
    fmt="o",
    color="k",
    capsize=5,
)

In [None]:
# seaborn aggregates horsepower per origin itself and draws
# standard-deviation error bars with caps, all in one call.
sns.barplot(x="origin", y="horsepower", data=mpg, errorbar="sd", capsize=0.2)

In [None]:
# Horsepower per cylinder count, split by origin, with 95% confidence
# intervals as error bars.
# seaborn estimates these intervals by bootstrapping, so without a fixed seed
# the error bars change slightly on every run; `seed` pins the resampling so
# that Restart & Run All reproduces the same figure.
sns.barplot(
    data=mpg,
    x="cylinders",
    y="horsepower",
    hue="origin",
    errorbar=("ci", 95),
    seed=0,  # arbitrary fixed value; any constant gives reproducible bars
    capsize=0.2,
)