This notebook was made to help create figures for lecture.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

Creating a toy dataset to demonstrate the ideas from lecture:

In [None]:
# Fix the global NumPy seed so the toy dataset is identical on every run.
np.random.seed(42)

# Number of observations in the toy dataset.
n = 75
# Draw n uniform values, rescale them from [0, 1) to [-2, 2), and sort ascending.
x = np.sort(np.random.rand(n) * 4 - 2)
def g(x):
    """Evaluate the curve ``x**2 - x**4 / 3.5`` used to generate the toy data.

    Works elementwise on NumPy arrays as well as on plain scalars.
    """
    quadratic_term = x**2
    quartic_term = x**4 / 3.5
    return quadratic_term - quartic_term
# Observations: standard-normal noise scaled by 1/10, added to the curve g(x).
y = np.random.randn(n) / 10 + g(x)
# Evenly spaced grid of 300 points on [-2.1, 2.1] used for drawing smooth curves.
xtest = np.linspace(start=-2.1, stop=2.1, num=300)

Making the main visualizations:

In [None]:
# Scatter trace of the noisy observations, drawn with the first colour of the
# qualitative Plotly palette.
trace_y = go.Scatter(
    x=x,
    y=y,
    mode="markers",
    name="Y",
    marker=dict(size=10, color=px.colors.qualitative.Plotly[0]),
)
# Line trace of g evaluated on the dense grid, drawn with the second palette colour.
trace_g = go.Scatter(
    x=xtest,
    y=g(xtest),
    mode="lines",
    name="g(x)",
    line=dict(width=7, color=px.colors.qualitative.Plotly[1]),
)

# Shared layout for the lecture figures: fixed axis ranges and canvas size,
# a visible legend, the "simple_white" template, and a large font.
layout = go.Layout(
    template="simple_white",
    width=800,
    height=700,
    showlegend=True,
    font=dict(size=24),
    xaxis=dict(range=[-2.1, 2.1]),
    yaxis=dict(range=[-1, 1.3]),
)

In [None]:
# Figure containing only the g(x) curve; exported as a PNG at 3x scale.
fig = go.Figure(data=[trace_g], layout=layout)
fig.write_image("gx.png", scale=3)
# The trailing expression renders the figure as the cell output.
fig

In [None]:
# Figure with the g(x) curve and the observations Y; exported as a PNG at 3x scale.
fig = go.Figure(data=[trace_g, trace_y], layout=layout)
fig.write_image("gx_and_Y.png", scale=3)
# The trailing expression renders the figure as the cell output.
fig

In [None]:
# Collect the toy dataset into a two-column frame.
df = pd.DataFrame({"x": x, "y": y})

# Reseed the global NumPy generator before drawing the subsets.
np.random.seed(42)
# Two draws of 5 rows each from the full dataset.
samples = [df.sample(n=5) for _ in range(2)]

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
# One fitted pipeline per sample: expand x into degree-5 polynomial features,
# then fit a linear regression of y on those features.
models = [
    Pipeline(steps=[
        ("polynomial_transformation", PolynomialFeatures(degree=5)),
        ("linear_regression", LinearRegression()),
    ]).fit(sample[["x"]], sample["y"])
    for sample in samples
]

In [None]:
# One prediction curve per fitted model, evaluated on the dense grid.
# Colours start at index 2 of the qualitative Plotly palette.
lines = [
    go.Scatter(
        x=xtest,
        # Predict from a one-column frame named "x", matching the column the
        # models were fitted on.
        y=model.predict(pd.DataFrame({"x": xtest})),
        mode="lines",
        name=f"y_hat_{idx}",
        line=dict(width=7, color=px.colors.qualitative.Plotly[idx + 2]),
    )
    for idx, model in enumerate(models)
]

# The points of each sample as large markers, using the same palette index as
# the matching prediction curve; these traces are hidden from the legend.
markers = [
    go.Scatter(
        x=sample["x"],
        y=sample["y"],
        mode="markers",
        name=f"y_hat_{idx}",
        showlegend=False,
        marker=dict(size=20, color=px.colors.qualitative.Plotly[idx + 2]),
    )
    for idx, sample in enumerate(samples)
]
# Gray observations and gray g(x) curve first, followed by every fitted curve
# and then every sample's markers.
fig = go.Figure(
    data=[
        go.Scatter(x=x, y=y, mode="markers", name="Y", marker=dict(color="gray")),
        go.Scatter(x=xtest, y=g(xtest), mode="lines", name="g(x)", line=dict(color="gray")),
        *lines,
        *markers,
    ],
    layout=layout,
)
# Override the y-axis range for this figure.
fig.update_layout(yaxis_range=[-1, 1.5])
fig.write_image("Y_samples.png", scale=3)
# The trailing expression renders the figure as the cell output.
fig

In [None]:
# `lines` and `markers` were already built in the previous cell from the same
# `models` and `samples`; reuse them here instead of rebuilding identical
# trace lists from copy-pasted code.
# This figure shows g(x), the observations Y, and only the first fitted curve
# together with the sample it was fitted on.
fig = go.Figure(data=[trace_g, trace_y, lines[0], markers[0]], layout=layout)
# Override the y-axis range for this figure.
fig.update_layout(yaxis_range=[-1, 1.5])
fig.write_image("Y_samples_one.png", scale=3)
# The trailing expression renders the figure as the cell output.
fig