In [None]:
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

import seaborn as sns

# Add a Dartmouth-y color theme
import matplotlib as mpl
dartmouth_colors = ["#00693E", "#12312B", "#C3DD88", "#6EAA8D", "#797979", "#EBF3EF"]
mpl.rcParams.update({
                        'figure.facecolor': "#EBF3EF",
                        'figure.figsize': [7.50, 3.50],
                        'axes.prop_cycle': mpl.cycler(color=dartmouth_colors),
                        'axes.facecolor': "#FFFFFF",
                        'axes.labelcolor': '#12312B',
                        'text.color': '#12312B'
                    })

%config InlineBackend.figure_formats = ['svg']


In [None]:
# Load seaborn's penguins dataset and keep only the rows without missing values
penguins_raw = sns.load_dataset('penguins')
penguins = penguins_raw.dropna()
penguins.head()

In [None]:
# sns.lmplot is a figure-level function: it builds its own figure (a FacetGrid),
# so a preceding plt.figure() call only leaves an empty extra figure behind.
# The 5x5 size is requested through lmplot's own height/aspect parameters instead.
grid = sns.lmplot(data=penguins, x='flipper_length_mm', y='body_mass_g',
                  fit_reg=False, height=5, aspect=1)
grid.set_axis_labels('Flipper length [mm]', 'Body mass [g]')

In [None]:
# Scatter plot plus seaborn's linear regression line (no confidence band).
# sns.lmplot creates its own figure, so no plt.figure() call is made here; calling
# it would only produce an additional empty figure. Size is set via height/aspect.
grid = sns.lmplot(data=penguins, x='flipper_length_mm', y='body_mass_g',
                  ci=None, height=5, aspect=1)
grid.set_axis_labels('Flipper length [mm]', 'Body mass [g]')

In [None]:
n = 2  # degree of the fitted polynomial

# Expand flipper length into polynomial features and fit a linear model on them
poly = PolynomialFeatures(degree=n)
X_poly = poly.fit_transform(penguins.flipper_length_mm.values.reshape(-1, 1))
lr = LinearRegression().fit(X_poly, penguins.body_mass_g)
y_pred = lr.predict(X_poly)

# sns.lmplot is figure-level and creates its own figure, so the plt.figure() calls
# that used to precede it (each leaving an empty figure) are gone; the 5x5 size is
# set through height/aspect instead.
grid = sns.lmplot(data=penguins, x='flipper_length_mm', y='body_mass_g',
                  fit_reg=False, height=5, aspect=1)
grid.set_axis_labels('Flipper length [mm]', 'Body mass [g]')
# Draw the fitted curve explicitly on the scatter plot's axes
sns.lineplot(x=penguins.flipper_length_mm, y=y_pred, linestyle='-.', ax=grid.ax)

In [None]:
n = 5  # degree of the fitted polynomial

# Expand flipper length into polynomial features and fit a linear model on them
poly = PolynomialFeatures(degree=n)
X_poly = poly.fit_transform(penguins.flipper_length_mm.values.reshape(-1, 1))
lr = LinearRegression().fit(X_poly, penguins.body_mass_g)
y_pred = lr.predict(X_poly)

# sns.lmplot is figure-level and creates its own figure, so the plt.figure() calls
# that used to precede it (each leaving an empty figure) are gone; the 5x5 size is
# set through height/aspect instead.
grid = sns.lmplot(data=penguins, x='flipper_length_mm', y='body_mass_g',
                  fit_reg=False, height=5, aspect=1)
grid.set_axis_labels('Flipper length [mm]', 'Body mass [g]')
# Draw the fitted curve explicitly on the scatter plot's axes
sns.lineplot(x=penguins.flipper_length_mm, y=y_pred, linestyle='-.', ax=grid.ax)

In [None]:
%matplotlib widget
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(projection='3d')
ax.scatter(penguins.bill_length_mm, penguins.flipper_length_mm, penguins.body_mass_g)

ax.set_xlabel('Bill length [mm]')
ax.set_ylabel('Flipper length [mm]')
ax.set_zlabel('Body mass [g]')
fig.tight_layout()

%matplotlib inline

In [None]:
%matplotlib widget
X = penguins[['bill_length_mm', 'flipper_length_mm']]
lr = LinearRegression().fit(X, penguins.body_mass_g)
predicted = lr.predict(X)

fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(projection='3d')
ax.scatter(penguins.bill_length_mm, penguins.flipper_length_mm, penguins.body_mass_g)

bills, flippers = np.meshgrid(penguins.bill_length_mm, penguins.flipper_length_mm)
mass = lr.intercept_ + lr.coef_[0] * bills + lr.coef_[1] * flippers

ax.plot_surface(bills, flippers, mass, rstride=40)
ax.set_xlabel('Bill length [mm]')
ax.set_ylabel('Flipper length [mm]')
ax.set_zlabel('Body mass [g]')
%matplotlib inline

In [None]:
# Fit body mass against flipper length and read off intercept and slope
flipper_column = penguins.flipper_length_mm.values.reshape(-1, 1)
lr = LinearRegression().fit(flipper_column, penguins.body_mass_g)
beta0, beta1 = lr.intercept_, lr.coef_[0]
print(f"𝛽_0 = {beta0}, 𝛽_1 = {beta1}")

# Number of grid points used when scanning each coefficient
n = 20

def calculate_loss(b0, b1, x=None, y=None):
    """Return the sum of squared errors of the line ``b0 + b1 * x`` against ``y``.

    The coefficients this notebook compares against come from
    ``LinearRegression``, which minimises squared error, so the loss is the sum
    of squared residuals (not absolute residuals); this way the fitted
    coefficients sit at the minimum of the plotted loss.

    Parameters
    ----------
    b0 : float
        Intercept of the candidate line.
    b1 : float
        Slope of the candidate line.
    x : array-like, optional
        Predictor values. Defaults to ``penguins.flipper_length_mm``.
    y : array-like, optional
        Observed target values. Defaults to ``penguins.body_mass_g``.

    Returns
    -------
    float
        Sum over all observations of ``(y - (b0 + b1 * x)) ** 2``.
    """
    if x is None:
        x = penguins.flipper_length_mm
    if y is None:
        y = penguins.body_mass_g
    residuals = np.asarray(y, dtype=float) - (b0 + b1 * np.asarray(x, dtype=float))
    return float(np.sum(residuals ** 2))

# Scan each coefficient from half to twice its fitted value.
# True division is used: floor division (//) on a float would round the lower
# bound down to a whole number instead of giving exactly half the coefficient.
b0 = np.linspace(beta0 / 2, beta0 * 2, n)
b1 = np.linspace(beta1 / 2, beta1 * 2, n)

fig, (ax_intercept, ax_slope) = plt.subplots(2, 1, figsize=(5, 5))

# Top panel: vary the intercept while the slope stays at its fitted value
ax_intercept.scatter(b0, [calculate_loss(bb0, beta1) for bb0 in b0])
ax_intercept.set_xlabel('Intercept (slope fixed at fitted value)')
ax_intercept.set_ylabel('Loss')

# Bottom panel: vary the slope while the intercept stays at its fitted value
ax_slope.scatter(b1, [calculate_loss(beta0, bb1) for bb1 in b1])
ax_slope.set_xlabel('Slope (intercept fixed at fitted value)')
ax_slope.set_ylabel('Loss')

fig.tight_layout(pad=1.2)

In [None]:
# Evaluate the loss on the full (intercept, slope) grid
bb0, bb1 = np.meshgrid(b0, b1)
# Explicit float dtype so loss values are never truncated by the grid's dtype
error = np.zeros(bb0.shape, dtype=float)
for idx in np.ndindex(bb0.shape):
    error[idx] = calculate_loss(bb0[idx], bb1[idx])

fig = plt.figure(figsize =(14, 9))
ax = plt.axes(projection ='3d')

# Loss at every grid point, plus a large diamond at the fitted coefficients
ax.scatter(bb0, bb1, error)
ax.scatter(beta0, beta1, calculate_loss(beta0, beta1), marker='d', s=200)

# Label the axes: without them the three dimensions cannot be told apart
ax.set_xlabel('Intercept')
ax.set_ylabel('Slope')
ax.set_zlabel('Loss')

In [None]:
# Training error of polynomial fits of increasing degree (1 .. N-1)
lse = list()
N = 10
for n in range(1, N):
    poly = PolynomialFeatures(degree=n)
    X_poly = poly.fit_transform(penguins.flipper_length_mm.values.reshape(-1, 1))
    lr = LinearRegression().fit(X_poly, penguins.body_mass_g)
    y_pred = lr.predict(X_poly)
    # Sum of squared residuals, matching the "Least-square error" axis label
    # (the sum of absolute residuals is a different quantity).
    lse.append(np.sum((y_pred - penguins.body_mass_g) ** 2))

plt.figure(figsize=(5, 5))
plt.plot(range(1, N), lse, '--o')
plt.xlabel("Degree of polynomial")
plt.ylabel("Least-square error")


In [None]:
# Create noisy quadratic data: y = 1 + x + x^2 plus Gaussian noise (std 5)
N = 20
x = np.linspace(0, 10, N)
# Seeded generator so the noise, and every fit below, is reproducible on re-run
rng = np.random.default_rng(42)
noise = rng.normal(0, 5, N)
y = 1 + x + x ** 2 + noise

plt.figure(figsize=(5,5))
plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')  # was a second xlabel call, which overwrote the x-axis label

In [None]:
# Straight-line fit to the synthetic data, evaluated at the training points
x_column = x.reshape(-1, 1)
straight_line = LinearRegression().fit(x_column, y)
underfitting = straight_line.predict(x_column)

underfit_fig, underfit_ax = plt.subplots(figsize=(5, 5))
underfit_ax.scatter(x, y)
underfit_ax.plot(x, underfitting, label='underfitting')
underfit_ax.legend()

In [None]:
n = 2  # polynomial degree
poly = PolynomialFeatures(degree=n)
# Finer grid over the same interval, used to draw a smooth fitted curve
x_fine = np.linspace(0, 10, 10*N)
# Fit the feature expansion on the training inputs only, then reuse the fitted
# transformer on the evaluation grid (transform, not a second fit_transform).
X_poly = poly.fit_transform(x.reshape(-1, 1))
X_poly_fine = poly.transform(x_fine.reshape(-1, 1))

good_fit = LinearRegression().fit(X_poly, y).predict(X_poly_fine)

plt.figure(figsize=(5,5))
plt.scatter(x, y)
plt.plot(x, underfitting, label='underfitting')
plt.plot(x_fine, good_fit, label='good fit')
plt.legend()

In [None]:
n = 20  # deliberately high polynomial degree
poly = PolynomialFeatures(degree=n)
# Fit the feature expansion on the training inputs only, then reuse the fitted
# transformer on the evaluation grid (transform, not a second fit_transform).
X_poly = poly.fit_transform(x.reshape(-1, 1))
X_poly_fine = poly.transform(x_fine.reshape(-1, 1))

overfitting = LinearRegression().fit(X_poly, y).predict(X_poly_fine)

plt.figure(figsize=(5,5))
plt.scatter(x, y)
plt.plot(x, underfitting, label='underfitting')
# Legend text matches the 'good fit' label used when this curve was first drawn
plt.plot(x_fine, good_fit, label='good fit')
plt.plot(x_fine, overfitting, label='overfitting')
plt.legend()

In [None]:
# Sigmoid curve 1 / (1 + e^(-x)) sampled at 30 points on [-6, 6]
x = np.linspace(-6, 6, 30)
y = 1 / (1 + np.exp(-x))

sigmoid_fig, sigmoid_ax = plt.subplots(figsize=(5, 2.5))
sigmoid_ax.plot(x, y)
sigmoid_ax.grid()