In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

##### Linear-looking data

In [None]:
# Seed the global NumPy RNG so the synthetic data set (and everything fitted
# on it below) is identical on every Restart & Run All.
np.random.seed(42)

# 100 instances of a single feature drawn uniformly from [0, 2); the target
# follows y = 4 + 3x plus standard Gaussian noise.
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
plt.plot(X, y, ".")

##### Calculation of the parameter vector that solves the Normal equation

In [None]:
# Prepend the bias feature x0 = 1 to every instance. The row count is read
# from X itself so the cell keeps working if the sample size changes.
X_b = np.c_[np.ones((X.shape[0], 1)), X]
# Normal equation: (X_b^T X_b) theta = X_b^T y. Solving the linear system
# directly avoids forming the explicit inverse, which is both slower and
# numerically less accurate.
theta_best = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))
# The data were generated as y = 4 + 3x + noise, so expect roughly (4, 3).
theta_best

In [None]:
# Evaluate the fitted line at the two probe inputs x = 0 and x = 2.
X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((2, 1)), X_new]   # bias column first, as in X_b
y_predict = np.dot(X_new_b, theta_best)
# Fitted line in red first, then the training points in blue.
plt.plot(X_new, y_predict, "r-")
plt.plot(X, y, "b.")

##### Linear regression with scikit-learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Fit scikit-learn's estimator on the same X, y (fit() returns the estimator).
lin_reg = LinearRegression().fit(X, y)
# Show the learned bias term and feature weight.
(lin_reg.intercept_, lin_reg.coef_)

In [None]:
# Predict targets for the two probe inputs in X_new with the fitted estimator.
lin_reg.predict(X_new)

##### Polynomial Regression

In [None]:
# Quadratic data: x is drawn uniformly from [-3, 3) and the target is
# y = 0.5 x^2 + x + 2 plus standard Gaussian noise.
m = 100
X = np.random.rand(m, 1) * 6 - 3
y = 0.5 * (X * X) + X + 2 + np.random.randn(m, 1)
plt.plot(X, y, ".")

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Expand the single feature into [x, x^2]; no constant column is generated.
poly_features = PolynomialFeatures(include_bias=False, degree=2)
poly_features.fit(X)
X_poly = poly_features.transform(X)
# A linear model on the expanded features gives a quadratic fit in x.
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)

#### Learning curves

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
def plot_learning_curves(model, X, y, random_state=None):
    """Plot training and validation RMSE as the training set grows.

    The data are split 80/20 into a training and a validation part. The model
    is refit on the first 1, 2, ..., len(X_train) training instances; after
    each fit the RMSE on the instances used for training and on the whole
    validation part is recorded. Both curves are drawn with ``plt.plot``
    against the training-set size.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
        Refit at every size; after the call it holds the fit on the full
        training part.
    X, y : array-like
        Features and targets, passed to ``train_test_split``.
    random_state : optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        original unseeded split.
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )
    # Include len(X_train) itself so the last point of each curve is the model
    # trained on the complete training part.
    train_sizes = list(range(1, len(X_train) + 1))
    train_errors, val_errors = [], []
    for m in train_sizes:
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    # Errors are stored as MSE; take the square root to plot RMSE. Plot against
    # the real sizes rather than the implicit 0-based list index.
    plt.plot(train_sizes, np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(train_sizes, np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.xlabel("Training set size")
    plt.ylabel("RMSE")
    # The labels passed above are only shown once a legend is drawn.
    plt.legend()

In [None]:
# Learning curves of a fresh linear model on the current X, y.
lin_reg = LinearRegression()
plot_learning_curves(model=lin_reg, X=X, y=y)

In [None]:
from sklearn