In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
np.random.seed(42)

In [None]:
# Synthetic linear dataset: 100 feature values drawn uniformly from [0, 2),
# with targets y = 4 + 3*x perturbed by standard-normal noise.
X = np.random.rand(100, 1) * 2
y = (X * 3 + 4) + np.random.randn(100, 1)

In [None]:
X[:5]

In [None]:
y[:5]

In [None]:
# Raw samples as blue dots, drawn on a fixed window (x in [0, 2], y in [0, 15]).
plt.plot(X, y, 'b.')
plt.gca().set(xlabel='$x_1$', ylabel='$y$', xlim=(0, 2), ylim=(0, 15))
plt.show()

# Linear Regression Model
$ \hat{\theta} = \left(\mathbf{X}^T\cdot\mathbf{X} \right)^{-1} \cdot \mathbf{X}^T \cdot y$

In [None]:
np.c_[np.array([1,2,3]), np.array([4,5,6])]

In [None]:
# Design matrix: prepend a column of ones (the bias input x0 = 1) to every sample.
# The row count comes from X itself rather than a hard-coded 100, so this cell
# keeps working if the number of generated samples changes.
X_b = np.c_[np.ones((len(X), 1)), X]

In [None]:
X_b[:5]

In [None]:
# Normal equation: theta = (X_b^T X_b)^-1 X_b^T y, evaluated left to right.
theta_best = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y

In [None]:
np.linalg.inv(X_b.T.dot(X_b))

In [None]:
theta_best

In [None]:
# Two probe inputs (x = 0 and x = 2) as a column vector, and the same inputs
# with a leading column of ones so they can be multiplied by a (2, 1) theta.
X_new = np.array([0, 2]).reshape(2, 1)
X_new_b = np.c_[np.ones(2), X_new]

print(X_new)

In [None]:
X_new_b

In [None]:
# Predictions of the closed-form model at the two probe inputs.
y_predict = X_new_b @ theta_best
y_predict

In [None]:
# Training samples (blue dots) with the fitted line (red) drawn through the
# two probe predictions.
plt.plot(X, y,'b.')
plt.plot(X_new, y_predict, 'r-')
# Label the axes so the figure can be read on its own.
plt.xlabel('$x_1$')
plt.ylabel('$y$')
plt.axis([0, 2, 0, 15])
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression

# Fit scikit-learn's linear regression on X (the features without the manual
# bias column) and print the learned intercept and coefficient.
lm = LinearRegression().fit(X, y)
print(lm.intercept_, lm.coef_)

In [None]:
lm.predict(X_new)

# Linear Regression using batch gradient descent

In [None]:
eta = 0.1             # learning rate
n_iterations = 1000   # number of full-batch updates
m = len(X_b)          # sample count read from the design matrix, not hard-coded
theta = np.random.randn(2,1)  # random starting parameters

for i in range(n_iterations):
    # Gradient of the mean squared error computed over the whole training set.
    gradient = 2/m * X_b.T.dot(X_b.dot(theta) - y)
    theta = theta - eta * gradient

In [None]:
theta

In [None]:
X_new_b.dot(theta)

In [None]:
theta_path_bgd = []

def plot_gd(theta, eta, theta_path=None, n_iterations=1000):
    """Run batch gradient descent and draw the first ten fits on the current axes.

    Reads the notebook-level ``X``, ``y``, ``X_b``, ``X_new`` and ``X_new_b``.

    Parameters
    ----------
    theta : array
        Starting parameters. The name is rebound on every step, so the
        caller's array is not modified.
    eta : float
        Learning rate; also shown in the plot title.
    theta_path : list, optional
        When given, the parameters obtained after each step are appended to it.
    n_iterations : int, optional
        Number of gradient steps to run (default 1000).

    Returns
    -------
    None
        The function only draws on the current axes and prints progress.
    """
    m = len(X_b)
    plt.plot(X,y,'b.')
    for i in range(n_iterations):
        if i<10:
            # Draw the current fit: the starting line dashed red, later ones solid blue.
            y_predict = X_new_b.dot(theta)
            style = 'b-' if i > 0 else 'r--'
            plt.plot(X_new, y_predict, style)
        gradient = 2/m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - eta * gradient

        # Progress trace every 100 steps.
        if i%100 ==0:
            print(gradient, theta)

        if theta_path is not None:
            theta_path.append(theta)

    plt.xlabel('$x_1$')
    plt.axis([0,2,0,15])
    plt.title(r"$\eta = {}$".format(eta))

    print('----------------------------------')
    print()
    

In [None]:
np.random.seed(42)
theta = np.random.randn(2,1)

# Start from an empty path each time this cell runs. Without the reset, a
# re-run appends a second trajectory to theta_path_bgd, or fails if the name
# has meanwhile been rebound to a NumPy array.
theta_path_bgd = []

# Same starting point, three learning rates side by side; only the middle
# run (eta = 0.1) records its parameter path.
plt.figure(figsize=(12,4))
plt.subplot(131)
plot_gd(theta, eta=0.02)
plt.subplot(132)
plot_gd(theta, eta=0.1, theta_path=theta_path_bgd)
plt.subplot(133)
plot_gd(theta, eta=0.5)

plt.show()

# Stochastic Gradient Descent

In [None]:
# Sample count of the design matrix, and an empty list that will collect the
# parameter vector after every stochastic step.
m = X_b.shape[0]
theta_path_sgd = []


In [None]:
n_epochs = 50
t0 = 5
t1 = 50

def learning_schedule(t):
    """Return the decayed learning rate ``t0 / (t + t1)`` for step ``t``."""
    return t0 / (t + t1)

# Restart the recorded path so re-running this cell does not stack trajectories.
theta_path_sgd = []

theta = np.random.randn(2,1)
for epoch in range(n_epochs):
    for i in range(m):
        if epoch == 0 and i < 20:
            # Draw the first 20 fits of the first epoch: the starting line
            # dashed red, the following ones solid blue.
            y_predict = X_new_b.dot(theta)
            style = 'b-' if i > 0 else 'r--'
            plt.plot(X_new, y_predict, style)
        # Pick one training instance at random; slicing keeps both pieces 2-D.
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index+1]
        yi = y[random_index:random_index+1]
        # Gradient of the squared error on that single instance only. There is
        # no 1/m factor because the "batch" has size one.
        gradient = 2 * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradient
        theta_path_sgd.append(theta)

plt.plot(X,y,'b.')
plt.axis([0,2,0,15])

plt.show()
        

In [None]:
theta

In [None]:
from sklearn.linear_model import SGDRegressor

In [None]:
# tol=None switches off the tolerance-based stopping check, so training runs
# for all max_iter epochs. It replaces -np.infty: that alias no longer exists
# in NumPy 2.0, and recent scikit-learn releases reject a negative tolerance.
sgd_reg = SGDRegressor(max_iter=50, tol=None, penalty=None, eta0=0.1, random_state=101)

In [None]:
y.shape

In [None]:
y.ravel().shape

In [None]:
sgd_reg.fit(X, y.ravel())

In [None]:
sgd_reg.intercept_, sgd_reg.coef_

# Mini-batch gradient descent

In [None]:
theta_path_mgd = []

n_iterations = 50     # number of passes over the shuffled training set
minibatch_size = 20

np.random.seed(42)
theta = np.random.randn(2,1)  # random initialization

t0, t1 = 200, 1000
def learning_schedule(t):
    """Return the decayed learning rate ``t0 / (t + t1)`` for step ``t``."""
    return t0 / (t + t1)

t = 0
for epoch in range(n_iterations):
    # Reshuffle features and targets together at the start of every pass.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch_size):
        t += 1
        xi = X_b_shuffled[i:i+minibatch_size]
        yi = y_shuffled[i:i+minibatch_size]
        # Average over the rows actually present in this slice, so a shorter
        # final batch (when m is not a multiple of minibatch_size) is not
        # under-weighted.
        gradients = 2/len(xi) * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)
        theta = theta - eta * gradients
        theta_path_mgd.append(theta)

In [None]:
theta

In [None]:
# Stack each recorded list of parameter vectors into a single array.
# From here on the three names refer to arrays, not lists.
theta_path_bgd, theta_path_sgd, theta_path_mgd = (
    np.array(recorded)
    for recorded in (theta_path_bgd, theta_path_sgd, theta_path_mgd)
)

In [None]:
# Parameter-space trajectories of the three optimisers, drawn in the order
# stochastic, mini-batch, batch (thinnest line first).
plt.figure(figsize=(7,4))
for path, fmt, width, name in (
    (theta_path_sgd, "r-s", 1, "Stochastic"),
    (theta_path_mgd, "g-+", 2, "Mini-batch"),
    (theta_path_bgd, "b-o", 3, "Batch"),
):
    plt.plot(path[:, 0], path[:, 1], fmt, linewidth=width, label=name)
plt.legend(loc="upper left", fontsize=16)
plt.xlabel(r"$\theta_0$", fontsize=20)
plt.ylabel(r"$\theta_1$   ", fontsize=20, rotation=0)
plt.axis([2.5, 4.5, 2.3, 3.9])
plt.show()

# Polynomial Regression

In [None]:
# Quadratic dataset: m feature values drawn uniformly from [-5, 5), with
# targets y = 0.5*x^2 + 2*x + 4 plus standard-normal noise.
# Note that this rebinds the notebook-level X, y and m.
m = 100
X = np.random.rand(m, 1) * 10 - 5
y = X**2 * 0.5 + X * 2 + 4 + np.random.randn(m, 1)

In [None]:
# Scatter of the quadratic dataset, with labelled axes; plt.show() keeps the
# Line2D repr out of the cell output.
plt.plot(X,y,'b.')
plt.xlabel('$x_1$')
plt.ylabel('$y$')
plt.show()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Expand the single feature into polynomial terms up to degree 3, without the
# constant (bias) column.
poly_features = PolynomialFeatures(include_bias=False, degree=3)
X_poly = poly_features.fit(X).transform(X)

In [None]:
X[0]

In [None]:
X_poly[0]

In [None]:
lin_reg = LinearRegression()

In [None]:
lin_reg.fit(X_poly, y)
lin_reg.intercept_, lin_reg.coef_

In [None]:
# Evaluate the fitted model on 100 evenly spaced inputs across [-5, 5] and
# draw the resulting curve (red) over the samples (blue dots).
# Note that this rebinds the notebook-level X_new.
X_new = np.linspace(-5, 5, 100)[:, np.newaxis]
X_new_poly = poly_features.transform(X_new)
y_new = lin_reg.predict(X_new_poly)

plt.plot(X, y, 'b.')
plt.plot(X_new, y_new, 'r-')

In [None]:
# Refit with polynomial features up to degree 20 (no bias column).
poly_features = PolynomialFeatures(degree=20, include_bias=False)
X_poly = poly_features.fit_transform(X)
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)
# Display the intercept itself; the previous trailing comma wrapped it in a
# one-element tuple.
lin_reg.intercept_

In [None]:
# Evaluate the current model on 100 evenly spaced inputs across [-5, 5], then
# zoom the view to x in [-4, -2], y in [20, 60].
X_new = np.linspace(-5, 5, 100)[:, np.newaxis]
X_new_poly = poly_features.transform(X_new)
y_new = lin_reg.predict(X_new_poly)

plt.plot(X, y, 'b.')
plt.plot(X_new, y_new, 'r-')
plt.axis([-4,-2,20,60])

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Fit one pipeline per polynomial degree (300, 2, 1): expand features, scale
# them, fit a linear model, then draw each prediction curve over X_new.
for style, width, degree in [("g-", 1, 300), ("b--", 2, 2), ("r-+", 2, 1)]:
    polybig_features = PolynomialFeatures(include_bias=False, degree=degree)
    std_scaler, lin_reg = StandardScaler(), LinearRegression()
    polynomial_regression = Pipeline(steps=[
        ("poly_features", polybig_features),
        ("std_scaler", std_scaler),
        ("lin_reg", lin_reg),
    ])
    y_newbig = polynomial_regression.fit(X, y).predict(X_new)
    plt.plot(X_new, y_newbig, style, linewidth=width, label=str(degree))

# Training samples on top, then legend, labels and a fixed viewing window.
plt.plot(X, y, "b.", linewidth=3)
plt.legend(loc="upper left")
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", rotation=0, fontsize=18)
plt.axis([-3, 3, 0, 10])

plt.show()