In [1]:
# Notebook imports and packages
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

%matplotlib inline

#  Differentiating MSE

$$MSE=\frac{1}{n}\sum^n_{i=1}\left( y^{(i)}-\hat{y}^{(i)}\right)^2$$

## Differentiating w/ SymPy

These seem to work, but I am not sure it is a good idea to rely on them: the approach feels somewhat "sketchy" here and might return something unexpected in more complex situations.

In [2]:
from sympy import symbols, diff

In [3]:
# Sample data as (7, 1) column vectors: x holds the inputs, y the observed targets.
x = np.array([0.1, 1.2, 2.4, 3.2, 4.1, 5.7, 6.5]).reshape(-1, 1)
y = np.array([1.7, 2.4, 3.5, 3.0, 6.1, 9.4, 8.2]).reshape(-1, 1)

def mse(y, y_hat): # Teacher's
    """Mean squared error between targets ``y`` and predictions ``y_hat``.

    The squared differences are averaged along axis 0, so 2-D inputs
    yield one value per column.
    """
    squared_residuals = np.power(y - y_hat, 2)
    return np.mean(squared_residuals, axis=0)

def my_mse(theta_0, theta_1):
    """Mean squared error of the line ``theta_0 + theta_1 * x`` on the notebook data.

    Reads the module-level ``x`` and ``y`` arrays instead of taking them as
    arguments. The notebook calls it both with plain numbers and with SymPy
    symbols. Returns the first element of the axis-0 average.
    """
    predictions = theta_0 + theta_1 * x
    squared_residuals = np.power(y - predictions, 2)
    return np.mean(squared_residuals, axis=0)[0]

In [4]:
# Bind SymPy symbols for the two parameters, then display the MSE as a
# symbolic expression in them.
th0, th1 = symbols('theta_0, theta_1')
my_mse(th0, th1)

0.412857142857143*(-0.588235294117647*theta_0 - 0.0588235294117647*theta_1 + 1)**2 + 0.822857142857143*(-0.416666666666667*theta_0 - 0.5*theta_1 + 1)**2 + 1.46285714285714*(-0.3125*theta_0 - theta_1 + 0.9375)**2 + 1.75*(-0.285714285714286*theta_0 - 0.685714285714286*theta_1 + 1)**2 + 5.31571428571428*(-0.163934426229508*theta_0 - 0.672131147540984*theta_1 + 1)**2 + 9.60571428571428*(-0.121951219512195*theta_0 - 0.792682926829268*theta_1 + 1)**2 + 12.6228571428571*(-0.106382978723404*theta_0 - 0.606382978723404*theta_1 + 1)**2

In [5]:
# Substitute theta_0 = theta_1 = 1 into the symbolic MSE and evaluate it numerically.
my_mse(th0, th1).evalf(subs={th0:1,th1:1})

1.51857142857143

In [6]:
# Cross-check: evaluate both numeric MSE implementations at theta_0 = theta_1 = 1.
y_hat = 1 + 1 * x
mse(y, y_hat), my_mse(1,1)

(array([1.51857143]), 1.5185714285714285)

In [7]:
# Partial derivatives of the symbolic MSE with respect to theta_0 and theta_1.
diff(my_mse(th0, th1), th0), diff(my_mse(th0, th1), th1)

(2.0*theta_0 + 6.62857142857143*theta_1 - 9.8,
 6.62857142857143*theta_0 + 31.1428571428571*theta_1 - 43.6971428571429)

In [8]:
def squared_error(theta_0, theta_1, y, x):
    """Squared residual of the prediction ``theta_0 + theta_1 * x`` against ``y``.

    Uses only ``+``, ``*``, ``-`` and ``**``; the notebook calls it with
    SymPy symbols to obtain a symbolic expression.
    """
    residual = y - (theta_0 + theta_1 * x)
    return residual ** 2

# a and b are the SymPy symbols named y and x. th0 and th1 are rebound here
# to symbols named \theta_0 and \theta_1, replacing the earlier theta_0/theta_1 bindings.
a, b, th0, th1 = symbols(r'y, x, \theta_0, \theta_1')
squared_error(th0, th1, a, b)

(-\theta_0 - \theta_1*x + y)**2

In [9]:
# Partial derivative of the single-sample squared error with respect to th0.
diff(squared_error(th0, th1, a, b),th0)

2*\theta_0 + 2*\theta_1*x - 2*y

In [10]:
# Partial derivative of the single-sample squared error with respect to th1.
diff(squared_error(th0, th1, a, b), th1)

-2*x*(-\theta_0 - \theta_1*x + y)

## Differentiating w/ Symbolab

$$\frac{\partial MSE}{\partial\theta_0}=-\frac{2}{n}\sum^n_{i=1}\left(y^{(i)}-\hat{y}^{(i)}\right)$$
$$\frac{\partial MSE}{\partial\theta_1}=-\frac{2}{n}\sum^n_{i=1}\left(y^{(i)}-\hat{y}^{(i)}\right)x^{(i)}$$

In [11]:
def _as_column(values):
    """Return ``values`` as an array, turning a 1-D input into an (n, 1) column."""
    arr = np.asarray(values)
    return arr[:, np.newaxis] if arr.ndim == 1 else arr


def grad(x, y, thetas):
    """Gradient of the MSE of the line ``thetas[0] + thetas[1] * x``.

    Implements
        dMSE/dtheta_0 = -2/n * sum(y - y_hat)
        dMSE/dtheta_1 = -2/n * sum((y - y_hat) * x)

    Parameters
    ----------
    x, y : array-like
        Inputs and targets, either as (n, 1) column vectors or as 1-D
        sequences of length n. 1-D inputs are promoted to columns so that
        mixing a column with a 1-D array cannot broadcast the residual
        into an (n, n) matrix.
    thetas : sequence
        ``thetas[0]`` is the intercept and ``thetas[1]`` the slope.

    Returns
    -------
    numpy.ndarray
        Array of shape (2,): the partial derivatives with respect to
        ``thetas[0]`` and ``thetas[1]``.
    """
    x = _as_column(x)
    y = _as_column(y)
    # Compute the residual once; both partial derivatives reuse it.
    residuals = y - (thetas[0] + thetas[1] * x)
    d_theta_0 = -2 * np.average(residuals, axis=0)
    d_theta_1 = -2 * np.average(residuals * x, axis=0)
    return np.array([d_theta_0[0], d_theta_1[0]])

# Running GD

In [12]:
# Same sample data as (7, 1) column vectors, restated so the gradient-descent section is self-contained.
x = np.array([0.1, 1.2, 2.4, 3.2, 4.1, 5.7, 6.5]).reshape(-1, 1)
y = np.array([1.7, 2.4, 3.5, 3.0, 6.1, 9.4, 8.2]).reshape(-1, 1)

def mse(thetas):
    """Mean squared error of the line ``thetas[0] + thetas[1] * x`` on the notebook data.

    Reads the module-level ``x`` and ``y`` arrays and averages the squared
    residuals along axis 0. Note that this rebinds the name ``mse``,
    replacing the earlier two-argument ``mse(y, y_hat)``.
    """
    intercept, slope = thetas[0], thetas[1]
    squared_residuals = np.power(y - (intercept + slope * x), 2)
    return np.mean(squared_residuals, axis=0)

In [13]:
# Step size applied to the gradient on every update.
multiplier = 0.01
# Starting guess for [theta_0, theta_1].
thetas = np.array([2.9, 2.9])

# Fixed number of update steps; there is no convergence test, so all 10000 always run.
for i in range(10000):
    # Move against the gradient of the MSE.
    thetas = thetas - multiplier * grad(x, y, thetas)

# Report the fitted parameters and the MSE they achieve.
print(
    thetas,
    mse(thetas)
)

[0.84753515 1.22272646] [0.94796558]
