In [1]:
def predict(alpha: float, beta: float, x_i: float) -> float:
    return beta * x_i + alpha

In [2]:
def error(alpha: float, beta: float, x_i: float, y_i: float) -> float:
    """
    The error from predicting beta * x_i + alpha
    when the actual value is y_i
    """
    return predict(alpha, beta, x_i) - y_i

In [3]:
import import_ipynb
from linear_algebra import Vector

importing Jupyter notebook from linear_algebra.ipynb


In [4]:
def sum_of_sqerrors(alpha: float, beta: float, x: Vector, y: Vector) -> float:
    return sum(error(alpha, beta, x_i, y_i) ** 2
               for x_i, y_i in zip(x, y))

In [5]:
from typing import Tuple
import import_ipynb
from linear_algebra import Vector
from statisticas import correlation, standard_deviation, mean

importing Jupyter notebook from statisticas.ipynb


<Figure size 432x288 with 0 Axes>

In [6]:
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]:
    """
    Given two vectors x and y,
    find the least-squares values of alpha and beta
    """
    beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta

In [7]:
x = [i for i in range(-100, 110, 10)]
y = [3 * i - 5 for i in x]

In [8]:
x[:5], y[:5]

([-100, -90, -80, -70, -60], [-305, -275, -245, -215, -185])

In [9]:
# Should find that y = 3x - 5
assert least_squares_fit(x, y) == (-5, 3)

In [10]:
from statisticas import num_friends_good, daily_minutes_good

In [11]:
alpha, beta = least_squares_fit(num_friends_good, daily_minutes_good)
alpha, beta

(22.94755241346903, 0.903865945605865)

In [12]:
assert 22.9 < alpha < 23.0
assert 0.9 < beta < 0.905

In [13]:
from statisticas import de_mean

In [14]:
def total_sum_of_squares(y: Vector) -> float:
    """the total squared variation of y_i's from their mean"""
    return sum(v ** 2 for v in de_mean(y))

In [15]:
def r_squared(alpha: float, beta: float, x: Vector, y: Vector) -> float:
    """
    the fraction of variation in y captured by the model, which equals
    1 - the fraction of variation in y not captured by the model
    """
    return 1.0 - (sum_of_sqerrors(alpha, beta, x, y) /
                  total_sum_of_squares(y))

In [16]:
rsq = r_squared(alpha, beta, num_friends_good, daily_minutes_good)
rsq

0.3291078377836305

In [17]:
assert 0.328 < rsq < 0.330

In [18]:
# def main():

In [19]:
import random
import tqdm
from gradient_descent import gradient_step

importing Jupyter notebook from gradient_descent.ipynb


In [20]:
num_epochs = 1000
random.seed(0)

In [21]:
guess = [random.random(), random.random()]  # choose random value to start
guess

[0.8444218515250481, 0.7579544029403025]

In [22]:
learning_rate = 0.00001

In [23]:
with tqdm.trange(num_epochs) as t:
    for _ in t:
        alpha, beta = guess
    
        # Partial derivative of loss with respect to alpha
        grad_a = sum(2 * error(alpha, beta, x_i, y_i)
                         for x_i, y_i in zip(num_friends_good,
                                             daily_minutes_good))
    
        # Partial derivative of loss with respect to beta
        grad_b = sum(2 * error(alpha, beta, x_i, y_i) * x_i
                         for x_i, y_i in zip(num_friends_good,
                                             daily_minutes_good))
    
        # Compute loss to stick in the tqdm description
        loss = sum_of_sqerrors(alpha, beta,
                                   num_friends_good, daily_minutes_good)
        t.set_description(f"loss: {loss:.3f}")
    
            # Finally, update the guess
        guess = gradient_step(guess, [grad_a, grad_b], -learning_rate)

loss: 14348.419: 100%|███████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1206.27it/s]


In [24]:
# We should get pretty much the same results
alpha, beta = guess
alpha, beta

(19.41175990380865, 1.1870085907235692)

In [25]:
guess

[19.41175990380865, 1.1870085907235692]

In [26]:
assert 18.9 < alpha < 23.0
assert 0.9 < beta < 1.905

In [27]:
# if __name__ == "__main__": main()