In [38]:
import altair as alt
import pandas as pd
import numpy as np

In [59]:
def dmse(xs, ys, m):
    '''
    Derivative of the mean squared error with respect to the slope m,
    for the 1-D model y = m * x.
    xs are the inputs, ys are truth; the summed terms are divided by len(xs)
    '''
    total = 0
    for x_i, y_i in zip(xs, ys):
        # residual of the current fit at this point
        residual = y_i - (m * x_i)
        total = total + (-2 * x_i * residual)
    return total / len(xs)

def mse(preds, ys):
    '''
    Mean squared error between predictions and truth.
    preds are the predicted values, ys are the true values;
    the summed squared differences are divided by len(ys)
    '''
    squared_error_total = 0
    for predicted, actual in zip(preds, ys):
        diff = predicted - actual
        squared_error_total = squared_error_total + diff ** 2
    return squared_error_total / len(ys)

def get_preds(m_t, xs):
    '''
    Predictions of the model y = m_t * x, one per element of xs,
    returned as a list in the same order as xs
    '''
    return [m_t * x_val for x_val in xs]

def generate_some_data(real_m=2, seed=None):
    '''
    Generate some data, using the equation y=mx + e
    where e is random Gaussian error (mean 0, standard deviation 1).

    The x values are np.linspace(0, 10), i.e. numpy's default of 50 evenly
    spaced points. Only the ys are returned, so a caller has to build the
    same xs grid itself to pair them up.

    real_m is the true slope used to generate the ys.
    seed, if given, seeds a private np.random.RandomState so the same ys
    come back on every call; if None (the default) the noise is drawn from
    numpy's global random state, exactly as before.

    Returns a list of y values, one per x.
    '''
    # a private generator leaves numpy's global random state untouched
    rng = np.random if seed is None else np.random.RandomState(seed)
    xs = np.linspace(0, 10)
    ys = []
    for _x in xs:
        e = rng.normal(0, 1, 1)[0]
        y = (_x * real_m) + e
        ys.append(y)
    return ys

def plot_line(xs, ys, m_t):
    '''
    Chart the data together with the fitted line for the parameter m_t.
    The (x, y) pairs are drawn as points and the predictions m_t * x as a
    line; the two are returned as one layered altair chart.
    '''
    records = [
        {"x": x_val, "y": y_val, "pred": m_t * x_val}
        for x_val, y_val in zip(xs, ys)
    ]
    frame = pd.DataFrame(records)

    fitted_line = alt.Chart(frame).mark_line().encode(x="x", y="pred")
    observed = alt.Chart(frame).mark_point().encode(x="x", y="y")

    # points first, line second: same layer order as before
    return observed + fitted_line

In [75]:
# learning via gradient descent
# NOTE(review): xs and ys are not assigned in any cell shown in this notebook
# (generate_some_data returns only ys) -- confirm where they are defined so the
# notebook survives Restart Kernel -> Run All.
eta = .001  # learning rate: scales the derivative in the update step below
m_t = 1  # starting guess for the slope parameter
iters = 10  # number of update steps to take

for i in range(iters):
    # derivative of the mse with respect to m, evaluated at the current m_t
    d = dmse(xs, ys, m_t)
    # step against the derivative, scaled by the learning rate
    m_t -= d * eta
    # predictions under the updated m_t (not used again inside this cell)
    preds = get_preds(m_t, xs)
    # chart is rebuilt every iteration; only the last one stays bound to c,
    # and this cell does not display it
    c = plot_line(xs, ys, m_t)

#### Questions 

1. What is each line in the cell called "learning via gradient descent" doing?
2. What happens if you vary eta, the learning rate? 
3. What is the final value of m_t at the end of the loop? Does that make sense, based on the data generating process?
4. Plot the loss (the mean squared error, computed with `mse`) against the iteration number.