# Gradient Descent

This notebook is a showcase for gradient descent on linear regression. "Run all cells" and scroll to the bottom for the visualizations.

In [None]:
%pip install bokeh
%pip install plotly

#### Dependencies

conda install bokeh

conda install plotly

#### Managing imports

In [3]:
from ipywidgets import interact, FloatSlider, widgets, Layout
import numpy as np

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.layouts import layout
from bokeh.models import LinearColorMapper, LogColorMapper

from IPython.display import clear_output

import plotly.graph_objects as go

#### Initiating bokeh library

In [4]:
# Tell Bokeh to render its output inside the notebook.
output_notebook()
# Shared figure styling: background colour and pixel sizes passed to the
# figure(...) calls further down (height2 is used by the "Trained model" and
# "Loss surface" figures, height by the remaining ones).
bgcolor = "#efefef"
width = 400
height = 250
height2 = 180


### Linear Regressor

In [5]:

class LinearRegressor:
    """Univariate linear model ``y = w0 + w1 * x`` scored by mean squared error.

    Attributes:
        w0, w1: current intercept and slope.
        train_X, train_t: training inputs and targets (combined with NumPy
            arithmetic, so array-like values are expected).
        eps: half-width of the central finite-difference step used by
            ``loss_w0`` / ``loss_w1``.
        grad_w0, grad_w1: most recent finite-difference slopes; ``None`` until
            ``loss_w0`` / ``loss_w1`` has been called at least once.
    """

    def __init__(self, w0, w1, train_X, train_t):
        self.w0 = w0
        self.w1 = w1
        self.train_X = train_X
        self.train_t = train_t
        self.eps = .1
        # Declared up front so the attributes always exist; they are filled in
        # by loss_w0() / loss_w1().
        self.grad_w0 = None
        self.grad_w1 = None

    def get_internal_predictions(self, x, w0, w1):
        """Evaluate the line ``w0 + w1 * x`` for explicitly given weights."""
        return w0 + w1 * x

    def get_predictions(self, x):
        """Evaluate the model at ``x`` using the current weights."""
        return self.get_internal_predictions(x, self.w0, self.w1)

    def costs(self, w0, w1):
        """Return the mean squared error on the training data for (w0, w1)."""
        y_pred = self.get_internal_predictions(self.train_X, w0, w1)
        diff = (self.train_t - y_pred)
        mean_squared_error = np.dot(diff, diff) / (len(y_pred))
        return mean_squared_error

    def loss(self):
        """Return the mean squared error for the current weights."""
        return self.costs(self.w0, self.w1)

    def loss_w0(self, domain=None):
        """Sweep the loss over w0 (w1 fixed) and estimate d(loss)/d(w0).

        Args:
            domain: iterable of w0 values to evaluate. When omitted, the
                notebook-level ``w_domain`` grid is used (it must exist then).

        Returns:
            ``(costs, slope)``: the list of losses, one per domain value, and
            the central-difference slope at the current ``w0``. The slope is
            also stored in ``self.grad_w0``.
        """
        if domain is None:
            domain = w_domain  # fall back to the notebook-wide sweep grid
        w0_costs = [self.costs(w, self.w1) for w in domain]
        upper = self.costs(self.w0 + self.eps, self.w1)
        lower = self.costs(self.w0 - self.eps, self.w1)
        w0_costs_grad = (upper - lower) / (2. * self.eps)
        self.grad_w0 = w0_costs_grad
        return w0_costs, w0_costs_grad

    def loss_w1(self, domain=None):
        """Sweep the loss over w1 (w0 fixed) and estimate d(loss)/d(w1).

        Args:
            domain: iterable of w1 values to evaluate. When omitted, the
                notebook-level ``w_domain`` grid is used (it must exist then).

        Returns:
            ``(costs, slope)``: the list of losses, one per domain value, and
            the central-difference slope at the current ``w1``. The slope is
            also stored in ``self.grad_w1``.
        """
        if domain is None:
            domain = w_domain  # fall back to the notebook-wide sweep grid
        w1_costs = [self.costs(self.w0, w) for w in domain]
        upper = self.costs(self.w0, self.w1 + self.eps)
        lower = self.costs(self.w0, self.w1 - self.eps)
        w1_costs_grad = (upper - lower) / (2. * self.eps)
        self.grad_w1 = w1_costs_grad
        return w1_costs, w1_costs_grad

    def gradient(self):
        """Return the analytical loss gradient as a pair ``(d/dw0, d/dw1)``."""
        y_pred = self.get_internal_predictions(self.train_X, self.w0, self.w1)
        diff = (y_pred - self.train_t)
        w0_grad = (2. * np.sum(diff) / len(y_pred))
        gradSum = np.dot(diff, self.train_X)
        w1_grad = 2. * gradSum / len(y_pred)
        return w0_grad, w1_grad

#### Miscellaneous functions

In [6]:
def lin_func(w0, w1, x):
    """Evaluate the straight line with intercept ``w0`` and slope ``w1`` at ``x``."""
    slope_term = w1 * x
    return w0 + slope_term
def setx(plot,xval):
    """Overwrite the 'x' column of the renderer's data source with ``xval``."""
    source = plot.data_source
    source.data['x'] = xval
def sety(plot,yval):
    """Overwrite the 'y' column of the renderer's data source with ``yval``."""
    source = plot.data_source
    source.data['y'] = yval
def setxy(plot,xval,yval):
    """Replace the renderer's whole data dict with list copies of ``xval`` / ``yval``.

    Any other columns previously present in the data source are dropped,
    because the dict is replaced rather than updated.
    """
    fresh_columns = dict(x=list(xval), y=list(yval))
    plot.data_source.data = fresh_columns

### Plot: Trained Model
Shows data points and current model. Can be manually fitted with sliders below plot.

In [7]:
def plot1_init(w0,w1):
    """Build the "Trained model" figure for the weights (w0, w1).

    Creates the globals ``s1`` (figure), ``r1a`` (model line), ``r1b``
    (training points) and ``err1`` (one vertical segment per training point,
    running from the target value to the model's prediction).
    """
    global s1, r1a, r1b, err1
    model_y = lin_func(w0, w1, x)
    err1 = []
    s1 = figure(title="Trained model", plot_width=width, plot_height=height2, background_fill_color=bgcolor, y_axis_label="y = w0+w1*x",x_axis_label="x")
    r1a = s1.line(x, model_y, color="orange", line_width=1.5, alpha=.8)
    r1b = s1.diamond_cross(train_X,train_t)
    for idx in range(len(train_X)):
        px = train_X[idx]
        segment = s1.line([px, px], [train_t[idx], lin_func(w0, w1, px)])
        err1.append(segment)
def plot1_update(w0,w1):
    """Redraw the model line and the per-point error segments for (w0, w1)."""
    global err1
    setxy(r1a, x, lin_func(w0, w1, x))
    point_columns = r1b.data_source.data
    xerr = point_columns['x']
    yerr = point_columns['y']
    idx = 0
    while idx < len(xerr):
        # Segment runs from the observed target to the model's prediction.
        sety(err1[idx], [yerr[idx], lin_func(w0, w1, xerr[idx])])
        idx += 1
def plot1_reset(w0,w1):
    """Reload the training points into the scatter and rebuild every error segment."""
    setxy(r1b, train_X, train_t)
    point_columns = r1b.data_source.data
    xerr = point_columns['x']
    yerr = point_columns['y']
    idx = 0
    while idx < len(xerr):
        anchor = train_X[idx]
        setxy(err1[idx], [anchor, anchor], [yerr[idx], lin_func(w0, w1, xerr[idx])])
        idx += 1

### Loss Plots
These two plots show the loss as a function of a single parameter (w0 or w1) while the other parameter is held at its current value. The red cross marks the current parameter value, and the dashed green line is the tangent to the loss curve at that point.

In [8]:
def plot2_init():
    """Create the two 1D loss figures (one per weight) with placeholder data.

    Sets the globals ``w_min``, ``w_max``, ``w_domain`` and ``loss_w0`` and,
    per figure, a blue loss curve, a red cross for the current position and a
    dashed green tangent line. Real data is filled in later by plot2_update().
    """
    global s2,r2b,r2r,r2tangent,s2_2,r2b_2,r2r_2,r2tangent_2,w_domain,loss_w0,w_min,w_max
    w_min, w_max = -10., 10.
    w_domain = np.arange(w_min, w_max, 0.25)
    loss_w0 = w_domain  # placeholder curve until the first update

    def build_panel(title, weight_label):
        # One figure plus its three renderers, added in the original order.
        panel = figure(title=title, plot_width=width, plot_height=height, background_fill_color=bgcolor, y_axis_label="Loss(w0,w1)", x_axis_label=weight_label)
        return panel

    s2 = build_panel("Loss for w0 in 1D", "w0")
    s2_2 = build_panel("Loss for w1 in 1D", "w1")
    curve_style = dict(color="blue", line_width=1.5, alpha=.8)
    r2b = s2.line(w_domain, loss_w0, **curve_style)
    r2b_2 = s2_2.line(w_domain, loss_w0, **curve_style)
    marker_style = dict(color="red", size=8, line_width=5)
    r2r = s2.cross([1], [1], **marker_style)
    r2r_2 = s2_2.cross([1], [1], **marker_style)
    tangent_style = dict(line_color="green", line_width=1.5, line_dash="4 4")
    r2tangent = s2.line(w_domain, np.ones_like(w_domain), **tangent_style)
    r2tangent_2 = s2_2.line(w_domain, np.ones_like(w_domain), **tangent_style)

def plot2_update():
    """Record the current state in the history and refresh both 1D loss plots.

    Appends the current loss / w0 / w1 to the history lists, re-evaluates the
    per-weight loss curves (which also refreshes the regressor's stored
    finite-difference slopes) and moves the markers and tangent lines.
    """
    global s2, r2b, r2r, r2tangent, history_loss, history_w0, history_w1,current_loss
    current_loss = regressor.loss()
    history_loss.append(current_loss)
    history_w0.append(regressor.w0)
    history_w1.append(regressor.w1)
    curve_w0, slope_w0 = regressor.loss_w0()
    curve_w1, slope_w1 = regressor.loss_w1()

    # Loss curves first, then the markers at the current position.
    sety(r2b, curve_w0)
    sety(r2b_2, curve_w1)
    setxy(r2r, [regressor.w0], [current_loss])
    setxy(r2r_2, [regressor.w1], [current_loss])

    # Tangent through (w, current_loss) with the finite-difference slope.
    tangent = current_loss + slope_w0 * (w_domain - regressor.w0)
    tangent_2 = current_loss + slope_w1 * (w_domain - regressor.w1)
    sety(r2tangent, tangent)
    sety(r2tangent_2, tangent_2)
    
def plot2_manual_update(w0,w1):
    """Apply manually chosen weights to the regressor and refresh the loss plots.

    If the previous action also came from the sliders, the latest history
    entry is discarded first so consecutive slider moves leave one entry.
    """
    global is_last_action_from_sliders,history_loss, history_w0, history_w1
    if is_last_action_from_sliders:
        # Drop the entry written by the preceding slider move.
        for series in (history_loss, history_w0, history_w1):
            del series[-1]
    regressor.w0, regressor.w1 = w0, w1
    plot2_update()
    
def plot2_reset():
    """Put both 1D loss plots back to their placeholder data."""
    grid = np.arange(w_min, w_max, 0.25)
    for curve in (r2b, r2b_2):
        setxy(curve, grid, grid)
    for marker in (r2r, r2r_2):
        setxy(marker, [1], [1])
    for tangent_line in (r2tangent, r2tangent_2):
        setxy(tangent_line, grid, np.ones_like(w_domain))

### 3D Loss Curve
Computes the loss for every (w0, w1) combination on a regular grid. This grid is the data behind the loss heatmap and the 3D surface plot shown further below.

In [9]:
def plot3_init():
    """Evaluate the loss on the (w0, w1) grid and store it in the global ``zz``.

    Rows of ``zz`` correspond to w1 values and columns to w0 values, both
    taken from the global ``w_grid`` built here.
    """
    global w_grid,zz
    w_grid = np.arange(w_min, w_max, w_delta)
    rows = []
    for w1_ in w_grid:
        rows.append([regressor.costs(w0_, w1_) for w0_ in w_grid])
    zz = np.array(rows)


### Loss Development Over Time
Shows the loss of every iteration in a plot

In [10]:
def plot5_init():
    """Create the log-scaled "loss over time" figure with a placeholder line."""
    global s5, r5
    figure_options = dict(
        title="Loss development over time",
        y_axis_type="log",
        y_range=(.1,10000),
        plot_width=width,
        plot_height=height,
        background_fill_color=bgcolor,
        y_axis_label="Loss",
        x_axis_label="iterations (time)",
    )
    s5 = figure(**figure_options)
    r5 = s5.line([0],[0])
    
def plot5_update():
    """Plot the recorded loss history against 1-based iteration numbers."""
    global r5
    iteration_numbers = np.arange(len(history_loss)) + 1
    setxy(r5, iteration_numbers, history_loss)




### Loss Surface (Top View)
Shows the top view of the 3D loss surface together with the path taken so far. Loss is represented by color: the darker the color, the lower the loss.

In [11]:
def plot4_init():
    """Create the loss heatmap figure with the path line and position marker.

    Uses the global ``zz`` grid as image data with a logarithmic Viridis
    colour mapping; the white line and cross start at (-10, -10).
    """
    global s4, r4a, r4b, r4c, color_mapper,fig
    axis_limits = (-10,10)
    extent = w_max - w_min
    color_mapper = LogColorMapper(palette="Viridis256")
    s4 = figure(title="Loss surface and development", x_range=axis_limits, y_range=axis_limits, plot_width=width, plot_height=height2, y_axis_label="w1",x_axis_label="w0")
    r4a = s4.image(image=[zz], color_mapper=color_mapper, dh=[extent], dw=[extent], x=[w_min], y=[w_min])
    start_x, start_y = [-10], [-10]
    r4b = s4.line(start_x, start_y, line_color="white", line_width=3)
    r4c = s4.cross([-10], [-10], color="white", size=8, line_width=5)

def plot4_update():
    """Move the heatmap marker to the current weights and redraw the path."""
    current_w0 = regressor.w0
    current_w1 = regressor.w1
    setxy(r4c, [current_w0], [current_w1])
    setxy(r4b, history_w0, history_w1)

def plot4_reset():
    """Reset the heatmap's image, marker and path renderers to (-10, -10).

    NOTE(review): setxy() replaces a renderer's whole data dict with only
    'x' and 'y', so for the image renderer ``r4a`` this presumably drops the
    image data itself - confirm whether that is intended.
    """
    for renderer in (r4a, r4c, r4b):
        setxy(renderer, [-10], [-10])
def plot4_new_colors():
    """Hide the current heatmap image and add a new one built from the current ``zz``."""
    global r4a,r4b,r4c
    extent = w_max - w_min
    r4a.visible = False
    image_options = dict(image=[zz], color_mapper=color_mapper, dh=[extent], dw=[extent], x=[w_min], y=[w_min])
    r4a = s4.image(**image_options)
def fig3d_init():
    """Build the plotly FigureWidget ``figx``: loss surface plus descent path.

    Trace 0 is the orange path through (w0, w1, loss) history, trace 1 is the
    loss surface over the weight grid.
    """
    global figx
    loss_surface = go.Surface(x=w_grid, y=w_grid, z=zz, showscale=False, colorscale='Viridis', opacity=.9)
    descent_path = go.Scatter3d(
        x=history_w0,
        y=history_w1,
        z=history_loss,
        mode='lines',
        line={'color': 'orange', 'width': 5},
    )

    figx = go.FigureWidget(data=[descent_path, loss_surface])

    scene_axes = {
        'xaxis': {'title': 'w0'},
        'yaxis': {'title': 'w1'},
        'zaxis': {'title': 'loss'},
    }
    figx.update_layout(
        width=800,
        height=600,
        margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
        xaxis={'range': [-10, 10]},
        yaxis={'range': [-10, 10]},
        scene=scene_axes,
    )

def plotly_update(interv=1):
    """Push the recorded history into the 3D path trace of ``figx``.

    Args:
        interv: throttle interval. With the default ``1`` the trace is always
            refreshed; otherwise it is refreshed only when the number of
            recorded history entries is a multiple of ``interv``.
    """
    # Compare by value: identity checks ("is 1" / "is 0") on ints only work by
    # accident of small-int caching and raise a SyntaxWarning on Python >= 3.8.
    if interv != 1 and len(history_loss) % interv != 0:
        return
    with figx.batch_update():
        path_trace = figx.data[0]
        path_trace.x = history_w0
        path_trace.y = history_w1
        path_trace.z = history_loss


  if not interv is 1 and not len(history_loss) % interv is 0:
  if not interv is 1 and not len(history_loss) % interv is 0:


#### Initialization of starting values, regressor and plots 

In [12]:
def new_vals():
    """Draw a fresh random target line and store it in ``w0_true`` / ``w1_true``.

    ``w0_true`` is a uniform draw scaled by 8, ``w1_true`` a uniform draw
    scaled by 16 and shifted by -8.
    """
    # np.random.seed(1337)
    global w0_true, w1_true
    draw_w0, draw_w1 = np.random.rand(2)
    w0_true = draw_w0 * 8
    w1_true = draw_w1 * 16 - 8
    
def init_vals():
    """(Re)initialise the notebook-wide hyperparameters, grids and history lists.

    Sets the learning rate, the x range of the training data, the weight grid
    bounds and resolution, the number of training points, empty history lists
    and the ``running`` flag.
    """
    # w_min / w_max are declared global here: other cells read them as
    # globals, and as plain locals they would be discarded on return.
    global x_min, x_max, w_delta, n_train, w0_true, w1_true, history_w0, history_w1, history_loss, learning_rate, running, w_domain, w_min, w_max
    learning_rate = .02
    x_min = 0.0
    x_max = 10.0
    w_delta = .25
    # np.random.seed(1337)
    n_train = 16
    history_w0 = []
    history_w1 = []
    history_loss = []
    running = False
    w_min = -10.
    w_max = 10.
    w_domain = np.arange(w_min, w_max, w_delta)

def init_regressor():
    """Sample noisy training data around the true line and create the regressor.

    Training inputs are uniform draws scaled by ``x_max``; targets are the
    true line plus standard-normal noise. The regressor starts at
    (w0, w1) = (-10, -10) and its finite-difference slopes are computed once
    so they are available before the first descent step.
    """
    global train_X, train_t, regressor
    train_X = np.random.rand(n_train) * x_max
    clean_targets = lin_func(w0_true, w1_true, train_X)
    noise = np.random.normal(0, 1, n_train)
    train_t = clean_targets + noise
    regressor = LinearRegressor(-10., -10., train_X, train_t)
    for prime_slope in (regressor.loss_w0, regressor.loss_w1):
        prime_slope()
def init_plots():
    """Create the x sampling grid and build every figure of the dashboard."""
    global x
    x = np.arange(x_min, x_max, 0.01)
    plot1_init(-10, -10)
    # plot3_init must precede plot4_init / fig3d_init, which read the grid it computes.
    for build in (plot2_init, plot3_init, plot5_init, plot4_init, fig3d_init):
        build()


In [13]:
# One-time setup, in dependency order: pick the true line, set the
# hyperparameters, sample training data / create the regressor, build the plots.
new_vals()
init_vals()    
init_regressor()
init_plots()

#### reset function to start over

In [14]:
def reset():
    """Start over: fresh hyperparameters, training data, regressor and plot contents.

    The true line (w0_true / w1_true) is not redrawn here; only the training
    sample and the regressor are re-created.
    """
    global running, t, learning_rate
    # Clearing the flag makes a running n_steps()/solve() loop stop at its next check.
    running = False
    #t.do_run = False
    #t.join()
    init_vals()
    # init_vals() restored the default learning rate; re-apply the slider value (given in percent).
    learning_rate = learning_rate_slider.value/100
    init_regressor()
    plot1_reset(-10, -10)
    plot2_reset()
    # Recompute the loss grid for the new training data.
    plot3_init()
    setxy(r5,[0],[0])
    plot4_reset()
    update()
    plotly_update()
    push_notebook()

#### Update and regressor progression functions

In [15]:

# While True, update_plot_sliders() returns immediately; a later cell sets
# this to False after the plots have been shown.
firstrun = True
def update_plot_sliders(w0=regressor.w0,w1=regressor.w1,learning=learning_rate*100,eps=regressor.eps):
    """Slider callback: apply manual w0 / w1 / learning-rate / eps values.

    Args:
        w0, w1: weights to display and store in the regressor.
        learning: learning rate in percent (divided by 100 before use).
        eps: value stored in ``regressor.eps``.

    Note: the default values are evaluated once, when this function is defined.
    """
    global learning_rate,is_button_action, is_last_action_from_sliders, firstrun
    
    if firstrun:
        return
    
    learning_rate = learning/100
    regressor.eps = eps
    plot1_update(w0,w1)
    
    #remove next line to enable manual "next step" functionality
    is_last_action_from_sliders = False
    
    # When the slider values were changed programmatically (update_sliders),
    # only the model plot is refreshed; history and other plots are left alone.
    if is_button_action:
        return

    plot2_manual_update(w0,w1)
    plot5_update()
    plot4_update()
    plotly_update()

    is_last_action_from_sliders = True
    push_notebook()
def update(w0=None,w1=None):
    """Redraw the model plot and the 1D loss plots, then push to the notebook.

    Args:
        w0, w1: weights used for the model plot. When omitted, the regressor's
            current weights are used. They are looked up at call time; as
            default-argument expressions they would be frozen at whatever the
            regressor held when this function was defined.
    """
    if w0 is None:
        w0 = regressor.w0
    if w1 is None:
        w1 = regressor.w1
    plot1_update(w0,w1)
    plot2_update()
    push_notebook()
def update_sliders():
    """Copy the regressor's current weights into the w0 / w1 sliders.

    ``is_button_action`` is raised around the assignments so that
    update_plot_sliders() can tell a programmatic change from a user drag.
    """
    global is_button_action, is_last_action_from_sliders
    is_last_action_from_sliders = False
    is_button_action = True
    w0_slider.value = regressor.w0
    w1_slider.value = regressor.w1
    is_button_action = False

def single_step():
    """Take one gradient-descent step and mirror the new weights in the sliders.

    Each weight moves against its stored finite-difference slope, scaled by
    the global learning rate.
    """
    step_w0 = learning_rate * regressor.grad_w0
    regressor.w0 = regressor.w0 - step_w0
    step_w1 = learning_rate * regressor.grad_w1
    regressor.w1 = regressor.w1 - step_w1

    update_sliders()


def n_steps(n):
    """Run up to ``n`` descent steps, refreshing the plots after each one.

    The loop stops early (leaving ``running`` untouched) if the global
    ``running`` flag is cleared in the meantime. The 3D plot is throttled
    during the loop and refreshed once more at the end.
    """
    global running
    running = True
    for _ in range(n):
        if not running:
            return
        single_step()
        for refresh in (plot2_update, plot5_update, plot4_update):
            refresh()
        plotly_update(3)
        push_notebook()
    plotly_update()
    running = False
def solve():
    """Iterate gradient descent until the loss is small or stops improving.

    After one initial step, keeps stepping while the current loss exceeds
    ``regressor.eps`` and the last step still reduced the loss by more than
    ``regressor.eps``. Gives up after ``limit`` loop iterations, and stops
    silently if the global ``running`` flag is cleared. Progress and the
    outcome are reported with print().
    """
    global running, current_loss, iterations
    print("solving with learning rate {}%".format(learning_rate*100))
    i = 1
    # The first step goes through n_steps(), which also refreshes current_loss and the history.
    n_steps(1)
    running = True
    limit = 250
    iterations = len(history_loss)
    # history_loss[iterations-2] is the loss before the most recent step.
    while current_loss > regressor.eps and (iterations < 2 or (history_loss[iterations-2]-current_loss) > regressor.eps):
        
        if i > limit:
            
            # NOTE(review): this exit (and the one below) returns without resetting `running` to False.
            print("unable to solve for eps={} with {} steps, try again for another {}".format(regressor.eps,iterations,limit))
            return
        if not running:
            return
        single_step()
        plot2_update()
        plot5_update()
        plot4_update()
        plotly_update(5)
        push_notebook()
        iterations = len(history_loss)
        i+=1
    running = False
    # A negative difference means the last step increased the loss.
    if iterations>1 and history_loss[iterations-2]-current_loss < 0:
        print("learning rate to high, not converging! last iteration: loss risen by {}".format(current_loss-history_loss[iterations-2]))
    else:
        print("solved after {} steps (eps={})".format(iterations,regressor.eps))
        # NOTE(review): with a single history entry, index iterations-2 is -1, i.e. the current loss itself, so this prints 0.
        print("latest loss change: {}".format(abs(current_loss-history_loss[iterations-2])))
    #print(iterations)
    #print(current_loss)
    # Final unthrottled refresh of the 3D plot.
    plotly_update()
def output_text():
    """Clear the cell output and return a two-line summary of model and loss."""
    clear_output()
    slope = regressor.w1
    sign = '-' if regressor.w0 < 0 else '+'
    model_line = 'f(x) = {}x {} {}'.format(slope, sign, abs(regressor.w0))
    loss_line = 'current loss: {}'.format(regressor.loss())
    return model_line + "\n" + loss_line

#update()


#### Buttons, sliders, etc.

In [16]:
from threading import Thread

# Sliders for the two weights, the learning rate (in percent) and eps.
# continuous_update=False: the callback fires when a drag is released.
w0_slider = FloatSlider(min=-10,max=10,step=.1,value=-10,layout=Layout(width='50%'), continuous_update=False)
w1_slider = FloatSlider(min=-10,max=10,step=.1,value=-10,layout=Layout(width='50%'), continuous_update=False)
learning_rate_slider = FloatSlider(min=.1,max=5,step=.01,value=2,description="learning %",layout=Layout(width='50%'), continuous_update=False)
eps_slider = FloatSlider(min=.01, max=1, step=.01, description="eps",layout=Layout(width='50%'), continuous_update=False)

# Action buttons; their click handlers are attached further down.
button_1 = widgets.Button(description='1 step')
button_n = widgets.Button(description='25 steps')
button_up = widgets.Button(description='solve')
button_reset = widgets.Button(description='reset')
button_new = widgets.Button(description='new values')
button_save_manual = widgets.Button(description='next manual input')
#out = widgets.Output()
# Flags shared by update_sliders() and update_plot_sliders() to tell
# programmatic slider changes apart from user input.
is_button_action = False
is_last_action_from_sliders = False

def on_button_1_clicked(_):
    """Click handler for the '1 step' button: run a single descent step."""
    #if not running:
    n_steps(1)

def on_button_n_clicked(_):
    """Click handler for the '25 steps' button: run 25 descent steps."""
    #if not running:
    
    n_steps(25)
    #update()
    
def simulate_clicked(_):
    """Click handler for the 'solve' button: iterate until solve() stops."""
    solve()

def on_button_reset_clicked(_):
    """Click handler for the 'reset' button: start over and sync the sliders."""
    reset()
    update_sliders()
def on_button_newvalues_clicked(_):
    """Click handler for 'new values': draw a new true line, reset, rebuild the heatmap.

    Prints the new target function afterwards.
    """
    new_vals()
    reset()
    # reset() recomputed the loss grid; replace the heatmap image with it.
    plot4_new_colors()
    push_notebook()
    print('goal: f(x) = {}x {} {}'.format(w1_true,'-' if w0_true < 0 else '+',abs(w0_true)))
def on_button_save_manual_clicked(_):
    """Click handler for 'next manual input': re-sync the sliders with the regressor."""
    update_sliders()
        
# Attach each button to its click handler.
button_1.on_click(on_button_1_clicked)
button_n.on_click(on_button_n_clicked)
button_up.on_click(simulate_clicked)
button_reset.on_click(on_button_reset_clicked)
button_new.on_click(on_button_newvalues_clicked)
button_save_manual.on_click(on_button_save_manual_clicked)

#### Displaying the plots

In [17]:
# Bind the four sliders to update_plot_sliders and show them.
interact(update_plot_sliders,w0=w0_slider,w1=w1_slider,learning=learning_rate_slider,eps=eps_slider)
# Button row shown as the cell's output; the 'new values' and 'next manual
# input' buttons are not part of this row.
widgets.HBox([button_1,button_n,button_up,button_reset])#,button_save_manual])


interactive(children=(FloatSlider(value=-10.0, continuous_update=False, description='w0', layout=Layout(width=…

HBox(children=(Button(description='1 step', style=ButtonStyle()), Button(description='25 steps', style=ButtonS…

In [18]:
# Print the true target line, then lay out and show the Bokeh figures.
print('goal: f(x) = {}x {} {}'.format(w1_true,'-' if w0_true < 0 else '+',abs(w0_true)))
plot = layout([[s1,s4],[s2,s2_2,s5],], sizing_mode='scale_width')
# notebook_handle=True is requested so later push_notebook() calls can update this output.
show(plot,notebook_handle=True)

# From here on, slider callbacks are no longer ignored by update_plot_sliders().
firstrun = False

goal: f(x) = -3.05836426789414x + 6.949910796502894


### 3D-Plot with loss development

In [19]:
# Display the 3D FigureWidget created by fig3d_init().
figx

FigureWidget({
    'data': [{'line': {'color': 'orange', 'width': 5},
              'mode': 'lines',
         …

#### Changing of Jupyter page-width (for larger plots)

1. find the following file (in user directory):
        
        .../Anaconda3/envs/<env-name>/Lib/site-packages/notebook/static/custom/custom.css

2. add the following code to it

        .container {
            width:70% !important;
        }
3. press ctrl-F5 to refresh browser and cached stylesheets

#### Scroll to Zoom

Press the "wheel zoom" button on the right of a plot. Scroll on the graph to zoom both axes equally, or scroll on a single axis to zoom only the x or y axis.

In [20]:
# Reset twice once everything has been displayed.
# NOTE(review): the original comment here was only "don't ask..."; why two
# resets are required is not visible in this notebook - confirm before changing.
for i in range(2):
    reset()

