In [1]:
import numpy as np
import math

############################################################
# Optimization problem

In [5]:
# Weight vector used to produce the synthetic targets in generate().
trueW = np.array([1, 2, 3, 4, 5])

def generate():
    """Draw one synthetic (x, y) example.

    x is a standard-normal vector with one entry per component of trueW;
    y is trueW . x plus a single standard-normal noise draw.
    """
    features = np.random.randn(len(trueW))
    noise = np.random.randn()
    target = trueW.dot(features) + noise
    return (features, target)

# Build the training set: one million independent draws from generate().
trainExamples = [generate() for _ in range(1000000)]

def phi(x):
    """Feature map: return the input as a freshly allocated NumPy array."""
    features = np.array(x, copy=True)
    return features

def initialWeightVector():
    """Return an all-zeros vector with one entry per component of trueW."""
    dimension = len(trueW)
    return np.zeros(dimension)

def trainLoss(w):
    """Average squared prediction error of w over every pair in trainExamples."""
    # Computed first so an empty training set fails on the division, as before.
    scale = 1.0 / len(trainExamples)
    squaredErrorTotal = 0
    for features, target in trainExamples:
        residual = w.dot(phi(features)) - target
        squaredErrorTotal = squaredErrorTotal + residual ** 2
    return scale * squaredErrorTotal

def gradientTrainLoss(w):
    """Gradient with respect to w of the average squared error over trainExamples."""
    # Computed first so an empty training set fails on the division, as before.
    scale = 1.0 / len(trainExamples)
    gradientTotal = 0
    for features, target in trainExamples:
        featureVector = phi(features)
        residual = w.dot(featureVector) - target
        # Rebind rather than += so the running total never aliases another array.
        gradientTotal = gradientTotal + 2 * residual * featureVector
    return scale * gradientTotal

def loss(w, i):
    """Squared prediction error of w on the i-th entry of trainExamples."""
    features, target = trainExamples[i]
    residual = w.dot(phi(features)) - target
    return residual ** 2

def gradientLoss(w, i):
    """Gradient with respect to w of the squared error on the i-th entry of trainExamples."""
    features, target = trainExamples[i]
    featureVector = phi(features)
    residual = w.dot(featureVector) - target
    return 2 * residual * featureVector

Se truncaron las últimas 5000 líneas del resultado de transmisión.
example [ 0.95918049  1.41983104  0.2301293   0.76279951 -0.59487991] 3.95372675203897
example [-0.39225532 -0.80823955 -0.40241577  0.03314825  0.26417816] -0.7246254653628195
example [ 1.00683533  0.60512081  0.00265852  1.45312035 -0.34072303] 6.482139357691089
example [-0.10312196 -0.49865012 -0.05319199  0.81670465 -0.21965493] 1.8446809222413967
example [-0.6025265  -0.70906859 -0.17395279  1.58817078 -0.06710671] 3.803279081829467
example [-0.53256908 -0.03998073 -0.44897859  0.99860442 -1.22361424] -3.50515994544115
example [-0.68226009  0.34292908  0.71031816 -0.25990666 -0.00398542] 0.054825480331895715
example [ 0.8164377   0.6594402   0.96149399  2.06220125 -0.51873191] 11.389290856738585
example [ 0.88745686  0.98942626  1.28663446 -0.75457077 -0.15030934] 1.7826796360442232
example [ 0.46131814 -0.90989118 -0.24013021  0.26159463  0.04834156] -1.7308497389839332
example [ 0.23227382 -1.811686

KeyboardInterrupt: ignored

############################################################
# Optimization algorithm


In [3]:
def gradientDescent(F, gradientF, initialWeightVector, eta=0.1, numIters=500):
    """Minimize F with fixed-step-size (batch) gradient descent.

    Args:
        F: callable taking a weight vector and returning the objective value.
        gradientF: callable taking a weight vector and returning the gradient of F.
        initialWeightVector: zero-argument callable returning the starting weights.
        eta: step size applied to every update (default 0.1).
        numIters: number of update steps to perform (default 500).

    Returns:
        The weight vector after the last update; the starting weights if
        numIters is 0.
    """
    w = initialWeightVector()
    for t in range(numIters):
        value = F(w)
        gradient = gradientF(w)
        w = w - eta * gradient
        # Note: value and gradient were evaluated before the step, whereas the
        # w printed here is the post-step weight vector.
        print(f'epoch {t}: w = {w}, F(w) = {value}, gradientF = {gradient}')
    # Hand the result back so callers do not have to parse the printed log.
    return w

def stochasticGradientDescent(f, gradientf, n, initialWeightVector, numEpochs=500):
    """Minimize a sum of per-example losses with stochastic gradient descent.

    Each epoch visits example indices 0..n-1 in order and takes one step per
    example with step size 1 / sqrt(number of updates taken so far).

    Args:
        f: callable (w, i) returning the loss of weights w on example i.
        gradientf: callable (w, i) returning the gradient of that loss.
        n: number of examples; indices passed to f / gradientf are range(n).
        initialWeightVector: zero-argument callable returning the starting weights.
        numEpochs: number of full passes over the examples (default 500).

    Returns:
        The weight vector after the last update; the starting weights if no
        update was performed (n == 0 or numEpochs == 0).
    """
    w = initialWeightVector()
    numUpdates = 0
    # Pre-initialize so the per-epoch print does not fail when n == 0.
    value = None
    gradient = None
    for t in range(numEpochs):
        for i in range(n):
            value = f(w, i)
            gradient = gradientf(w, i)
            numUpdates += 1
            # Decaying step size: shrinks as more updates are taken.
            eta = 1.0 / math.sqrt(numUpdates)
            w = w - eta * gradient
        # Note: the value / gradient shown are those of the last example visited
        # in this epoch (evaluated before its step), not of the full objective.
        print(f'epoch {t}: w = {w}, F(w) = {value}, gradientF = {gradient}')
    # Hand the result back so callers do not have to parse the printed log.
    return w

In [4]:
# Run SGD over every training example, starting from the all-zeros weight vector.
stochasticGradientDescent(
    f=loss,
    gradientf=gradientLoss,
    n=len(trainExamples),
    initialWeightVector=initialWeightVector,
)

epoch 0: w = [0.93816301 2.01259262 3.02435927 4.01239817 5.04381949], F(w) = 1.2638493976732077, gradientF = [-1.94365976  3.19227547  1.46478589  1.11536843 -4.24104353]
epoch 1: w = [0.94095215 2.01152811 3.02303848 4.0063356  5.02903608], F(w) = 1.2992708584175765, gradientF = [-1.97070868  3.23670073  1.48517056  1.13089044 -4.30006397]
epoch 2: w = [0.94266549 2.01031857 3.02250133 4.00308507 5.02244815], F(w) = 1.3112433989733479, gradientF = [-1.97976772  3.25157933  1.49199766  1.13608896 -4.31983068]
epoch 3: w = [0.94400478 2.00940315 3.02215325 4.00100513 5.01851693], F(w) = 1.3169245865866566, gradientF = [-1.98405192  3.25861573  1.49522633  1.13854745 -4.32917877]
epoch 4: w = [0.94515571 2.00869199 3.02185653 3.99956879 5.01585147], F(w) = 1.3198598530691719, gradientF = [-1.9862618   3.26224524  1.49689174  1.13981559 -4.33400069]
epoch 5: w = [0.9461879  2.00811242 3.02157372 3.99853086 5.01390685], F(w) = 1.3213109819508873, gradientF = [-1.9873534   3.2640381   1.49

KeyboardInterrupt: ignored