## Regressão Linear Univariada

hipótese: $h_{\theta}(x^{(i)}) = \theta_0 + \theta_1 x^{(i)}$

função de custo: $J(\theta) = \frac{1}{2N}\sum_{i=1}^{N}(h_{\theta}(x^{(i)}) - y^{(i)})^2$

In [None]:
#encoding=utf-8
from matplotlib import pyplot as plt
import numpy as np

# Model weights: edit these two values to try a different line.
t0 = 0.  # intercept (theta_0)
t1 = 0  # slope (theta_1)

# Training set: inputs X and the observed targets fx, both as float arrays.
X = np.array([0, 1, 2], dtype=float)
fx = np.array([0, 1, 4], dtype=float)

In [2]:
def hypothesis(x, t0, t1):
    """Evaluate the univariate linear model ``t0 + t1 * x``.

    x  -- input value (anything supporting ``*`` and ``+`` with the weights)
    t0 -- intercept term
    t1 -- slope term
    """
    slope_term = t1 * x
    return t0 + slope_term

In [3]:
def cost_function(X, fx, h, t0, t1):
    """Return the halved mean squared error of ``h`` over the data set.

    X      -- indexable sequence of inputs
    fx     -- indexable sequence of targets, read at the same indices as X
    h      -- callable ``h(x, t0, t1)`` producing a prediction
    t0, t1 -- weights forwarded to ``h``

    The result is ``(1 / (2 * N)) * sum((h(X[i]) - fx[i]) ** 2)`` where
    ``N = len(X)``.
    """
    n_samples = len(X)

    squared_error_total = 0.
    for idx in range(n_samples):
        residual = h(X[idx], t0, t1) - fx[idx]
        squared_error_total += residual ** 2.

    # Keep the scale as a separate factor multiplied last, matching the
    # formula J = 1/(2N) * sum(...).
    scale = 1. / (2. * float(n_samples))
    return scale * squared_error_total

In [None]:
# Show the cost of the current weights on the training set.
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(cost_function(X, fx, hypothesis, t0, t1))

### Projetando as predições

In [None]:
# Model output for every training input, drawn as a blue line.
predictions = [hypothesis(x, t0, t1) for x in X]
plt.plot(X, predictions, c='blue')

# Observed targets, drawn as red dots on the same axes.
plt.scatter(X, fx, c='red')

plt.xlabel('x')
plt.ylabel('f(x)')

# The title reports the weights that produced the line above.
title_text = u'Predições ' + r'para $\theta_0=$' + str(t0)
title_text = title_text + r' e $\theta_1=$' + str(t1)
plt.title(title_text)
plt.show()

### Projetando o gráfico de custos

In [None]:
# Sweep theta_1 over a few values while theta_0 stays fixed at zero.
t1s = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5]
t0s = [0.0] * len(t1s)

# Cost of each (theta_0, theta_1) pair on the training set.
costs = [cost_function(X, fx, hypothesis, a, b) for a, b in zip(t0s, t1s)]

plt.plot(t1s, costs, c='green')
plt.xlabel(r'$\theta_1$')
plt.ylabel(r'$J(\theta_1)$')
plt.title(u'Custo associado ao parâmetro ' + r'$\theta_1$')
plt.show()

***

### Atualização de pesos

In [4]:
def update_t0(X, fx, h, t0, t1, alpha):
    """
    Return the updated t0 computed from the N samples given to this function.

    The new value is ``t0 - alpha * (1/N) * sum(h(X[i], t0, t1) - fx[i])``,
    with ``N = len(X)``; ``alpha`` scales the step.
    """
    n_samples = len(X)

    residuals = [h(X[k], t0, t1) - fx[k] for k in range(n_samples)]

    residual_total = 0.
    for residual in residuals:
        residual_total += residual

    step_scale = alpha * (1. / float(n_samples))
    return t0 - (step_scale * residual_total)


def update_t1(X, fx, h, t0, t1, alpha):
    """
    Atualiza t1 com base nos N valores passados para esta função.
    """
    N = len(X)
    
    soma = 0.
    for i in range(N):
        soma += (h(X[i], t0, t1) - fx[i]) * X[i]
    
    return t1 - ((alpha * (1./float(N))) * soma)

In [None]:
# Reset the values here, for convenience.

t0 = 0.1
t1 = 1.
alpha = 0.5
X, fx = np.array([0, 1, 2], dtype=float), np.array([0, 1, 4], dtype=float)

# Both new weights are computed from the same old (t0, t1) pair; neither
# call sees the other's result.
temp0 = update_t0(X, fx, hypothesis, t0, t1, alpha)
temp1 = update_t1(X, fx, hypothesis, t0, t1, alpha)

# Parenthesised single-argument print runs on both Python 2 and Python 3.
print('antigo theta0: %f novo theta0: %f' % (t0, temp0))
print('antigo theta1: %f novo theta1: %f' % (t1, temp1))

In [None]:
# Predictions before and after the single weight update.
old_predictions = [hypothesis(x, t0, t1) for x in X]
new_predictions = [hypothesis(x, temp0, temp1) for x in X]

plt.plot(X, old_predictions, label=u'modelo antigo', c='blue')
plt.plot(X, new_predictions, label=u'novo modelo', c='purple')

# Observed targets for reference.
plt.scatter(X, fx, label='dados reais', c='red')

plt.legend(loc='upper left')
plt.show()

### Regressão Linear univariada (processo completo)


In [None]:
# Reset the values here, for convenience.

t0 = 0.1  # theta 0
t1 = 1.  # theta 1
alpha = 0.1  # learning rate
X, fx = np.array([0, 1, 2, 3], dtype=float), np.array([1, 2, 4, 9], dtype=float)  # X and fx
threshold = 0.001  # acceptable difference between consecutive costs
batch_size = 2  # number of samples per mini-batch
epoch = 0
max_epoch = 10  # maximum number of epochs allowed


prev = np.inf  # previous cost
curr = cost_function(X, fx, hypothesis, t0, t1)  # current cost

# Stop when the cost changes by no more than `threshold` between epochs, or
# when `max_epoch` epochs have run.
while (abs(curr - prev) > threshold) and (epoch < max_epoch):
    # Walk the data set in consecutive, non-overlapping windows of
    # `batch_size` samples so that every sample is used once per epoch.
    # `bc` is the index of the first sample of the current batch; the last
    # batch is shorter when len(X) is not a multiple of batch_size.
    for bc in range(0, len(X), batch_size):
        X_local = X[bc:(bc + batch_size)]
        fx_local = fx[bc:(bc + batch_size)]

        # Compute both new weights from the same old (t0, t1) pair before
        # overwriting either of them.
        temp0 = update_t0(X_local, fx_local, hypothesis, t0, t1, alpha)
        temp1 = update_t1(X_local, fx_local, hypothesis, t0, t1, alpha)

        t0 = temp0
        t1 = temp1

    prev = curr
    curr = cost_function(X, fx, hypothesis, t0, t1)
    # Parenthesised single-argument print runs on both Python 2 and Python 3.
    print('custo na época %d: %f' % (epoch, curr))
    epoch += 1


# Exercício

custo na época 0: 103588707294442984476508160.000000
custo na época 1: 210913074974230844302020487193006093025411072.000000
custo na época 2: 429432187705221381466682854631615852829267553791090438485049344.000000
custo na época 3: 874350743119285322999469412977428921396838169995201328910104714605723665397972992.000000
custo na época 4: 1780232697689677972918878929697583774559529460827968382287878341381618024228319069495099064374525952.000000
custo na época 5: 3624664910350629560788043173843348001736169531152234270006127173320984592911724384860971539100894293583263679245189120.000000
custo na época 6: 7380044041083734427718418142607935444427123381756209549368906284624200702321509843911008857930361587709070297955895857264529024563019776.000000
custo na época 7: 15026230395202778597634680778892555333248395060284269946443786612393864566905403112109307140121094727278655624726291413429562314661291466775767530556358656.000000
custo na época 8: 3059434315470010733059806806809748705906832876058