# PRÁCTICA 5: REGRESIÓN LINEAL REGULARIZADA, SESGO Y VARIANZA

## 1. Regresión Lineal Regularizada

In [6]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.figure import Figure
import numpy as np
from pandas.io.parsers import read_csv
import scipy.optimize as opt
from scipy.io import loadmat

In [7]:
def coste(cThetas, mX, cY, lamb):
    """Return the regularized squared-error cost of a linear hypothesis.

    Args:
        cThetas: Parameter vector. Element 0 is the intercept and the
            remaining elements weight the columns of ``mX``. Any shape is
            accepted; it is flattened before use.
        mX: Feature matrix with one row per example and *without* the
            leading column of ones (it is added here).
        cY: Target values, one per row of ``mX``; flattened before use.
        lamb: Regularization strength.

    Returns:
        ``(sum((X @ theta - y) ** 2) + lamb * sum(theta[1:] ** 2)) / (2 * m)``
        where ``m`` is the number of rows of ``mX``.
    """
    theta = np.asarray(cThetas, dtype=float).ravel()
    m = len(mX)
    design = np.asarray(np.c_[np.ones(m), mX])
    residuals = np.dot(design, theta) - np.ravel(cY)
    data_term = np.sum(np.power(residuals, 2)) / float(2 * m)
    # The intercept is not penalized, so the cost matches the gradient,
    # which zeroes the regularization term of theta[0].
    penalty = (lamb / float(2 * m)) * np.sum(np.power(theta[1:], 2))
    return data_term + penalty


In [8]:
def gradiente(thetas, mX, cY, lamb):
    """Return the gradient of the regularized squared-error cost.

    Args:
        thetas: Parameter vector. Element 0 is the intercept and the
            remaining elements weight the columns of ``mX``. Any shape is
            accepted; it is flattened before use.
        mX: Feature matrix with one row per example and *without* the
            leading column of ones (it is added here).
        cY: Target values, one per row of ``mX``; flattened before use.
        lamb: Regularization strength.

    Returns:
        A 1-D array with one partial derivative per parameter:
        ``X.T @ (X @ theta - y) / m + (lamb / m) * theta``, where the
        regularization term of the intercept is zero.
    """
    theta = np.asarray(thetas, dtype=float).ravel()
    m = mX.shape[0]
    design = np.asarray(np.c_[np.ones(m), mX])
    residuals = np.dot(design, theta) - np.ravel(cY)
    # The penalty is scaled by lamb / m exactly once; multiplying it by
    # 1 / m again would shrink it to lamb / m**2.
    penalty = (lamb / float(m)) * theta
    penalty[0] = 0  # the intercept is not regularized
    return np.dot(design.T, residuals) / float(m) + penalty

In [9]:
def parametros(thetas, mX, mY, lamb):
    """Fit the regression parameters by minimizing the regularized cost.

    Runs ``scipy.optimize.minimize`` with the ``TNC`` method, using
    ``coste`` as objective and ``gradiente`` as its jacobian.

    Args:
        thetas: Initial guess for the parameters (intercept first). Any
            shape is accepted; it is flattened before being handed to the
            optimizer.
        mX: Feature matrix, forwarded unchanged to ``coste``/``gradiente``.
        mY: Target values, forwarded unchanged to ``coste``/``gradiente``.
        lamb: Regularization strength, forwarded unchanged.

    Returns:
        The ``x`` attribute of the optimizer result (the fitted parameters).
    """
    # minimize() documents x0 as a 1-D array of shape (n,); callers in this
    # file pass a column matrix, so flatten it explicitly.
    x0 = np.asarray(thetas, dtype=float).ravel()
    result = opt.minimize(
        fun=coste,
        x0=x0,
        jac=gradiente,
        args=(mX, mY, lamb),
        method='TNC',
        options={'maxiter': 70},
    )
    return result.x

In [10]:
def errorHipotesis(cThetas, mX, cY):
    """Return the unregularized squared-error cost of a linear hypothesis.

    Args:
        cThetas: Parameter vector (intercept first); it is turned into a
            column through ``np.matrix(...).transpose()``.
        mX: Feature matrix without the column of ones (it is added here).
        cY: Target values, subtracted from the predictions as given.

    Returns:
        The sum of squared residuals multiplied by ``1 / (2 * len(mX))``.
    """
    n_examples = len(mX)
    theta_col = np.matrix(cThetas).transpose()
    design = np.c_[np.ones(mX.shape[0]), mX]
    residuals = np.dot(design, theta_col) - cY
    scale = 1 / (float(2 * n_examples))
    return scale * np.sum(np.power(residuals, 2))

In [11]:
def polDimNormalizado(cM, dim):
    """Expand a feature column into its first ``dim`` powers and normalize.

    Column ``k`` of the expanded matrix is column ``k - 1`` multiplied by
    column 0, so starting from the values of ``cM`` the columns hold
    ``x, x^2, ..., x^dim``.

    Args:
        cM: Input values, one example per row.
        dim: Number of polynomial columns to produce.

    Returns:
        Whatever ``normalizar`` returns for the expanded matrix.
    """
    # Start with cM followed by placeholder columns that the loop overwrites.
    features = np.c_[cM, np.ones((cM.shape[0], dim - 1))]
    for power in range(1, dim):
        features[:, power] = features[:, power - 1] * features[:, 0]
    return normalizar(features)

In [12]:
def normalizar(datos):
    """Standardize each column of a 2-D array to zero mean, unit deviation.

    The input is never modified: the computation runs on a float copy, so
    integer arrays are not truncated and the caller's data stays intact.

    Args:
        datos: 2-D array-like with one example per row and one feature per
            column.

    Returns:
        A tuple ``(normalizados, mu, sigma)`` where ``normalizados`` is the
        standardized float array, and ``mu`` / ``sigma`` are lists with the
        per-column mean and the per-column divisor that was applied. For a
        constant column the divisor is 1 (instead of a zero deviation), so
        that column becomes all zeros rather than NaN.
    """
    valores = np.array(datos, dtype=float)  # float copy; leaves the input alone
    mu = valores.mean(axis=0)
    sigma = valores.std(axis=0)
    # A constant column has zero deviation; dividing by it would yield NaN.
    sigma[sigma == 0] = 1.0
    return ((valores - mu) / sigma, mu.tolist(), sigma.tolist())

In [13]:
def regresion():
    """Run the exercise: load the data, plot learning curves, fit and plot.

    Reads ``ex5data1.mat`` from the working directory, draws the learning
    curves for the raw features, then fits an 8th-degree normalized
    polynomial with ``lamb = 0`` and plots it over the training data.

    Each step gets its own initial parameter vector, sized for the feature
    matrix it is trained on (intercept plus one weight per column).
    """
    data = loadmat('ex5data1.mat')
    y = data['y']
    X = data['X']
    yVal = data['yval']
    XVal = data['Xval']

    # The learning curves are trained on the raw X, so the initial guess
    # must have X.shape[1] + 1 entries, not the polynomial size.
    thetasLineal = np.ones(X.shape[1] + 1)
    curvasDeAprendizaje(X, y, XVal, yVal, thetasLineal)

    nX8, _, _ = polDimNormalizado(X, 8)
    thetasPol = np.ones(nX8.shape[1] + 1)
    graficaRegresion(X, y, parametros(thetasPol, nX8, y, 0))

In [14]:
def curvasDeAprendizaje(X, y, XVal, yVal, thetas):
    """Compute and plot training/validation error versus training-set size.

    For every size ``i`` from 1 up to and including the number of rows of
    ``X``, the parameters are fitted (with ``lamb = 0``) on the first ``i``
    examples; the error is then measured on those same examples and on the
    whole validation set.

    Args:
        X: Training features, one example per row.
        y: Training targets, aligned with ``X``.
        XVal: Validation features.
        yVal: Validation targets, aligned with ``XVal``.
        thetas: Initial guess for the first fit. Each later fit starts from
            the parameters found by the previous one.
    """
    error = []
    errorVal = []
    # The upper bound is inclusive so the last point uses the full training set.
    for i in range(1, X.shape[0] + 1):
        thetas = parametros(thetas, X[:i], y[:i], 0)
        error.append(errorHipotesis(thetas, X[:i], y[:i]))
        errorVal.append(errorHipotesis(thetas, XVal, yVal))
    graficaCurvaAprendizaje(error, errorVal)

In [15]:
def graficaCurvaAprendizaje(error, errorVal):
    """Plot the training and cross-validation error curves on the current axes.

    Args:
        error: Training errors; element ``k`` is drawn at x = ``k + 1``.
        errorVal: Validation errors; element ``k`` is drawn at x = ``k + 1``.
    """
    plt.xlabel('Numero de ejemplos de entrenamiento')
    plt.ylabel('Error')
    # The x axis counts training examples, so it starts at 1 rather than 0.
    plt.plot(range(1, len(error) + 1), error, '-', color='blue',
             label='Entreno')
    plt.plot(range(1, len(errorVal) + 1), errorVal, '-', color='orange',
             label='Validacion cruzada')
    plt.legend()

In [16]:
def graficaRegresion(X, y, thetas, dim=8):
    """Plot the training points and the fitted polynomial, then save the figure.

    The curve is evaluated on an evenly spaced grid spanning the range of
    ``X``. The grid is expanded to powers ``1..dim`` and scaled with the
    mean and deviation of the training features, so it goes through the
    same transformation as the data the parameters were fitted on.

    Args:
        X: Training inputs; flattened before use.
        y: Training targets; flattened before use.
        thetas: Fitted parameters, intercept first, followed by ``dim``
            weights. Assumed to come from a fit on
            ``polDimNormalizado(X, dim)`` features.
        dim: Degree of the polynomial expansion (defaults to 8).

    Side effects:
        Opens a new matplotlib figure and writes it to ``minimize.png``.
    """
    plt.figure()
    X = np.ravel(X)
    y = np.ravel(y)
    plt.scatter(X, y, c="red", marker='X')
    plt.xlabel('Cambios en el nivel del agua')
    plt.ylabel('Agua derramada de la presa')

    thetas = np.asarray(thetas, dtype=float).ravel()
    # Only the normalization statistics of the training features are needed.
    _, mu, sigma = polDimNormalizado(X, dim)

    # The grid spans the range of X (the horizontal axis), not of y.
    grid = np.linspace(X.min(), X.max(), 100, endpoint=True)
    powers = grid[:, np.newaxis] ** np.arange(1, dim + 1)
    features = (powers - np.asarray(mu)) / np.asarray(sigma)
    prediction = thetas[0] + np.dot(features, thetas[1:])

    plt.plot(grid, prediction, '-', color="blue")
    plt.savefig('minimize.png')

Autores: Montserrat Sacie Alcázar y Tomás Golomb Durán