In [None]:
import math

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler

In [None]:
def model(A, THETA):
    """
    Evaluate the multivariate linear regression hypothesis Yh = A * THETA.

    Parameters
    ----------
    A : ndarray
        Design matrix (one row per sample).
    THETA : ndarray
        Parameter matrix; its row count must match the column count of A.

    Returns
    -------
    ndarray
        The matrix product ``A.dot(THETA)``, i.e. the model predictions.
    """
    predictions = A.dot(THETA)
    return predictions

In [3]:
# Design Matrix
def designMatrix(Tau,X):
    """
    Build the design matrix of a polynomial model of degree ``Tau``.

    Row ``p`` of the result is ``powerVector(Tau, X[p, :])``, i.e. the
    monomial expansion of sample ``p``.

    Parameters
    ----------
    Tau : int
        Polynomial degree passed through to ``powerVector``.
    X : ndarray of shape (q, n)
        Input samples, one per row.

    Returns
    -------
    ndarray of shape (q, rho)
        Always two-dimensional, including for a single sample (q == 1) and
        for no samples (q == 0). ``rho`` is the length of one expanded row.
    """
    q, n = X.shape
    if q == 0:
        # Nothing to stack: expand a dummy sample only to learn the column
        # count, so that the empty result still has a consistent width.
        width = np.atleast_1d(powerVector(Tau, np.zeros(n))).size
        return np.empty((0, width))
    # powerVector returns the bare scalar 1 when Tau == 0 (or n == 0);
    # atleast_1d turns that into a one-element row so every row stacks alike.
    rows = [np.atleast_1d(powerVector(Tau, X[p, :])) for p in range(q)]
    # A single vstack call avoids re-copying the growing matrix per sample
    # and yields a (1, rho) matrix for one sample instead of a 1-D vector.
    return np.vstack(rows)

# Power Vector
def powerVector(Tau,V):
    """
    Recursively list the monomials of the entries of ``V`` up to degree ``Tau``.

    The last entry of ``V`` is peeled off and raised to the powers
    0..Tau; each power multiplies the expansion of the remaining entries
    with the leftover degree budget. For V = [x, y] and Tau = 2 the result
    is [1, x, x**2, y, x*y, y**2].

    Parameters
    ----------
    Tau : int
        Maximum total degree.
    V : 1-D ndarray
        Values of the variables for one sample.

    Returns
    -------
    The scalar 1 when ``V`` is empty or ``Tau`` is 0, otherwise a 1-D
    float array of monomial values.
    """
    # Base case: only the constant monomial is left.
    if V.size == 0 or Tau == 0:
        return 1

    head, last = V[:-1], V[-1]
    # Start from an empty float array so the result dtype matches what
    # repeated np.append onto np.array([]) would give.
    pieces = [np.array([])]
    for degree in range(Tau + 1):
        term = powerVector(Tau - degree, head) * (last ** degree)
        pieces.append(np.ravel(term))
    return np.concatenate(pieces)

In [4]:
# Polynomial parameter number
def polyParamsNumber(n,tau):
    """
    Number of monomials of total degree <= ``tau`` in ``n`` variables.

    This is sum_{l=0}^{tau} C(l + n - 1, l), which collapses (hockey-stick
    identity) to C(n + tau, tau). It is evaluated with exact integer
    arithmetic from the standard-library ``math`` module; ``np.math`` was a
    deprecated alias that no longer exists in NumPy 2.x.

    Parameters
    ----------
    n : int
        Number of input variables (>= 0).
    tau : int
        Polynomial degree.

    Returns
    -------
    int
        Parameter count; 0 when ``tau`` is negative (empty sum).

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be a non-negative number of variables")
    if tau < 0:
        # The defining sum runs over range(tau + 1), which is empty here.
        return 0
    # Integer floor division is exact: the quotient is a binomial coefficient.
    return math.factorial(n + tau) // (math.factorial(n) * math.factorial(tau))

In [None]:
"""funcion de costo. lamba se usa si se va a regularizar  """
def loss(Y_true, Y_pred, THETA, lambda_param):
    """
    Regularised half mean squared error.

    Computes ``||Y_true - Y_pred||_F**2 / (2*q)`` plus the ridge penalty
    ``lambda_param / (2*q) * ||THETA[1:, :]||_F**2``, where ``q`` is the
    number of rows of the residual matrix.

    Parameters
    ----------
    Y_true, Y_pred : 2-D ndarray
        Targets and predictions, same shape.
    THETA : 2-D ndarray
        Parameter matrix. Row 0 is excluded from the penalty (it holds the
        constant-term coefficients when the design matrix comes from
        ``designMatrix``).
    lambda_param : float
        Regularisation strength; 0 disables the penalty.

    Returns
    -------
    float
        The cost value.
    """
    residuals = Y_true - Y_pred
    n_samples = residuals.shape[0]
    # Ridge penalty over every parameter row except the first one.
    penalty = (lambda_param/(2*n_samples))*np.square(np.linalg.norm(THETA[1:,:], 'fro'))
    # Data-fit term: squared Frobenius norm of the residuals.
    squared_error = np.square(np.linalg.norm(residuals, 'fro'))
    return squared_error/(2*n_samples) + penalty

In [6]:
def gradient(A,E,THETA,lambda_param):
    """
    Gradient of the regularised half-MSE cost with respect to THETA.

    Parameters
    ----------
    A : 2-D ndarray
        Design matrix of the batch.
    E : 2-D ndarray
        Residuals of the batch (targets minus predictions); its row count
        is used as the sample count.
    THETA : 2-D ndarray
        Current parameters; only rows 1.. enter the ridge term.
    lambda_param : float
        Regularisation strength.

    Returns
    -------
    ndarray
        ``-A.T @ E / q`` with ``lambda_param / q * THETA[1:, :]`` added to
        every row except the first.
    """
    n_samples = E.shape[0]
    # Data-fit part of the gradient, averaged over the samples.
    grad = (-1.0*(A.T).dot(E))/n_samples
    # Ridge contribution; the first parameter row is not penalised.
    ridge_term = (lambda_param/n_samples)*THETA[1:,:]
    grad[1:,:] = grad[1:,:] + ridge_term
    return grad

In [None]:
# Basic RMSprop
def rmsprop_optimization(
    X,
    Y,
    tau=1,
    lambda_param=0.0,
    maxEpochs=100,
    show=10,
    batch_size=16,
    learning_rate=0.01,
    stopping_threshold=1e-6,
    beta=0.9,
    epsilon=1e-8,
    random_state=None,
):
    """
    Fit a polynomial regression model with mini-batch RMSprop.

    Parameters
    ----------
    X : ndarray of shape (q, n)
        Input samples, one per row.
    Y : ndarray of shape (q, m)
        Target values, one row per sample.
    tau : int
        Polynomial degree of the model.
    lambda_param : float
        Ridge regularisation strength (0 disables it).
    maxEpochs : int
        Last epoch index; at most ``maxEpochs + 1`` passes are made.
    show : int
        Print progress every ``show`` epochs; 0 silences the output.
    batch_size : int
        Samples per mini-batch. The final batch of an epoch holds the
        remaining ``q % batch_size`` samples; if ``q < batch_size`` the
        epoch consists of a single batch with all samples.
    learning_rate : float
        RMSprop step size.
    stopping_threshold : float
        Training stops once the loss changes by less than this between two
        consecutive epochs.
    beta : float
        Decay rate of the running average of squared gradients.
    epsilon : float
        Small constant added to the denominator for numerical stability.
    random_state : int or None
        ``None`` (default) draws from the global ``np.random`` state, as
        before. An integer seeds a private ``np.random.RandomState`` so the
        run is reproducible without touching global state.

    Returns
    -------
    ndarray of shape (rho, m)
        Fitted parameter matrix, ``rho = polyParamsNumber(n, tau)``.

    Raises
    ------
    ValueError
        If ``X`` has no rows, ``X`` and ``Y`` disagree on the number of
        rows, or ``batch_size`` is smaller than 1.
    """
    q, n = X.shape      # number of samples, number of input variables
    m = Y.shape[1]      # number of target variables
    if q == 0:
        raise ValueError("X must contain at least one sample")
    if Y.shape[0] != q:
        raise ValueError("X and Y must have the same number of rows")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # np.random (module) and RandomState expose the same randn/permutation API.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Random initial parameters
    rho = polyParamsNumber(n, tau)
    THETA = rng.randn(rho, m)
    # Running average of the squared gradients (the RMSprop accumulator)
    S = np.zeros((rho, m))
    this = "RMSprop"
    previous_loss = np.inf

    # The design matrix depends only on X and tau, so build it once and
    # index its rows per batch instead of rebuilding it for every batch and
    # epoch. atleast_2d keeps a single-sample matrix two-dimensional.
    A = np.atleast_2d(designMatrix(tau, X))

    for epoch in range(maxEpochs + 1):
        # Fresh random visiting order for this epoch
        order = rng.permutation(q)

        # Stepping by batch_size covers the full batches and the shorter
        # remainder batch with the same update code.
        for start in range(0, q, batch_size):
            batch = order[start:start + batch_size]
            A_Batch = A[batch, :]
            E_Batch = Y[batch, :] - model(A_Batch, THETA)
            Grad = gradient(A_Batch, E_Batch, THETA, lambda_param)

            # RMSprop: update the accumulator, then take a step scaled
            # element-wise by the root of the accumulated squared gradient.
            S = beta * S + (1 - beta) * (Grad**2)
            THETA -= learning_rate * Grad / (np.sqrt(S) + epsilon)

        # Loss over the whole data set after this epoch
        current_loss = loss(Y, model(A, THETA), THETA, lambda_param)

        # Progress report
        if show and epoch % show == 0:
            print(this,end = ": ")
            if np.isfinite(maxEpochs):
                print("Epoch ",epoch, "/", maxEpochs,end = " ")
            if np.isfinite(stopping_threshold):
                print(", Performance %8.3e" % current_loss, "/", stopping_threshold, end = " ")

            print("\n")

        # Stop when the loss has stalled
        if abs(previous_loss - current_loss) < stopping_threshold:
            break

        previous_loss = current_loss

    return THETA

In [8]:
# Load the dataset from a comma-separated text file.
# Columns 0-1 are taken as the model inputs, columns 2-3 as the targets.
data = np.loadtxt('challenge01_syntheticdataset22.txt', delimiter=',')
inputs, targets = data[:, :2], data[:, 2:4]

In [9]:
# Split into training and test sets: 40% of the samples are held out for
# testing, and random_state pins the shuffle so the split is repeatable.
(inputs_train, inputs_test,
 targets_train, targets_test) = train_test_split(
    inputs,
    targets,
    test_size=0.4,
    random_state=1,
)

In [10]:
# Short aliases for the split data: x* are model inputs, t* are targets.
xTrain, tTrain = inputs_train, targets_train
xTest, tTest = inputs_test, targets_test

In [11]:
# Fit the parameter matrix THETA with mini-batch RMSprop on the training split.
# tau = 1 selects a first-degree polynomial; lambda_param = 0 disables the
# ridge penalty.
tau = 1
lambda_param = 0.0

# Training draws its initial THETA and its per-epoch shuffling from
# np.random, so seed the global generator to make Restart & Run All
# reproduce the same run.
np.random.seed(0)

THETA = rmsprop_optimization(
    xTrain,
    tTrain,
    tau,
    lambda_param,
    maxEpochs=1000,
    show=100,
    batch_size=16,
    learning_rate=0.01,
    stopping_threshold=1e-6,
    beta=0.9,
    epsilon=1e-8,
)

RMSprop: Epoch  0 / 1000 , Performance 3.090e+02 / 1e-06 

RMSprop: Epoch  100 / 1000 , Performance 5.200e+01 / 1e-06 

RMSprop: Epoch  200 / 1000 , Performance 4.991e-01 / 1e-06 

RMSprop: Epoch  300 / 1000 , Performance 7.955e-04 / 1e-06 



In [12]:
# Expand both splits into design matrices of degree tau ...
A_train = designMatrix(tau, xTrain)
A_test = designMatrix(tau, xTest)
# ... and evaluate the fitted model on each of them.
outputTrain = model(A_train, THETA)
outputTest = model(A_test, THETA)

In [13]:
# R^2 on the raw training data. Both target columns are flattened into one
# column first, so this is a single pooled score.
R2_train = r2_score(
    y_true=tTrain.reshape(-1, 1),
    y_pred=outputTrain.reshape(-1, 1),
)
print(R2_train)

0.9999999974443166


In [None]:
# Mean squared error on the raw training data, pooled over all target
# values by flattening them into one column.
MSE_train = mean_squared_error(
    y_true=tTrain.reshape(-1, 1),
    y_pred=outputTrain.reshape(-1, 1),
)
print(MSE_train)

6.439502691451015e-07


In [None]:
# R^2 on the raw test data (samples not used for training), pooled over
# both target columns.
R2_test = r2_score(
    y_true=tTest.reshape(-1, 1),
    y_pred=outputTest.reshape(-1, 1),
)
print(R2_test)

0.9999999969720091


In [None]:
# Mean squared error on the raw test data (samples not used for training),
# pooled over all target values.
MSE_test = mean_squared_error(
    y_true=tTest.reshape(-1, 1),
    y_pred=outputTest.reshape(-1, 1),
)
print(MSE_test)

7.454081627423006e-07


In [17]:
# Show the fitted parameters: one column per target variable and one row
# per monomial of the design matrix (for tau = 1: constant term, x1, x2).
print(THETA)

[[ 2.03228971  2.80426093]
 [-4.99994518  2.00005604]
 [ 7.00004526 -4.99995397]]
